In [1]:
import pandas as pd
import numpy as np


In [2]:
# Raw flight records, deliberately messy: mixed-case route strings, missing
# flight numbers, variable-length delay lists and noisy airline names.
raw_flights = {
    "From_To": [
        "LoNDon_paris",
        "MAdrid_miLAN",
        "londON_StockhOlm",
        "Budapest_PaRis",
        "Brussels_londOn",
    ],
    "FlightNumber": [10045, np.nan, 10065, np.nan, 10085],
    "RecentDelays": [[23, 47], [], [24, 43, 87], [13], [67, 32]],
    "Airline": [
        "KLM(!)",
        "<Air France> (12)",
        "(British Airways. )",
        "12. Air France",
        '"Swiss Air"',
    ],
}
df = pd.DataFrame(raw_flights)

In [3]:
# Display the frame as constructed, before any cleaning.
df

Unnamed: 0,From_To,FlightNumber,RecentDelays,Airline
0,LoNDon_paris,10045.0,"[23, 47]",KLM(!)
1,MAdrid_miLAN,,[],<Air France> (12)
2,londON_StockhOlm,10065.0,"[24, 43, 87]",(British Airways. )
3,Budapest_PaRis,,[13],12. Air France
4,Brussels_londOn,10085.0,"[67, 32]","""Swiss Air"""


### Adding 10 to the Flight Number

In [4]:
# Shift every flight number up by 10; NaN entries stay NaN.
df["FlightNumber"] = df["FlightNumber"].add(10)

In [5]:
# Display the frame to check the shifted FlightNumber values.
df

Unnamed: 0,From_To,FlightNumber,RecentDelays,Airline
0,LoNDon_paris,10055.0,"[23, 47]",KLM(!)
1,MAdrid_miLAN,,[],<Air France> (12)
2,londON_StockhOlm,10075.0,"[24, 43, 87]",(British Airways. )
3,Budapest_PaRis,,[13],12. Air France
4,Brussels_londOn,10095.0,"[67, 32]","""Swiss Air"""


In [6]:
# Look up the FlightNumber dtype from the frame's per-column dtype table.
df.dtypes["FlightNumber"]

dtype('float64')

### Filling the NaN values

In [7]:
# Count how many FlightNumber entries are missing.
df["FlightNumber"].isnull().sum()

2

In [8]:
# Replace missing flight numbers with 0 and assign the result back to the column.
# Calling fillna(inplace=True) on the `df["FlightNumber"]` selection is a chained
# in-place operation: pandas 2.x emits a FutureWarning for it, and under
# Copy-on-Write (the default from pandas 3.0) it only changes a temporary
# Series, leaving `df` untouched.
df["FlightNumber"] = df["FlightNumber"].fillna(0)

In [9]:
# Display the frame to confirm the missing flight numbers were filled.
df

Unnamed: 0,From_To,FlightNumber,RecentDelays,Airline
0,LoNDon_paris,10055.0,"[23, 47]",KLM(!)
1,MAdrid_miLAN,0.0,[],<Air France> (12)
2,londON_StockhOlm,10075.0,"[24, 43, 87]",(British Airways. )
3,Budapest_PaRis,0.0,[13],12. Air France
4,Brussels_londOn,10095.0,"[67, 32]","""Swiss Air"""


### Converting the Flight Number from Float to Int

In [10]:
# With the NaNs filled, the float column can be cast to integers.
flight_numbers = df["FlightNumber"]
df["FlightNumber"] = flight_numbers.astype(dtype=int)

In [11]:
# Display the frame to check FlightNumber is now shown as integers.
df

Unnamed: 0,From_To,FlightNumber,RecentDelays,Airline
0,LoNDon_paris,10055,"[23, 47]",KLM(!)
1,MAdrid_miLAN,0,[],<Air France> (12)
2,londON_StockhOlm,10075,"[24, 43, 87]",(British Airways. )
3,Budapest_PaRis,0,[13],12. Air France
4,Brussels_londOn,10095,"[67, 32]","""Swiss Air"""


### Splitting the From_To column

In [12]:
# The origin is the part of "From_To" before the underscore.
df["From_Point"] = df["From_To"].str.split("_").str.get(0)

In [13]:
# The destination is the part of "From_To" after the underscore.
df["To_Point"] = df["From_To"].str.split("_").str.get(1)

In [14]:
# Display the frame with the new From_Point and To_Point columns.
df

Unnamed: 0,From_To,FlightNumber,RecentDelays,Airline,From_Point,To_Point
0,LoNDon_paris,10055,"[23, 47]",KLM(!),LoNDon,paris
1,MAdrid_miLAN,0,[],<Air France> (12),MAdrid,miLAN
2,londON_StockhOlm,10075,"[24, 43, 87]",(British Airways. ),londON,StockhOlm
3,Budapest_PaRis,0,[13],12. Air France,Budapest,PaRis
4,Brussels_londOn,10095,"[67, 32]","""Swiss Air""",Brussels,londOn


### Deleting the From_To column

In [15]:
# From_To is redundant now that it has been split; remove it from df in place.
df.drop(columns="From_To", inplace=True)

In [16]:
# Display the frame to confirm From_To is gone.
df

Unnamed: 0,FlightNumber,RecentDelays,Airline,From_Point,To_Point
0,10055,"[23, 47]",KLM(!),LoNDon,paris
1,0,[],<Air France> (12),MAdrid,miLAN
2,10075,"[24, 43, 87]",(British Airways. ),londON,StockhOlm
3,0,[13],12. Air France,Budapest,PaRis
4,10095,"[67, 32]","""Swiss Air""",Brussels,londOn


### Capitalization of the From/To columns

In [17]:
# Normalise the origin city names: first letter upper-case, the rest lower-case.
from_point_fixed = df["From_Point"].str.capitalize()
df["From_Point"] = from_point_fixed
df

Unnamed: 0,FlightNumber,RecentDelays,Airline,From_Point,To_Point
0,10055,"[23, 47]",KLM(!),London,paris
1,0,[],<Air France> (12),Madrid,miLAN
2,10075,"[24, 43, 87]",(British Airways. ),London,StockhOlm
3,0,[13],12. Air France,Budapest,PaRis
4,10095,"[67, 32]","""Swiss Air""",Brussels,londOn


In [18]:
# Normalise the destination city names the same way: first letter upper-case,
# the rest lower-case.
to_point_fixed = df["To_Point"].str.capitalize()
df["To_Point"] = to_point_fixed
df

Unnamed: 0,FlightNumber,RecentDelays,Airline,From_Point,To_Point
0,10055,"[23, 47]",KLM(!),London,Paris
1,0,[],<Air France> (12),Madrid,Milan
2,10075,"[24, 43, 87]",(British Airways. ),London,Stockholm
3,0,[13],12. Air France,Budapest,Paris
4,10095,"[67, 32]","""Swiss Air""",Brussels,London


### Splitting the RecentDelays column into three columns named Delay_1, Delay_2, Delay_3

In [21]:
# Expand each RecentDelays list into one column per position; shorter lists
# are padded with NaN. Reusing df.index keeps the rows aligned with df when
# the two frames are later concatenated side by side, whatever index df has.
df2 = pd.DataFrame(df["RecentDelays"].tolist(), index=df.index)
# Name the columns Delay_1..Delay_k from the actual width, so the cell does not
# raise when the longest list is not exactly three items long.
df2.columns = [f"Delay_{n}" for n in range(1, df2.shape[1] + 1)]

In [22]:
# Display the expanded delay columns.
df2

Unnamed: 0,Delay_1,Delay_2,Delay_3
0,23.0,47.0,
1,,,
2,24.0,43.0,87.0
3,13.0,,
4,67.0,32.0,


In [26]:
# Frames to join side by side: the cleaned flights and their delay columns.
lst = [df, df2]

In [30]:
# Join the frames column-wise; rows are matched on their index labels.
new_df = pd.concat(lst, axis="columns")

In [31]:
# Display the combined frame.
new_df

Unnamed: 0,FlightNumber,RecentDelays,Airline,From_Point,To_Point,Delay_1,Delay_2,Delay_3
0,10055,"[23, 47]",KLM(!),London,Paris,23.0,47.0,
1,0,[],<Air France> (12),Madrid,Milan,,,
2,10075,"[24, 43, 87]",(British Airways. ),London,Stockholm,24.0,43.0,87.0
3,0,[13],12. Air France,Budapest,Paris,13.0,,
4,10095,"[67, 32]","""Swiss Air""",Brussels,London,67.0,32.0,


### Dropping the RecentDelays column

In [33]:
# The list column is redundant once the Delay_* columns exist.
new_df = new_df.drop(columns="RecentDelays")

In [34]:
# Display the final frame without the RecentDelays column.
new_df

Unnamed: 0,FlightNumber,Airline,From_Point,To_Point,Delay_1,Delay_2,Delay_3
0,10055,KLM(!),London,Paris,23.0,47.0,
1,0,<Air France> (12),Madrid,Milan,,,
2,10075,(British Airways. ),London,Stockholm,24.0,43.0,87.0
3,0,12. Air France,Budapest,Paris,13.0,,
4,10095,"""Swiss Air""",Brussels,London,67.0,32.0,
