In [10]:
import numpy as np
import pandas as pd

# Given DataFrame
df = pd.DataFrame({'From_To': ['LoNDon_paris', 'MAdrid_miLAN', 'londON_StockhOlm', 'Budapest_PaRis', 'Brussels_londOn'],
                   'FlightNumber': [10045, np.nan, 10065, np.nan, 10085],
                   'RecentDelays': [[23, 47], [], [24, 43, 87], [13], [67, 32]],
                   'Airline': ['KLM(!)', '<Air France> (12)', '(British Airways. )', '12. Air France', '"Swiss Air"']})

In [11]:
# 1. Fill missing values in FlightNumber column
df['FlightNumber'] = df['FlightNumber'].interpolate().astype(int)

In [3]:
df

Unnamed: 0,From_To,FlightNumber,RecentDelays,Airline
0,LoNDon_paris,10045,"[23, 47]",KLM(!)
1,MAdrid_miLAN,10055,[],<Air France> (12)
2,londON_StockhOlm,10065,"[24, 43, 87]",(British Airways. )
3,Budapest_PaRis,10075,[13],12. Air France
4,Brussels_londOn,10085,"[67, 32]","""Swiss Air"""


In [4]:
# 2. Split From_To column and create new temporary DataFrame
temp_df = df['From_To'].str.split('_', expand=True)
temp_df.columns = ['From', 'To']
temp_df

Unnamed: 0,From,To
0,LoNDon,paris
1,MAdrid,miLAN
2,londON,StockhOlm
3,Budapest,PaRis
4,Brussels,londOn


In [5]:
# 3. Standardize city names in temporary DataFrame
temp_df['From'] = temp_df['From'].str.capitalize()
temp_df['To'] = temp_df['To'].str.capitalize()
temp_df

Unnamed: 0,From,To
0,London,Paris
1,Madrid,Milan
2,London,Stockholm
3,Budapest,Paris
4,Brussels,London


In [6]:
# 4. Drop From_To column from original DataFrame and attach temporary DataFrame
df.drop('From_To', axis=1, inplace=True)
df = pd.concat([df, temp_df], axis=1)
df

Unnamed: 0,FlightNumber,RecentDelays,Airline,From,To
0,10045,"[23, 47]",KLM(!),London,Paris
1,10055,[],<Air France> (12),Madrid,Milan
2,10065,"[24, 43, 87]",(British Airways. ),London,Stockholm
3,10075,[13],12. Air France,Budapest,Paris
4,10085,"[67, 32]","""Swiss Air""",Brussels,London


In [7]:
# 5. Expand RecentDelays column into separate columns
delays = df['RecentDelays'].apply(pd.Series)
delays.columns = [f"delay_{i+1}" for i in range(delays.shape[1])]

delays

Unnamed: 0,delay_1,delay_2,delay_3
0,23.0,47.0,
1,,,
2,24.0,43.0,87.0
3,13.0,,
4,67.0,32.0,


In [9]:
# Replace RecentDelays column in original DataFrame with delays DataFrame
df.drop('RecentDelays', axis=1, inplace=True)
df = pd.concat([df, delays], axis=1)

df

KeyError: "['RecentDelays'] not found in axis"