## Allegiant routes: join route distances with historical airport-pair data

In [47]:
import pandas as pd

In [48]:
# Load the raw Allegiant route table; only the mileage and the route code are used.
routes_raw = pd.read_csv("../data/allegiant_routes_info.csv")

# A route code is split on "_": the first piece becomes airport_1, the second airport_2.
route_parts = routes_raw["Route"].str.split("_")

# Assemble the trimmed frame (distance + the two airport codes) and drop exact
# duplicate rows.
# NOTE(review): rows that share an airport pair but differ in distance are NOT
# collapsed by this step (the preview shows AUS/IAD at both 1342 and 1340) —
# confirm that is intended.
df = pd.DataFrame(
    {
        "distance": routes_raw["nsmiles"],
        "airport_1": route_parts.str[0],
        "airport_2": route_parts.str[1],
    }
).drop_duplicates()
display(df.head())


Unnamed: 0,distance,airport_1,airport_2
0,1342,AUS,IAD
1,1340,AUS,IAD
2,587,BNA,IAD
3,588,BNA,IAD
4,612,BNA,PIE


In [49]:
historical_df = pd.read_csv("../data/cleaned_data.csv")

# Reduce the historical table to a single row per (airport_1, airport_2) pair.
# Numeric measures are averaged over the group; descriptive and coordinate
# columns take the first value encountered in the group.
pair_keys = ["airport_1", "airport_2"]
pair_aggregations = {
    # traffic
    "daily_passengers": "mean",
    # place names
    "state_1": "first",
    "city_1": "first",
    "state_2": "first",
    "city_2": "first",
    # demographics
    "population_1": "mean",
    "density_1": "mean",
    "population_2": "mean",
    "density_2": "mean",
    # coordinates
    "lat_1": "first",
    "lon_1": "first",
    "lat_2": "first",
    "lon_2": "first",
}
pair_groups = historical_df.groupby(pair_keys)
historical_df_unique = pair_groups.agg(pair_aggregations).reset_index()

# Number of distinct airport pairs (shown as the cell output).
len(historical_df_unique)

4067

In [50]:
# Ensure the join keys have the same type on the two frames that are merged.
# The aggregated table `historical_df_unique` was built in an earlier cell, so
# casting `historical_df` at this point (as this cell previously did) never
# reached the merge; the cast has to be applied to `historical_df_unique`.
merge_keys = ['airport_1', 'airport_2']
for key in merge_keys:
    df[key] = df[key].astype(str)
    historical_df_unique[key] = historical_df_unique[key].astype(str)

# Attach the per-pair historical attributes to every route.
merged_df = pd.merge(
    df,
    historical_df_unique,
    on=merge_keys,
    how='left',  # keep every row of df, matched or not
    # Each key pair must appear at most once on the right-hand side; pandas
    # raises MergeError otherwise, so the merge cannot multiply rows of df.
    validate='many_to_one',
)

# Display the merged dataframe
display(merged_df.head())

Unnamed: 0,distance,airport_1,airport_2,daily_passengers,state_1,city_1,state_2,city_2,population_1,density_1,population_2,density_2,lat_1,lon_1,lat_2,lon_2
0,1342,AUS,IAD,222.211864,TX,Austin,DC,Washington,1905945.0,1154.1,5116378.0,4235.7,30.264979,-97.746598,38.892062,-77.019912
1,1340,AUS,IAD,222.211864,TX,Austin,DC,Washington,1905945.0,1154.1,5116378.0,4235.7,30.264979,-97.746598,38.892062,-77.019912
2,587,BNA,IAD,108.618644,TN,Nashville,DC,Washington,1177657.0,555.4,5116378.0,4235.7,36.166687,-86.779932,38.892062,-77.019912
3,588,BNA,IAD,108.618644,TN,Nashville,DC,Washington,1177657.0,555.4,5116378.0,4235.7,36.166687,-86.779932,38.892062,-77.019912
4,612,BNA,PIE,42.76,TN,Nashville,FL,Tampa,1177657.0,555.4,2861173.0,1320.9,36.166687,-86.779932,37.8606,-78.804199


In [51]:
# Verify the number of rows is unchanged by the merge.
original_rows = len(df)
merged_rows = len(merged_df)
print(f"Original df rows: {original_rows}")
print(f"Merged df rows: {merged_rows}")

# Enforce the invariant rather than relying on a reader comparing the two
# printed numbers: a Restart & Run All now stops here if the counts differ.
assert merged_rows == original_rows, (
    f"merge changed the row count: {original_rows} -> {merged_rows}"
)

Original df rows: 321
Merged df rows: 321


In [53]:
# Write the merged routes to disk; the DataFrame index is not written as a column.
transformed_routes_path = "../data/allegiant_routes_info_transformed.csv"
merged_df.to_csv(transformed_routes_path, index=False)