In [72]:
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os

In [73]:
def load_data(directory_path, sample_frac=0.1, random_state=42):
    """
    Loads flight delay data from multiple CSV files in a directory.

    Every file whose name ends in ``.csv`` is read, down-sampled, and tagged
    with a ``Year`` column taken from the part of the file name before the
    first dot (e.g. ``2009.csv`` -> ``2009``). Files are processed in sorted
    name order so the row order of the result is reproducible.

    :param directory_path: Path to the directory containing CSV files.
    :param sample_frac: Fraction of rows to keep from each file (default 0.1).
    :param random_state: Seed passed to ``DataFrame.sample`` (default 42).
    :return: DataFrame with the sampled rows of all files and a ``Year`` column.
    :raises ValueError: If the directory holds no CSV files, or a file name
        does not start with an integer year.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # concatenated row order stable across machines and runs.
    files = sorted(file for file in os.listdir(directory_path) if file.endswith('.csv'))
    if not files:
        # pd.concat([]) would fail with a message that hides the real cause.
        raise ValueError(f"No CSV files found in {directory_path!r}")

    data_frames = []
    for file in files:
        stem = file.split('.')[0]
        try:
            year = int(stem)
        except ValueError:
            raise ValueError(
                f"Cannot derive a year from file name {file!r}; expected '<year>.csv'"
            ) from None

        df = pd.read_csv(os.path.join(directory_path, file)).sample(
            frac=sample_frac, random_state=random_state
        )
        df['Year'] = year  # Add year column from filename
        data_frames.append(df)

    # Combine all dataframes into one
    return pd.concat(data_frames, ignore_index=True)

In [74]:
# Read the state lookup table (full name, standard and postal abbreviations)
# and display it as the cell output.
df_state = pd.read_csv(filepath_or_buffer='../datasets/abb-states.csv')
df_state

Unnamed: 0,State,Standard,Postal
0,Alabama,Ala.,AL
1,Alaska,Alaska,AK
2,Arizona,Ariz.,AZ
3,Arkansas,Ark.,AR
4,California,Calif.,CA
5,Canal Zone,C.Z.,CZ
6,Colorado,Colo.,CO
7,Connecticut,Conn.,CT
8,Delaware,Del.,DE
9,District of Columbia,D.C.,DC


In [75]:
# Keep only the full state name and its postal code for the later join.
df_states = df_state.loc[:, ['State', 'Postal']]

In [76]:
# Preview the first five rows of the trimmed lookup table.
df_states.head(n=5)

Unnamed: 0,State,Postal
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA


In [77]:
# Persist the trimmed lookup table. With to_csv's default settings the row
# index is written as a leading unnamed column.
df_states.to_csv(path_or_buf="../datasets/abb-states1.csv")

In [78]:
# Read the airport reference table and preview its first five rows.
df_airports_code = pd.read_csv(filepath_or_buffer='../datasets/airports.csv')
df_airports_code.head(n=5)

Unnamed: 0,IATA,AIRPORT,CITY,STATE,COUNTRY,LATITUDE,LONGITUDE
0,ABQ,Albuquerque International,Albuquerque,NM,USA,35.040222,-106.609194
1,ANC,Ted Stevens Anchorage International,Anchorage,AK,USA,61.17432,-149.996186
2,ATL,William B Hartsfield-Atlanta Intl,Atlanta,GA,USA,33.640444,-84.426944
3,AUS,Austin-Bergstrom International,Austin,TX,USA,30.194533,-97.669872
4,BDL,Bradley International,Windsor Locks,CT,USA,41.938874,-72.683228


In [79]:
# Keep only the airport code, airport name and state abbreviation, then
# show the resulting (rows, columns) shape.
df_airports_codes = df_airports_code.loc[:, ['IATA', 'AIRPORT', 'STATE']]
df_airports_codes.shape

(341, 3)

In [80]:
# Left-join the full state name onto every airport via its state abbreviation;
# airports whose STATE has no match in df_states keep NaN in State/Postal.
df_merged = df_airports_codes.merge(
    df_states,
    how='left',
    left_on='STATE',
    right_on='Postal',
)
df_merged.head()

Unnamed: 0,IATA,AIRPORT,STATE,State,Postal
0,ABQ,Albuquerque International,NM,New Mexico,NM
1,ANC,Ted Stevens Anchorage International,AK,Alaska,AK
2,ATL,William B Hartsfield-Atlanta Intl,GA,Georgia,GA
3,AUS,Austin-Bergstrom International,TX,Texas,TX
4,BDL,Bradley International,CT,Connecticut,CT


In [81]:
# Shape of the merged frame as (rows, columns); a row count above that of
# df_airports_codes would mean the join duplicated airports.
df_merged.shape

(341, 5)

In [83]:
# Fill missing full state names with the airport's own STATE code.
# After the left merge, 'State' and 'Postal' both originate from df_states, so
# they are missing on the same unmatched rows and 'Postal' cannot serve as a
# fallback. 'STATE' comes from the airports table and equals 'Postal' on every
# matched row, so falling back to it covers both cases.
# assign/drop(errors='ignore') build a new frame, so re-running this cell is
# safe even after 'Postal' has already been removed.
df_merged = (
    df_merged
    .assign(State=df_merged['State'].fillna(df_merged['STATE']))
    .drop(columns=['Postal'], errors='ignore')
)
df_merged.head()

Unnamed: 0,IATA,AIRPORT,STATE,State
0,ABQ,Albuquerque International,NM,New Mexico
1,ANC,Ted Stevens Anchorage International,AK,Alaska
2,ATL,William B Hartsfield-Atlanta Intl,GA,Georgia
3,AUS,Austin-Bergstrom International,TX,Texas
4,BDL,Bradley International,CT,Connecticut


In [84]:
# Shape as (rows, columns) once the helper 'Postal' column has been dropped.
df_merged.shape

(341, 4)

In [85]:
# Persist the airport/state table. With to_csv's default settings the row
# index is written as a leading unnamed column.
df_merged.to_csv(path_or_buf="../datasets/airports_and_states.csv")