In [1]:
import pandas as pd

In [2]:
def get_data_by_airport(df: pd.DataFrame, city: str) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``Origin`` value contains ``city``.

    Args:
        df: Frame with an ``Origin`` column holding strings.
        city: Text to search for. ``Series.str.contains`` interprets it as a
            regular expression by default, so regex metacharacters in
            ``city`` are not matched literally.

    Returns:
        The matching rows, keeping their original index labels. Rows whose
        ``Origin`` is missing are never returned.

    Raises:
        KeyError: If ``df`` has no ``Origin`` column.
    """
    # na=False makes missing origins count as non-matches instead of leaving
    # NaN holes in the boolean mask.
    origin_matches = df['Origin'].str.contains(city, na=False)
    return df[origin_matches]


In [3]:
def one_hot_encode(df: pd.DataFrame, column_name: str) -> pd.DataFrame:
    """Replace ``column_name`` with one indicator column per distinct value.

    The indicator columns are appended after the existing columns and are
    named ``"<column_name> - <value>"``. The input frame is not modified.

    Args:
        df: Frame containing the column to encode.
        column_name: Name of the column to expand and then drop.

    Returns:
        A new frame with the indicator columns added and ``column_name``
        removed. If ``column_name`` is not in ``df``, a message is printed
        and ``df`` itself is returned unchanged.
    """
    try:
        source_column = df[column_name]
    except KeyError as e:
        # Best-effort on purpose: a missing column (for example one already
        # encoded by an earlier run) is reported rather than treated as fatal.
        print(f"Error in one_hot_encode: {e}")
        return df

    indicators = pd.get_dummies(source_column).add_prefix(f"{column_name} - ")
    return pd.concat([df, indicators], axis=1).drop(column_name, axis=1)

In [4]:
# Load the weather observations that the following cells clean up.
# The path is relative, so it resolves against the kernel's working directory.
weather_df = pd.read_csv('weather_data.csv')

In [5]:
# Translate the verbose station names in the NAME column into short airport
# codes. Two different station names are both assigned to 'MCO'.
names = {
    'LOS ANGELES INTERNATIONAL AIRPORT, CA US': 'LAX',
    'SEATTLE TACOMA AIRPORT, WA US': 'SEA',
    'ORLANDO INTERNATIONAL AIRPORT, FL US': 'MCO',
    'BUENA VENTURA LAKES 6.0 ENE, FL US': 'MCO',
    'DAL FTW WSCMO AIRPORT, TX US': 'DFW',
    'JFK INTERNATIONAL AIRPORT, NY US': 'JFK',
}

# Values that are already airport codes are accepted, so re-running this cell
# on an already-converted frame is a no-op instead of a KeyError.
known_codes = set(names.values())
unmapped_names = {
    name for name in weather_df['NAME'].unique()
    if name not in names and name not in known_codes
}
if unmapped_names:
    # Still fail loudly on a station this mapping does not know about.
    raise KeyError(
        f"Unmapped weather station names: {sorted(map(str, unmapped_names))}"
    )

weather_df['NAME'] = weather_df['NAME'].map(lambda name: names.get(name, name))

In [6]:
# Columns that are dropped from the weather frame before encoding and export.
weather_removables = [
    'STATION','AWND_ATTRIBUTES','DAPR','DAPR_ATTRIBUTES','MDPR',
    'MDPR_ATTRIBUTES','PGTM', 'PGTM_ATTRIBUTES','TAVG_ATTRIBUTES',
    'TMAX_ATTRIBUTES','TMIN_ATTRIBUTES','WDF2_ATTRIBUTES','WDF5_ATTRIBUTES',
    'WSF2_ATTRIBUTES','WSF5_ATTRIBUTES','WT01','WT02','WT03','WT04','WT05',
    'WT05_ATTRIBUTES','WT06','WT06_ATTRIBUTES','WT07','WT07_ATTRIBUTES',
    'WT08','WT09','WT09_ATTRIBUTES'
]

# Report columns that are not present (for example because this cell already
# ran) once, instead of catching an exception per column.
absent_columns = [
    column for column in weather_removables
    if column not in weather_df.columns
]
if absent_columns:
    print(f'Already Removed {absent_columns}')

# One drop call; errors='ignore' skips the absent columns so the cell can be
# re-run safely.
weather_df = weather_df.drop(columns=weather_removables, errors='ignore')

In [7]:
# Attribute columns that are expanded into indicator columns.
weather_one_hot_columns = [
    'PRCP_ATTRIBUTES','SNOW_ATTRIBUTES','SNWD_ATTRIBUTES','WT01_ATTRIBUTES',
    'WT02_ATTRIBUTES','WT03_ATTRIBUTES','WT04_ATTRIBUTES','WT08_ATTRIBUTES'
]
# Each pass replaces one listed column with "<column> - <value>" indicator
# columns. one_hot_encode prints a message and returns the frame unchanged
# when a column is missing, so an absent column does not stop the loop.
for col in weather_one_hot_columns:
    weather_df = one_hot_encode(weather_df, col)

In [8]:
# Parse the DATE column into pandas datetime values. With the default
# errors='raise', an unparseable entry raises instead of becoming NaT.
weather_df['DATE'] = pd.to_datetime(weather_df['DATE'])

In [10]:
# Persist the cleaned frame. to_csv does not create directories, so the
# 'clean data' folder must already exist. With the default index=True the row
# index is written as an extra, unnamed first column.
# NOTE(review): confirm downstream readers expect that index column.
weather_df.to_csv('clean data/clean_weather.csv')