In [106]:
# Import the pandas library for data manipulation and analysis, using the alias 'pd'
import pandas as pd

# Import the 'create_engine' function from SQLAlchemy to connect to a database
from sqlalchemy import create_engine

# Import a custom function 'enrich_dataframe' from the local 'weatherdata' module
from weatherdata import enrich_dataframe

In [107]:
# -------------------------
# Display options
# -------------------------
# Show at most 40 columns when a DataFrame is rendered
# (attribute-style equivalent of pd.set_option for the same key).
pd.options.display.max_columns = 40


In [110]:
# -------------------------
# Load data and create satisfaction flag
# -------------------------
# Read the raw CSV and, in the same chain, derive the binary target
# 'satisfaction': 1 when 'star_rating' is at least 4, otherwise 0.
# NOTE(review): with a float 'star_rating' column, a missing rating compares
# False and is therefore flagged 0 — confirm that is the intended treatment.
df = pd.read_csv('data.csv').assign(
    satisfaction=lambda frame: frame['star_rating'].ge(4).astype(int)
)

In [111]:
# Columns whose missing values are replaced by the column's most frequent value
cols_to_impute = [
    'has_tv', 'caters', 'alcohol_flag', 'drive_thru',
    'good_for_kids', 'take_out', 'delivery', 'accepts_credit_cards'
]

# Columns left untouched because they contain no non-missing value at all
cols_without_mode = []

for col in cols_to_impute:
    modes = df[col].mode(dropna=True)
    if modes.empty:
        # mode() returns an empty Series for an all-missing column; indexing
        # it with [0] would abort the cell with an opaque "KeyError: 0".
        cols_without_mode.append(col)
        continue
    # When several values tie for most frequent, mode() lists them all;
    # the first entry is used as the fill value.
    mode_val = modes.iloc[0]
    df[col] = df[col].fillna(mode_val)  # assign back rather than using inplace

if cols_without_mode:
    # Make the skipped columns visible so remaining NaNs are not a surprise later
    print(f"Skipped mode imputation (no non-missing values): {cols_without_mode}")


In [113]:
# -------------------------
# Enrich and save data
# -------------------------
# Pass the prepared frame through the project's 'enrich_dataframe' helper
# (imported from the local 'weatherdata' module) and persist the result.
# The recorded run below shows a "Fetching Weather" progress bar over
# 67,726 items that took roughly 59 minutes, so this cell is expensive to re-run.
df_enriched = enrich_dataframe(df)

# Write the enriched frame to disk without the index column
df_enriched.to_csv(path_or_buf="dataWeather.csv", index=False)

Fetching Weather: 100%|██████████| 67726/67726 [59:07<00:00, 19.09it/s]   
