In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the cleaned dataset from disk
df = pd.read_csv('../data/cleaned_data.csv')


# Parse the date column with an explicit "year, full month name, day" format
event_dates = pd.to_datetime(df['Event Date'], format='%Y %B %d')
df['Event Date'] = event_dates

# Stringify the time column, parse it, and keep only the time-of-day part
# (the column then holds datetime.time objects rather than timestamps)
parsed_times = pd.to_datetime(df['Event Time'].astype(str))
df['Event Time'] = parsed_times.dt.time


# Calendar features taken from the parsed event date
df['Month'] = event_dates.dt.month
df['Day'] = event_dates.dt.day
df['Weekday'] = event_dates.dt.day_name()

# Two-level season feature: December, January and February are 'winter',
# every other month value is 'non-winter'
df['Season'] = df['Month'].map(
    lambda month: 'non-winter' if month not in (12, 1, 2) else 'winter'
)

# 'Event Time' holds datetime.time objects, which have no .dt accessor,
# so the hour is read from each element's .hour attribute
df['Hour'] = df['Event Time'].map(lambda event_time: event_time.hour)

# Map an hour of the day to a coarse time-of-day label
def categorize_time(hour):
    """Return the time-of-day label for ``hour``.

    The hour is matched against half-open ranges (start inclusive, end
    exclusive): 5-12 is 'morning', 12-17 is 'afternoon' and 17-21 is
    'evening'. Any value that falls in none of these ranges, including
    NaN (for which every comparison is false), is labelled 'night'.
    """
    day_parts = (
        (5, 12, 'morning'),
        (12, 17, 'afternoon'),
        (17, 21, 'evening'),
    )
    for start, end, label in day_parts:
        if start <= hour < end:
            return label
    # Nothing matched: late evening, early morning, or a non-matching value
    return 'night'

# Label every row's hour with its time-of-day category
df['Time Of Day'] = df['Hour'].map(categorize_time)

# Flag hours that fall inside the morning or evening rush window
def is_rush_hour(hour):
    """Return 1 if ``hour`` is in a rush-hour window, otherwise 0.

    The windows are half-open: 7 up to (not including) 10, and 16 up to
    (not including) 19.
    """
    in_morning_window = 7 <= hour < 10
    in_evening_window = 16 <= hour < 19
    return int(in_morning_window or in_evening_window)

# Store the 0/1 rush-hour flag computed from each row's hour
df['Rush Hour'] = df['Hour'].map(is_rush_hour)

# Preview the parsed date/time columns next to the engineered features
columns = ['Event Date', 'Event Time', 'Hour', 'Time Of Day', 'Rush Hour', 'Season', 'Weekday', 'Day', 'Month']
print(df[columns].head())

# Store the new dataset to a new CSV file.
# This must be DataFrame.to_csv: the previous pd.read_csv(..., index=False)
# call raised TypeError (read_csv has no `index` parameter) and never wrote
# anything. index=False keeps the row index out of the output file.
df.to_csv('../data/feature_engineered_data.csv', index=False)



  event_date event_time  hour time_of_day  rush_hour      season    weekday  \
0 2024-11-30   08:46:00     8     morning          1  non-winter   Saturday   
1 2024-11-30   19:09:00    19     evening          0  non-winter   Saturday   
2 2024-11-29   20:30:00    20     evening          0  non-winter     Friday   
3 2024-11-29   13:03:00    13   afternoon          0  non-winter     Friday   
4 2024-11-27   15:20:00    15   afternoon          0  non-winter  Wednesday   

   day  month  
0   30     11  
1   30     11  
2   29     11  
3   29     11  
4   27     11  
