In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Parse 'CRASH_DATE' into datetimes; values that cannot be parsed become NaT
People['CRASH_DATE'] = pd.to_datetime(People['CRASH_DATE'], errors='coerce')

# Split data into pre-COVID (year < 2020) and COVID (year >= 2020).
# Rows whose date is NaT satisfy neither comparison and are in neither subset.
# .copy() makes each subset an independent DataFrame, so adding columns to it
# below does not write into a selection of People (which can trigger pandas'
# SettingWithCopyWarning).
pre_covid_data = People[People['CRASH_DATE'].dt.year < 2020].copy()
covid_data = People[People['CRASH_DATE'].dt.year >= 2020].copy()

# Get the day of the month for both datasets
pre_covid_data['CRASH_DAY'] = pre_covid_data['CRASH_DATE'].dt.day
covid_data['CRASH_DAY'] = covid_data['CRASH_DATE'].dt.day

# Month number -> season name; any month not listed here is treated as 'Fall'
_SEASON_BY_MONTH = {
    12: 'Winter', 1: 'Winter', 2: 'Winter',
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
}


def find_season(crash_date):
    """Return the season name for a crash date.

    Args:
        crash_date: A date-like value exposing a ``month`` attribute, or a
            missing value (anything ``pd.isnull`` reports as null).

    Returns:
        'Unknown' for a missing value; otherwise 'Winter' (Dec-Feb),
        'Spring' (Mar-May), 'Summer' (Jun-Aug) or 'Fall' (all other months).
    """
    if pd.isnull(crash_date):
        return 'Unknown'
    return _SEASON_BY_MONTH.get(crash_date.month, 'Fall')

# Label every crash in both periods with its season
for period_frame in (pre_covid_data, covid_data):
    period_frame['SEASON'] = period_frame['CRASH_DATE'].apply(find_season)

# The four seasons, in the order their panels are drawn
seasons = ['Winter', 'Spring', 'Summer', 'Fall']

# 2x2 grid: one panel per season
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

for ax, season in zip(axes.flat, seasons):
    # Pre-COVID is drawn first, then COVID, so each keeps the same line colour
    # in every panel
    for period_frame, period_label in ((pre_covid_data, 'Pre-COVID'),
                                       (covid_data, 'COVID')):
        in_season = period_frame['SEASON'] == season
        # Number of crashes on each day of the month, ordered by day
        day_counts = (
            period_frame.loc[in_season, 'CRASH_DAY']
            .value_counts()
            .sort_index()
        )
        ax.plot(day_counts.index, day_counts.values, marker='o', label=period_label)

    ax.set_title(f'{season} Crashes')
    ax.set_xlabel('Day of Month')
    ax.set_ylabel('Crashes')
    ax.grid(True)
    ax.legend()

# Avoid overlapping titles/labels between panels, then render
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Keep only crashes from 2018 onward
covid_period_data = People[People['CRASH_YEAR'] >= 2018]

# Number of crashes for each (year, month) pair
monthly_trends = (
    covid_period_data
    .groupby(['CRASH_YEAR', 'CRASH_MONTH'])
    .size()
    .reset_index(name='Crash_Count')
)

plt.figure(figsize=(10, 5))

# One line per year; sort=False keeps years in their order of first appearance
for year, year_data in monthly_trends.groupby('CRASH_YEAR', sort=False):
    plt.plot(year_data['CRASH_MONTH'], year_data['Crash_Count'], marker='o', label=f'Year {year}')

# Titles and axis labels
plt.title('Monthly Crash Trends (2018 and later)')
plt.xlabel('Month')
plt.ylabel('Number of Crashes')

# One tick per calendar month
plt.xticks(range(1, 13))

plt.grid(True)
plt.legend(title='Year')
plt.show()


In [None]:
def prepare_and_evaluate_data(df, period_name):
    """Train a random-forest injury classifier on ``df`` and print its test metrics.

    The feature columns are taken from the module-level ``features`` list and
    the target is the 'INJURY_CLASSIFICATION' column. The data is split 80/20
    into train/test sets, the model is fitted on the training part, and a
    classification report plus confusion matrix for the test part are printed.

    Args:
        df: DataFrame holding the ``features`` columns and
            'INJURY_CLASSIFICATION'.
        period_name: Label used in the printed header (e.g. "Before COVID").

    Returns:
        None. Results are printed only.
    """
    # X is the input data (features), y is what we are predicting (injury classification)
    X = df[features]
    y = df['INJURY_CLASSIFICATION']

    # Columns with text values that need to be converted to numbers
    categorical_cols = [col for col in X.columns if X[col].dtype == 'object']

    # One-hot encode the text columns and pass every other column through.
    # handle_unknown='ignore' encodes a category that was not seen during
    # fitting as all zeros; with the default ('error'), a category that only
    # occurs in the test split would make predict() raise ValueError.
    preprocessor = ColumnTransformer(
        transformers=[
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
        ],
        remainder='passthrough'
    )

    # Model pipeline: first process the data, then apply the classifier.
    # class_weight='balanced' reweights classes inversely to their frequency.
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier(random_state=42, class_weight='balanced'))
    ])

    # Split the data into training (80%) and testing (20%); fixed seed for reproducibility
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model with the training data
    pipeline.fit(X_train, y_train)

    # Make predictions using the testing data
    predictions = pipeline.predict(X_test)

    # Print the results for this period
    print(f"Results for {period_name}:")
    print(classification_report(y_test, predictions))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, predictions))

# Evaluate the classifier on each period in turn: 'before COVID' first, then 'during COVID'
for eval_frame, eval_label in ((before_covid, "Before COVID"),
                               (during_covid, "During COVID")):
    prepare_and_evaluate_data(eval_frame, eval_label)

