In [46]:
# Import packages
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

In [47]:
# Import data
data = pd.read_csv('training_data_vt2025.csv')

In [48]:
def cyclical_encoding(df, column, period):
    df[column + '_sin'] = np.round(np.sin(2 * np.pi * df[column] / period), 6)
    df[column + '_cos'] = np.round(np.cos(2 * np.pi * df[column] / period), 6)
    df.drop(columns=[column], inplace=True)  # Remove the original column
    return df

In [49]:
def pre_processing_1(data):
    # Make copy of dataset
    data_processed = data.copy()

    # Create new summertime feature
    data_processed['is_summer'] = ((data_processed['month'] >= 3) & (data_processed['month'] <= 11)).astype(int)

    # Normalize calendar data using cosine encoding
    data_processed = cyclical_encoding(data_processed, 'day_of_week', 7)
    data_processed = cyclical_encoding(data_processed, 'hour_of_day', 24)
    data_processed = cyclical_encoding(data_processed, 'month', 12) 

    # Give target feature numerical values
    data_processed['increase_stock'] = data_processed['increase_stock'].replace({'high_bike_demand': 1, 'low_bike_demand': 0})

    # Create binary category of features
    data_processed['is_raining'] = (data_processed['precip'] != 0).astype(int)
    data_processed['is_snowing'] = (data_processed['snowdepth'] != 0).astype(int)
    data_processed['is_visible'] = (data_processed['visibility'] != 16).astype(int)

    # Drop columns
    data_processed = data_processed.drop(columns=['holiday', 'snow', 'snowdepth', 'precip', 'visibility', 'summertime'])

    return data_processed

In [50]:
def pre_processing_2(data):
    # Make copy of dataset
    data_processed = data.copy()

    # Create new summertime feature
    data_processed['is_summer'] = ((data_processed['month'] >= 3) & (data_processed['month'] <= 11)).astype(int)

    # Normalize calendar data using cosine encoding
    data_processed = cyclical_encoding(data_processed, 'day_of_week', 7)
    data_processed = cyclical_encoding(data_processed, 'hour_of_day', 24)
    data_processed = cyclical_encoding(data_processed, 'month', 12) 

    # Give target feature numerical values
    data_processed['increase_stock'] = data_processed['increase_stock'].replace({'high_bike_demand': 1, 'low_bike_demand': 0})

    # Drop columns
    data_processed = data_processed.drop(columns=['holiday', 'snow', 'summertime'])

    return data_processed

In [51]:
new_data_1 = pre_processing_1(data)
new_data_2 = pre_processing_2(data)

  data_processed['increase_stock'] = data_processed['increase_stock'].replace({'high_bike_demand': 1, 'low_bike_demand': 0})
  data_processed['increase_stock'] = data_processed['increase_stock'].replace({'high_bike_demand': 1, 'low_bike_demand': 0})


In [52]:
new_data_1.head()

Unnamed: 0,weekday,temp,dew,humidity,windspeed,cloudcover,increase_stock,is_summer,day_of_week_sin,day_of_week_cos,hour_of_day_sin,hour_of_day_cos,month_sin,month_cos,is_raining,is_snowing,is_visible
0,0,-7.2,-15.0,53.68,16.3,31.6,0,0,-0.974928,-0.222521,0.965926,0.258819,0.5,0.866025,0,0,0
1,1,-1.3,-12.8,40.97,23.9,85.7,0,0,-0.433884,-0.900969,-0.707107,0.707107,0.5,0.866025,0,0,0
2,1,26.9,21.8,73.39,0.0,81.1,0,1,0.433884,-0.900969,-0.707107,0.707107,-0.866025,-0.5,0,0,0
3,0,3.1,-4.0,59.74,19.2,0.0,0,0,-0.781831,0.62349,0.258819,0.965926,0.5,0.866025,0,0,0
4,1,11.7,-11.4,18.71,10.5,44.6,0,1,0.0,1.0,-0.965926,-0.258819,1.0,0.0,0,0,0


In [53]:
new_data_2.head()

Unnamed: 0,weekday,temp,dew,humidity,precip,snowdepth,windspeed,cloudcover,visibility,increase_stock,is_summer,day_of_week_sin,day_of_week_cos,hour_of_day_sin,hour_of_day_cos,month_sin,month_cos
0,0,-7.2,-15.0,53.68,0.0,0.0,16.3,31.6,16.0,0,0,-0.974928,-0.222521,0.965926,0.258819,0.5,0.866025
1,1,-1.3,-12.8,40.97,0.0,0.0,23.9,85.7,16.0,0,0,-0.433884,-0.900969,-0.707107,0.707107,0.5,0.866025
2,1,26.9,21.8,73.39,0.0,0.0,0.0,81.1,16.0,0,1,0.433884,-0.900969,-0.707107,0.707107,-0.866025,-0.5
3,0,3.1,-4.0,59.74,0.0,0.0,19.2,0.0,16.0,0,0,-0.781831,0.62349,0.258819,0.965926,0.5,0.866025
4,1,11.7,-11.4,18.71,0.0,0.0,10.5,44.6,16.0,0,1,0.0,1.0,-0.965926,-0.258819,1.0,0.0


In [54]:
# Save the processed data
new_data_1.to_csv("preprocessed_data_1.csv", index=False)

print("Preprocessing complete. File saved as preprocessed_data_1.csv")

Preprocessing complete. File saved as preprocessed_data_1.csv


In [55]:
new_data_2.to_csv("preprocessed_data_2.csv", index=False)
print("Preprocessing complete. File saved as preprocessed_data_2.csv")

Preprocessing complete. File saved as preprocessed_data_2.csv
