In [None]:
# Importing the required Python libraries
import pandas as pd
import numpy as np
import os

In [None]:
# Folder path containing the CSV files
folder_path = 'bremen_weather_datasets/'

# Collecting the CSV file names in sorted order: os.listdir() returns the entries
# in arbitrary order, and the load order should be reproducible on every machine
csv_filenames = sorted(name for name in os.listdir(folder_path) if name.endswith('.csv'))
if not csv_filenames:
    # pd.concat() on an empty list fails with an unhelpful message, so fail early and clearly
    raise ValueError(f"No CSV files found in '{folder_path}'")

# Initializing an empty list to store the DataFrames
dataframes = []

# Reading every CSV file of the folder into its own DataFrame
for filename in csv_filenames:
    file_path = os.path.join(folder_path, filename)
    df = pd.read_csv(file_path)
    dataframes.append(df)

# Merging the DataFrames together; ignore_index avoids repeating the row labels of the single files
weather_df = pd.concat(dataframes, ignore_index=True)

# Sorting the dataframe chronologically based on "datetime" column.
# The key parses the values only for the comparison (the stored values stay untouched), so the
# order does not depend on the text format of the timestamps; mergesort is stable, which keeps
# records with equal timestamps in their load order.
weather_df = weather_df.sort_values(by='datetime', key=pd.to_datetime, kind='mergesort')

In [None]:
# Restricting the weather data to the timestamp plus the weather features
# that are assumed to have some impact on the car sharing demand
relevant_columns = ['datetime', 'temp', 'windspeed', 'visibility', 'conditions']
weather_df = weather_df[relevant_columns]
weather_df

In [None]:
# Resetting the index of the weather dataframe and dropping the existing index.
# After concatenating and sorting, the row labels are no longer a clean 0..n-1 sequence;
# drop=True discards the old labels instead of keeping them as an extra column.
weather_df = weather_df.reset_index(drop=True)
weather_df

In [None]:
# Loading the simulated dataset (demand with its hour and day of the week).
# Later cells rely on its "Specific hour" and "Demand" columns.
train_df = pd.read_csv(filepath_or_buffer='train_data.csv')
train_df

In [None]:
# Parsing the "datetime" value of the first weather record
# (corresponding to January 1st 2022) into a pandas timestamp
jan_1_2022 = pd.to_datetime(weather_df['datetime'].iloc[0])

# Looking up the name of the weekday this timestamp falls on
weekday_name = jan_1_2022.day_name()
weekday_name

In [None]:
# Since the train dataset starts from Monday, the weather records that lie before the first
# Monday 00:00 have to be removed. For a dataset starting on Saturday, January 1st 2022, these
# are the 48 hourly records of Saturday and Sunday.
# The number of records is derived from the first timestamp instead of being hard-coded, so
# re-running this cell does not remove further days: once the data starts on Monday 00:00,
# nothing is skipped anymore.
# NOTE(review): assumes exactly one weather record per hour without gaps - confirm for the data.
first_timestamp = pd.to_datetime(weather_df['datetime'].iloc[0])
hours_per_week = 7 * 24
# Timestamp.weekday() counts from Monday = 0
hours_into_week = first_timestamp.weekday() * 24 + first_timestamp.hour
records_to_skip = (hours_per_week - hours_into_week) % hours_per_week
weather_df = weather_df.iloc[records_to_skip:]

# Both datasets are concatenated column-wise afterwards, so they need the same number of
# records: the surplus records at the end of the longer dataset are cut off
common_length = min(len(train_df), len(weather_df))
train_df = train_df.head(common_length)
weather_df = weather_df.head(common_length)

In [None]:
# Resetting the index of the weather dataframe and dropping the existing index.
# After the leading records were removed, the row labels may no longer start at 0.
# Renumbering them matters because the column-wise pd.concat with train_df
# matches the rows of both dataframes by their index labels.
weather_df = weather_df.reset_index(drop=True)
weather_df

In [None]:
# Placing the demand records and the weather records side by side in one dataframe;
# the rows of both inputs are matched on their index labels
new_train_df = pd.concat(objs=[train_df, weather_df], axis='columns')
new_train_df

In [None]:
# Heavy rain rule, first mask: records whose hour is one of 7, 8, ..., 12
# (the end of the range, 13, is excluded)
heavy_rain_first_condition = new_train_df['Specific hour'].isin(range(7, 13))

# Heavy rain rule, second mask: records whose weather description is one of the two rainy labels
heavy_rain_sec_condition = new_train_df['conditions'].isin(
    ['Rain, Overcast', 'Rain, Partially cloudy']
)

In [None]:
# Casting "Demand" to float first: the scaling below produces fractional values, and writing
# floats into an integer column through .loc is a deprecated silent upcast in recent pandas
# versions. The cast is a no-op if the column already holds floats.
new_train_df['Demand'] = new_train_df['Demand'].astype(float)

# Reducing the demand to 80% for the records that satisfy both heavy rain conditions.
# NOTE: this cell is not idempotent - every additional run scales the same records again.
new_train_df.loc[heavy_rain_first_condition & heavy_rain_sec_condition, 'Demand'] *= 0.8
new_train_df

In [None]:
# Light rain rule, first mask: records whose hour is one of 13, 14, ..., 17
# (the end of the range, 18, is excluded)
light_rain_first_condition = new_train_df['Specific hour'].isin(range(13, 18))

# Light rain rule, second mask: records whose weather description matches one of the labels
# below (note that the list also contains the non-rain labels 'Partially cloudy' and 'Overcast')
light_rain_sec_condition = new_train_df['conditions'].isin(
    ['Rain', 'Partially cloudy', 'Overcast']
)

In [None]:
# Casting "Demand" to float first: the scaling below produces fractional values, and writing
# floats into an integer column through .loc is a deprecated silent upcast in recent pandas
# versions. The cast is a no-op if the column already holds floats.
new_train_df['Demand'] = new_train_df['Demand'].astype(float)

# Raising the demand to 110% for the records that satisfy both light rain conditions.
# NOTE: this cell is not idempotent - every additional run scales the same records again.
new_train_df.loc[light_rain_first_condition & light_rain_sec_condition, 'Demand'] *= 1.1
new_train_df

In [None]:
# The scaled "Demand" values are floats: rounding them to whole numbers
# and storing the column with an integer dtype again
new_train_df['Demand'] = new_train_df['Demand'].round(decimals=0).astype(int)

In [None]:
# Writing the combined dataset to a CSV file; index=False leaves the row labels out of the file
new_train_df.to_csv(path_or_buf='new_train_data.csv', index=False)