In [3]:
import numpy as np
import pandas as pd
# Build a reproducible synthetic dataset for demonstration purposes.
# Every draw below advances NumPy's global RNG, so the order of the
# np.random calls determines the generated values and must not be changed.
np.random.seed(42)
n_samples = 1000

# Explanatory variables.
hrs_studied = np.random.uniform(low=0, high=8, size=n_samples)
prev_score = np.random.uniform(low=0, high=7, size=n_samples)
sleep = np.random.normal(loc=7, scale=1, size=n_samples)
# Practice-exam hours are half the study hours plus unit-variance noise,
# so this feature is correlated with hrs_studied by construction.
practice_exam_hrs = hrs_studied / 2 + np.random.normal(loc=0, scale=1, size=n_samples)
living_at_home = np.random.randint(low=0, high=2, size=n_samples)  # 0 or 1 (upper bound exclusive)
hrs_exercising = np.random.uniform(low=0, high=2, size=n_samples)

# Target: fixed linear combination of the features plus Gaussian noise (sd = 7).
performance = (
    hrs_studied * 5
    + prev_score * 3
    + sleep * 0.1
    + practice_exam_hrs * 0.1
    + living_at_home * 2
    + hrs_exercising * 1
    + np.random.normal(loc=0, scale=7, size=n_samples)
)

# Collect the features and the target into one table, one row per sample.
challenge_df = pd.DataFrame(
    data={
        "HoursStudied": hrs_studied,
        "PrevScore": prev_score,
        "HoursSleep": sleep,
        "PracticeExamHrs": practice_exam_hrs,
        "LivingAtHome": living_at_home,
        "HoursExercising": hrs_exercising,
        "Performance": performance,
    }
)


In [23]:
# Directory that holds the raw LOA challenge CSV files. Kept in one place so
# the location can be changed without editing every read_csv call.
LOA_DATA_DIR = "challengedata/LOA"

# Load each raw table into its own DataFrame. The paths are relative, so they
# resolve against the notebook's current working directory.
chargers_df = pd.read_csv(LOA_DATA_DIR + "/chargers.csv")
duration_df = pd.read_csv(LOA_DATA_DIR + "/duration.csv")
chargefee_df = pd.read_csv(LOA_DATA_DIR + "/e_price.csv")    # e_price.csv -> charging fee table
servicefee_df = pd.read_csv(LOA_DATA_DIR + "/s_price.csv")   # s_price.csv -> service fee table
sites_df = pd.read_csv(LOA_DATA_DIR + "/sites.csv")
volume_df = pd.read_csv(LOA_DATA_DIR + "/volume.csv")
weather_df = pd.read_csv(LOA_DATA_DIR + "/weather.csv")

def _with_datetime_column(frame, source_col):
    """Return a copy of ``frame`` with ``source_col`` renamed to 'datetime' and parsed.

    Parameters
    ----------
    frame : pd.DataFrame
        Table whose timestamp column is currently named ``source_col``.
    source_col : str
        Existing name of the timestamp column (e.g. 'Unnamed: 0' or 'time').

    Returns
    -------
    pd.DataFrame
        A new frame whose 'datetime' column has been converted with
        ``pd.to_datetime``. Using one column name everywhere keeps the
        naming convention consistent across tables.

    Raises
    ------
    KeyError
        If no 'datetime' column exists after the rename, i.e. ``source_col``
        was not present in ``frame``.
    """
    renamed = frame.rename(columns={source_col: 'datetime'})
    renamed['datetime'] = pd.to_datetime(renamed['datetime'])
    return renamed


def _melt_by_site(wide_df, value_name, id_vars='datetime'):
    """Reshape a wide table (one column per site) into long form.

    Parameters
    ----------
    wide_df : pd.DataFrame
        Table in which every column not listed in ``id_vars`` is treated as
        a per-site column.
    value_name : str
        Name given to the column that receives the melted cell values.
    id_vars : str or list of str, default 'datetime'
        Column(s) repeated on every output row.

    Returns
    -------
    pd.DataFrame
        Long table with the ``id_vars`` columns, an integer 'site_id' column
        (taken from the original column headers) and ``value_name``.

    Raises
    ------
    ValueError
        If a melted column header cannot be converted to ``int``.
    """
    long_df = wide_df.melt(id_vars=id_vars, var_name='site_id', value_name=value_name)
    long_df['site_id'] = long_df['site_id'].astype(int)
    return long_df


# Duration: rename the unnamed first column to the more appropriate name
# 'datetime', parse it, then melt so the table is long rather than wide.
duration_df = _with_datetime_column(duration_df, 'Unnamed: 0')
duration_long_df = _melt_by_site(duration_df, 'duration')


# Charging fee: same treatment; the timestamp column is called 'time' here.
chargefee_df = _with_datetime_column(chargefee_df, 'time')
chargefee_long_df = _melt_by_site(chargefee_df, 'charging_fee')


# Volume: same treatment, plus derived date/time columns (as Python objects
# and as formatted strings) that are carried through the melt as id columns.
volume_df = _with_datetime_column(volume_df, 'Unnamed: 0')
volume_df['time'] = volume_df['datetime'].dt.time
volume_df['time_str'] = volume_df['datetime'].dt.strftime('%H:%M')
volume_df['date'] = volume_df['datetime'].dt.date
volume_df['date_str'] = volume_df['datetime'].dt.strftime('%Y-%m-%d')

volume_long_df = _melt_by_site(
    volume_df,
    'volume',
    id_vars=['datetime', 'date', 'date_str', 'time', 'time_str'],
)


# Weather: only the timestamp needs normalising; the table is not melted.
weather_df = _with_datetime_column(weather_df, 'time')

# Drop the weather columns that are not used downstream.
weather_small_df = weather_df.drop(columns=['temp','humidity','snow','snowdepth','preciptype','winddir','solarenergy','uvindex','conditions'])


# Attach duration, charging fee and weather to every volume row.
# All three joins are left joins, so every row of volume_long_df is kept.
# validate='many_to_one' makes pandas raise a MergeError if the right-hand
# table has duplicate join keys; without it such duplicates would silently
# multiply rows and inflate any later sum over 'volume' or 'duration'.
charging_df = (
    volume_long_df
    .merge(duration_long_df, how='left', on=['datetime', 'site_id'], validate='many_to_one')
    .merge(chargefee_long_df, how='left', on=['datetime', 'site_id'], validate='many_to_one')
    .merge(weather_small_df, how='left', on='datetime', validate='many_to_one')
)

# Total duration and total volume per site, ordered by ascending site id.
charging_grouped_df = (
    charging_df
    .groupby(by='site_id')
    .agg({'duration': 'sum', 'volume': 'sum'})
    .reset_index()
    .sort_values(by='site_id', ascending=True)
)

# A site counts as "used" only when both of its totals are strictly positive.
has_duration = charging_grouped_df['duration'] > 0
has_volume = charging_grouped_df['volume'] > 0
used_site_id_list = charging_grouped_df.loc[has_duration & has_volume, 'site_id'].tolist()

# Keep only the rows of the merged table that belong to a used site.
is_used_site = charging_df['site_id'].isin(used_site_id_list)
charging_filtered_df = charging_df[is_used_site]

# Aggregate the filtered table to one row per (date, site_id):
# volume and duration are summed, every other listed column is averaged.
daily_mean_cols = [
    'charging_fee', 'feelslike', 'dew', 'precip', 'windgust',
    'windspeed', 'pressure', 'visibility', 'cloudcover', 'solarradiation',
]
daily_agg_spec = {'volume': 'sum', 'duration': 'sum'}
daily_agg_spec.update({col: 'mean' for col in daily_mean_cols})

challenge_3_df = (
    charging_filtered_df
    .groupby(by=['date', 'site_id'])
    .agg(daily_agg_spec)
    .reset_index()
)

