<a href="https://colab.research.google.com/github/shahriarivari/Load-Forecasting-Machine-Learing/blob/main/load_forecasting_ml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### installing libraries

In [1]:
# installing jdatetime for converting date to Gregotian
!pip install jdatetime

Collecting jdatetime
  Downloading jdatetime-4.1.1-py3-none-any.whl (13 kB)
Installing collected packages: jdatetime
Successfully installed jdatetime-4.1.1


# data manipulation

In [3]:
import pandas as pd # to import the excel file
import numpy as np  # to deal with numbers
import jdatetime    # to convert Jalali datatime to Gregorian

In [151]:
# Read the two input files that sit next to the notebook:
# an Excel workbook into data_df and a CSV file into temp_df.
data_df = pd.read_excel(io="consumption.xlsx")
temp_df = pd.read_csv(filepath_or_buffer="temperature.csv")

In [155]:
# Build a Gregorian 'time' column (strings formatted as YYYY-MM-DD) from the
# Jalali 'date' column (strings formatted as YYYY/MM/DD).
def _jalali_to_gregorian(jalali_text):
    """Return the Gregorian 'YYYY-MM-DD' string for a Jalali 'YYYY/MM/DD' string."""
    jalali_moment = jdatetime.datetime.strptime(jalali_text, '%Y/%m/%d')
    return jalali_moment.togregorian().strftime('%Y-%m-%d')


data_df['time'] = data_df['date'].apply(_jalali_to_gregorian)

In [7]:
# Reshape the wide table (one row per day, one column per hour) into two
# lists of per-day arrays: the hourly timestamps and the matching hourly
# values. The next cell concatenates each list into one flat vector.

# Hour columns are everything after the first column EXCEPT 'time'.
# 'time' is appended to data_df by the date-conversion cell, so a plain
# `data_df.columns[1:]` would contain it and `int('time')` would raise when
# the notebook is executed top to bottom on a fresh kernel.
hour_columns = [col for col in data_df.columns[1:] if col != 'time']
list_of_hours = [int(col) for col in hour_columns]

# Offsets (as timedelta64 values) added to each day's midnight timestamp.
hour_offsets = pd.to_timedelta(list_of_hours, unit='h').values

# Parse the day strings once and pull the hourly values out as a single
# 2-D array (rows = days, columns = hours) instead of slicing row by row.
day_stamps = pd.to_datetime(data_df['time'], format='%Y-%m-%d')
hourly_matrix = data_df[hour_columns].to_numpy()

time_hourly = [day_stamp + hour_offsets for day_stamp in day_stamps]
values_hourly = list(hourly_matrix)

In [156]:
# Flatten the per-day arrays into one long vector of timestamps and one long
# vector of values (same order, so position k in both refers to the same hour).
time_concatenated, values_concatenated = (
    np.concatenate(per_day_arrays) for per_day_arrays in (time_hourly, values_hourly)
)

In [174]:
# Assemble the long-format frame: one row per hourly timestamp.
new_df = pd.DataFrame({"time": time_concatenated, "values": values_concatenated})

# Parse the temperature timestamps so they can be aligned with new_df['time'].
# No explicit `format` is given: these timestamps are matched against hourly
# rows further down, so they carry an hour-of-day part that the date-only
# '%Y-%m-%d' format does not describe (pandas 2.x rejects such a mismatch).
temp_df["time"] = pd.to_datetime(temp_df['time'])

print(f" changed the data frame from {data_df.shape} to {new_df.shape} ")

 changed the data frame from (730, 26) to (17520, 2) 


In [188]:
# Align both frames on their 'time' column. join='inner' keeps only the
# timestamps present in both; reset_index() then turns 'time' back into an
# ordinary column.
concat_df = pd.concat(
    [frame.set_index('time') for frame in (new_df, temp_df)],
    axis=1,
    join='inner',
).reset_index()
concat_df.head(2)

Unnamed: 0,time,values,temp
0,2021-03-21 01:00:00,683.189471,6.7
1,2021-03-21 02:00:00,627.645948,6.1


In [190]:
# Display the merged 'time' column to check its range, length and dtype.
concat_df["time"]

0       2021-03-21 01:00:00
1       2021-03-21 02:00:00
2       2021-03-21 03:00:00
3       2021-03-21 04:00:00
4       2021-03-21 05:00:00
                ...        
17514   2023-03-20 19:00:00
17515   2023-03-20 20:00:00
17516   2023-03-20 21:00:00
17517   2023-03-20 22:00:00
17518   2023-03-20 23:00:00
Name: time, Length: 17519, dtype: datetime64[ns]

In [222]:
# Feature engineering on the merged hourly frame: calendar flags,
# time-of-day flags, season flags and a lagged rolling mean of the load.

# Temporary calendar components; they are dropped again at the end of the cell.
concat_df["week_day"] = concat_df["time"].dt.weekday  # Monday=0 ... Sunday=6
concat_df["day"] = concat_df["time"].dt.day
concat_df['month'] = concat_df["time"].dt.month
concat_df["hour"] = concat_df["time"].dt.hour

# Position inside the month: first ten days / after the 20th, plus a cyclic
# encoding of the day-of-month that uses a fixed 30-day period.
concat_df['up_of_month'] = (concat_df['day'] <= 10).astype(int)
concat_df['down_of_month'] = (concat_df['day'] > 20).astype(int)
concat_df['sin_day'] = np.sin(2 * np.pi * concat_df['day'] / 30)
concat_df['cos_day'] = np.cos(2 * np.pi * concat_df['day'] / 30)

# Part of day: morning = hours 6-12, afternoon = hours 13-19,
# evening = every remaining hour (20-23 and 0-5).
concat_df['morning'] = ((concat_df['hour'] > 5) & (concat_df['hour'] <= 12)).astype(int)
concat_df['afternoon'] = ((concat_df['hour'] > 12) & (concat_df['hour'] <= 19)).astype(int)
concat_df['evening'] = (1 - concat_df['morning'] - concat_df['afternoon']).astype(int)

# Weekend flag: Thursday (weekday 3) or Friday (weekday 4).
concat_df['weekend'] = ((concat_df["week_day"] == 4) | (concat_df["week_day"] == 3)).astype(int)
# Six-hour slots [0,6), [6,12), [12,18), [18,24) labelled 1..4.
concat_df['time_slot'] = pd.cut(concat_df["hour"], bins=[0, 6, 12, 18, 24], labels=[1, 2, 3, 4], right=False)
# Peak-load flag for hours 11 through 15 inclusive.
concat_df['peak_load'] = ((concat_df["hour"] >= 11) & (concat_df["hour"] <= 15)).astype(int)

# Season flags by Gregorian month.
concat_df['spring'] = ((concat_df['month'] >= 3) & (concat_df['month'] <= 5)).astype(int)
concat_df['summer'] = ((concat_df['month'] >= 6) & (concat_df['month'] <= 8)).astype(int)
concat_df['fall'] = ((concat_df['month'] >= 9) & (concat_df['month'] <= 11)).astype(int)
# Winter spans the year boundary (December, January, February). The previous
# condition `month == 12 AND month <= 2` can never be true, so the flag was 0
# for every row.
concat_df['winter'] = concat_df['month'].isin([12, 1, 2]).astype(int)

# four_mean: mean of the four preceding hourly values (rows i-4 .. i-1).
# The first four rows have no complete history and keep their own value.
# Shifted slices replace the Python loop that grew an array with np.append
# (quadratic in the number of rows); the terms are added in the same order.
load_values = concat_df["values"].to_numpy(dtype=np.float64)
previous_four_mean = load_values.astype(np.float32)
previous_four_mean[4:] = (
    load_values[:-4] + load_values[1:-3] + load_values[2:-2] + load_values[3:-1]
) / 4
concat_df["four_mean"] = previous_four_mean

# Remove the temporary calendar components.
concat_df = concat_df.drop(columns=["day", "week_day", "month", "hour"])
concat_df.head(5)

Unnamed: 0,time,values,temp,weekend,time_slot,peak_load,four_mean,up_of_month,down_of_month,sin_day,cos_day,morning,afternoon,evening,spring,summer,fall,winter
0,2021-03-21 01:00:00,683.189471,6.7,0,1,0,683.189453,0,1,-0.951057,-0.309017,0,0,1,1,0,0,0
1,2021-03-21 02:00:00,627.645948,6.1,0,1,0,627.645935,0,1,-0.951057,-0.309017,0,0,1,1,0,0,0
2,2021-03-21 03:00:00,593.313848,4.2,0,1,0,593.313843,0,1,-0.951057,-0.309017,0,0,1,1,0,0,0
3,2021-03-21 04:00:00,575.394891,3.9,0,1,0,575.394897,0,1,-0.951057,-0.309017,0,0,1,1,0,0,0
4,2021-03-21 05:00:00,569.464668,3.3,0,1,0,619.886047,0,1,-0.951057,-0.309017,0,0,1,1,0,0,0
