## Load Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Activation
from tensorflow.keras.optimizers import Adam

## Import Datasets

In [2]:
# Read the load/weather CSV; the 'datetime' column is parsed as timestamps
# and becomes the index of the frame.
df = pd.read_csv(
    'Load Demand with Meteological Data.csv',
    parse_dates=['datetime'],
    index_col=['datetime'],
)
df.head(5)

Unnamed: 0_level_0,NET DEMAND,T2M TOC,RH2M TOC,LP2M TOC,WS2M TOC,T2M SAN,RH2M SAN,LP2M SAN,WS2M SAN,T2M DAV,RH2M DAV,LP2M DAV,WS2M DAV,Holiday_ID,Holiday,School
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2017-01-02 01:00:00,970.345,25.865259,0.018576,0.016174,21.850546,23.482446,0.017272,0.001855,10.328949,22.662134,0.016562,0.0961,5.364148,0,0,0
2017-01-02 02:00:00,912.1755,25.899255,0.018653,0.016418,22.166944,23.399255,0.017265,0.001327,10.681517,22.578943,0.016509,0.087646,5.572471,0,0,0
2017-01-02 03:00:00,900.2688,25.93728,0.018768,0.01548,22.454911,23.34353,0.017211,0.001428,10.874924,22.53103,0.016479,0.078735,5.871184,0,0,0
2017-01-02 04:00:00,889.9538,25.957544,0.01889,0.016273,22.110481,23.238794,0.017128,0.002599,10.51862,22.512231,0.016487,0.06839,5.883621,0,0,0
2017-01-02 05:00:00,893.6865,25.97384,0.018981,0.017281,21.186089,23.075403,0.017059,0.001729,9.733589,22.481653,0.016456,0.064362,5.611724,0,0,0


In [None]:
# Show the last 10 rows of the frame.
df.tail(10)

## DataFrame Exploration

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Print dtypes and non-null counts, then display the summary statistics
# transposed so that each column of df becomes one row.
df.info()
df.describe().T

### Histogram

In [None]:
# Histogram of the target column, drawn through the explicit Axes interface
fig, ax = plt.subplots()
ax.hist(df['NET DEMAND'], bins=10)
ax.set_xlabel('Demand')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Demand')
plt.show()

In [None]:
# sns.distplot is deprecated and scheduled for removal from seaborn.
# histplot with kde=True and stat='density' draws the same kind of figure:
# a density-scaled histogram with a kernel density estimate on top.
sns.histplot(df['NET DEMAND'], bins=10, kde=True, stat='density')
plt.xlabel('Demand')
plt.ylabel('Density')
plt.title('Distribution of Demand')
plt.show()

### Checking for null values

In [None]:
# Count of missing values per column.
df.isnull().sum()

## Data Correlation

In [None]:
# Correlation of every column with 'NET DEMAND', strongest positive first.
df.corr()['NET DEMAND'].sort_values(ascending=False)

In [None]:
# Pairwise correlations between all columns of the frame
corr_matrix = df.corr()

# Annotated heatmap of the full matrix on an explicitly created 10x8 figure
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', square=True, ax=ax)
plt.show()

In [None]:
column_of_interest = 'NET DEMAND'  # column whose correlations are visualised

# Correlations of every column with the column of interest, strongest first
corr_matrix = df.corr()
corr_values = corr_matrix[column_of_interest]
sorted_corr_values = corr_values.sort_values(ascending=False)

# Single-column heatmap on a larger figure, titled with the column name
fig, ax = plt.subplots(figsize=(16, 10))
sns.heatmap(sorted_corr_values.to_frame(), annot=True, cmap='coolwarm', square=True, ax=ax)
ax.set_title(f'Correlation Heatmap for Column: {column_of_interest}')
plt.show()

In [None]:
# Scatter of demand against the 'T2M TOC' column on a wide figure
fig, ax = plt.subplots(figsize=(20, 10))
sns.scatterplot(x='NET DEMAND', y='T2M TOC', data=df, ax=ax)
ax.set_xlabel('National Demand in KWh')
ax.set_ylabel('Temperature at 2 metre in Tocumen, Panama City')

## Plots

In [None]:
# Daily means of the columns that remain after dropping the target, the
# holiday flags and the RH2M columns; only the first 10 days are plotted.
(
    df.drop(columns=['NET DEMAND', 'Holiday', 'Holiday_ID', 'RH2M SAN', 'RH2M DAV', 'RH2M TOC'])
    .resample('D')
    .mean()
    .head(10)
    .plot(figsize=(20, 10))
)

In [None]:
# Demand for the first 100 rows of the frame
ax = df['NET DEMAND'].head(100).plot(figsize=(20, 10), legend=True)
ax.set_ylabel('Demand in Units')
plt.show()

In [None]:
# Daily demand totals for the first 10 days
ax = df['NET DEMAND'].resample('D').sum().head(10).plot(figsize=(20, 10), legend=True)
ax.set_ylabel('Demand in Units')
plt.show()

In [None]:
# Monthly demand totals (at most the first 100 months)
# NOTE(review): pandas >= 2.2 emits a deprecation warning for the 'M' alias
# and recommends 'ME'; 'M' is kept here so older pandas versions still work.
ax = df['NET DEMAND'].resample('M').sum().head(100).plot(figsize=(20, 10), legend=True)
ax.set_ylabel('Demand in Units')
plt.show()

## Splitting The Data

In [None]:
# Feature matrix: every column except the target; target vector: 'NET DEMAND'
x = df.drop(columns='NET DEMAND').to_numpy()
y = df['NET DEMAND'].to_numpy()

# 70 % train / 30 % test, split with a fixed seed for reproducibility
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=100
)

In [None]:
# Shape of the training feature matrix: (samples, features)
x_train.shape

In [None]:
# Shape of the test feature matrix: (samples, features)
x_test.shape

In [None]:
# Shape of the test target vector
y_test.shape

## Fitting & Transforming

In [None]:
# Standardization: the scaler is fitted on the training features only
# (fit_transform); the test features are only transformed with the
# statistics learned from the training set, never fitted.
# NOTE: this cell overwrites x_train / x_test in place, so running it a
# second time without re-running the split rescales already-scaled data.
s_scaler = StandardScaler()

x_train = s_scaler.fit_transform(x_train.astype(float))
x_test = s_scaler.transform(x_test.astype(float))

## Linear Regression

In [None]:
# Linear regression baseline fitted on the training split.
# LinearRegression is already imported in the notebook's import cell, so the
# duplicate mid-notebook import has been removed.
regressor = LinearRegression()
regressor.fit(x_train, y_train)

### Evaluate the model (Intercept and Slope)

In [None]:
# Intercept term of the fitted linear model
regressor.intercept_

In [None]:
# Coefficients of the fitted linear model (one per feature column)
regressor.coef_

In [None]:
# Linear-regression predictions for the test features, shown next to the
# true test targets
y_line_pred = regressor.predict(x_test)
df_Lpred = pd.DataFrame(
    {
        'Test Data': y_test,
        'Linear Regression Prediction': y_line_pred,
    }
)
df_Lpred.head(10)

In [None]:
# First 100 test rows: true values vs. linear-regression predictions.
# The x-axis is the row position inside df_Lpred (a default integer index).
ax = df_Lpred.head(100).plot(figsize=(20, 10))
ax.set_ylabel('Demand')
ax.set_xlabel('Time')
plt.show()

In [None]:
# Error metrics of the linear model on the test split (each computed once)
lin_mae = metrics.mean_absolute_error(y_test, y_line_pred)
lin_mse = metrics.mean_squared_error(y_test, y_line_pred)
lin_evs = metrics.explained_variance_score(y_test, y_line_pred)

print('Mean Absolute Error: {:.2f}'.format(lin_mae))
print('Mean Squared Error: {:.2f}'.format(lin_mse))
print('Root Mean Squared Error: {:.2f}'.format(np.sqrt(lin_mse)))
print('Variance score is: {:.2f}'.format(lin_evs))

## Neural Network

In [None]:
# Fully connected regression network: three hidden ReLU layers of 225 units
# each, followed by a single linear output unit for the demand value.
# The input width is read from the training matrix instead of being
# hard-coded (this dataset has 15 feature columns), so the cell keeps working
# if feature columns are added or removed upstream.
n_features = x_train.shape[1]

model = Sequential()

model.add(Input(shape=(n_features,)))
model.add(Dense(225, activation='relu'))
model.add(Dense(225, activation='relu'))
model.add(Dense(225, activation='relu'))
model.add(Dense(1, activation='linear'))

# Mean squared error loss, optimised with Adam at a learning rate of 0.001
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

In [None]:
# Train for 50 epochs with mini-batches of 128 samples.
# NOTE(review): the test split is passed as validation data, so the reported
# validation loss is measured on the same rows that are later used for the
# final evaluation.
model.fit(x=x_train,y=y_train,
          validation_data=(x_test,y_test),
          batch_size=128,epochs=50)

In [None]:
# Print the layer-by-layer summary of the network
model.summary()

In [None]:
# Neural-network predictions for the test features
y_neo_pred = model.predict(x_test)

## Results

In [None]:
# Error metrics of the neural network on the test split (each computed once)
nn_mae = metrics.mean_absolute_error(y_test, y_neo_pred)
nn_mse = metrics.mean_squared_error(y_test, y_neo_pred)
nn_evs = metrics.explained_variance_score(y_test, y_neo_pred)

print('Mean Absolute Error: {:.2f}'.format(nn_mae))
print('Mean Squared Error: {:.2f}'.format(nn_mse))
print('Root Mean Squared Error: {:.2f}'.format(np.sqrt(nn_mse)))
print('Variance score is: {:.2f}'.format(nn_evs))

## Prediction Visualization

In [None]:
# Two-column array: network prediction on the left, true test value on the right
prediction_column = y_neo_pred.reshape(-1, 1)
target_column = y_test.reshape(-1, 1)
concat_arr = np.hstack([prediction_column, target_column])
concat_arr

In [None]:
# Wrap the two-column array in a labelled frame (prediction, true value)
new_dataframe = pd.DataFrame(concat_arr, columns=['TensorFlow Prediction', 'Test Data'])
new_dataframe

In [None]:
# First 100 test rows: network predictions vs. true values.
# The x-axis is the row position inside new_dataframe (a default integer index).
ax = new_dataframe.head(100).plot(figsize=(20, 10))
ax.set_ylabel('Demand')
ax.set_xlabel('Time')
plt.show()

## Prediction using the Model

In [None]:
def nn_prediction(n):
    """Print the neural-network prediction for the n-th row of ``df``.

    Uses the notebook globals ``df`` (the datetime-indexed frame),
    ``s_scaler`` (the fitted scaler) and ``model`` (the trained network).
    The row's date and time, the predicted demand, the recorded demand and
    their difference (prediction - recorded) are printed; nothing is returned.

    Parameters
    ----------
    n : int
        1-based row number in ``df`` (``n=1`` is the first row).

    Raises
    ------
    IndexError
        If ``n`` is smaller than 1 or larger than the number of rows.
    """
    if n < 1:
        # Without this guard iloc[n - 1] would silently wrap around to the
        # end of the frame for n <= 0.
        raise IndexError('n must be a 1-based row number (n >= 1), got {}'.format(n))
    row_pos = n - 1

    # The target column is called 'NET DEMAND'; the frame has no
    # 'nat_demand' column, so the previous lookups raised KeyError.
    target_col = 'NET DEMAND'

    elec_demand = df.drop(columns=target_col).iloc[row_pos]

    # Scaling/Transform: one sample, as many columns as there are features
    elec_demand_scaled = s_scaler.transform(elec_demand.values.reshape(1, -1))

    # Run the network once and reuse the raw output below
    raw_prediction = model.predict(elec_demand_scaled)
    prediction = np.squeeze(raw_prediction)

    # Recorded value, selected by position (.iloc) because the index holds
    # timestamps, not integers
    test_data = df[target_col].iloc[row_pos]

    timestamp = df.index[row_pos]
    df_date = timestamp.strftime('%Y-%m-%d')  # Format: YYYY-MM-DD
    df_time = timestamp.strftime('%H:%M:%S')  # Format: HH:MM:SS

    # Difference between the predicted and the recorded value
    diff = np.squeeze(raw_prediction - test_data)

    print('The Date is : {} \nThe Time is : {} \nThe Neural Network predicted value is {} and the Test data value is {}.\nThe difference between the data is {}'.format(df_date, df_time,prediction, test_data, diff))


In [None]:
# nn_prediction(1)

In [None]:
def prediction_date(date_time_input):
    """Print the neural-network prediction for the row at a given timestamp.

    Uses the notebook globals ``df`` (the datetime-indexed frame),
    ``s_scaler`` (the fitted scaler) and ``model`` (the trained network).
    The row's date and time, the predicted demand, the recorded demand and
    their difference (prediction - recorded) are printed; nothing is returned.

    Parameters
    ----------
    date_time_input : str or datetime-like
        Timestamp to look up; it is converted with ``str`` and then
        ``pd.to_datetime``, e.g. ``'2017-01-02 01:00:00'``.

    Raises
    ------
    ValueError
        If the input cannot be parsed as a date/time by ``pd.to_datetime``.
    KeyError
        If the parsed timestamp is not present in the index of ``df``.
    """
    # Convert the input to a timestamp and find its row position in the index
    date_time = pd.to_datetime(str(date_time_input))
    n = df.index.get_loc(date_time)  # This is the row number

    # The target column is called 'NET DEMAND'; the frame has no
    # 'nat_demand' column, so the previous lookups raised KeyError.
    target_col = 'NET DEMAND'

    elec_demand = df.drop(columns=target_col).iloc[n]  # extracting the input features

    # Scaling/Transform: one sample, as many columns as there are features
    elec_demand_scaled = s_scaler.transform(elec_demand.values.reshape(1, -1))

    # Run the network once and reuse the raw output below
    raw_prediction = model.predict(elec_demand_scaled)
    prediction = np.squeeze(raw_prediction)

    # Recorded value, selected by position (.iloc) because the index holds
    # timestamps, not integers
    test_data = df[target_col].iloc[n]

    timestamp = df.index[n]
    df_date = timestamp.strftime('%Y-%m-%d')  # Format: YYYY-MM-DD
    df_time = timestamp.strftime('%H:%M:%S')  # Format: HH:MM:SS

    # Difference between the predicted and the recorded value
    diff = np.squeeze(raw_prediction - test_data)

    print('The Date is : {} \nThe Time is : {} \nThe Neural Network predicted value is {} and the Test data value is {}.\nThe difference between the data is {}'.format(df_date, df_time,prediction, test_data, diff))

    
    

In [None]:
# prediction_date('2015-01-03 01:00:00')

In [None]:
# date_time = input("Enter date&time")

In [None]:
# try:
#     prediction_date(date_time)

# except ValueError:
#     print('Wrong value!!!\nEnter a Time period from 2015-01-03 01:00:00 to 2020-06-27 00:00:00	')

## Results and Final Evaluation

In [None]:
# Side-by-side report: the same four metrics for each model, with a
# separator line printed between the two sections.
for section_number, (heading, predictions) in enumerate(
    [('Linear Regression', y_line_pred), ('Tensorflow Keras Prediction', y_neo_pred)]
):
    if section_number > 0:
        print('\n*****************************************************************************\n')

    report_mse = metrics.mean_squared_error(y_test, predictions)

    print(heading)
    print('Mean Absolute Error: {:.2f}'.format(metrics.mean_absolute_error(y_test, predictions)))
    print('Mean Squared Error: {:.2f}'.format(report_mse))
    print('Root Mean Squared Error: {:.2f}'.format(np.sqrt(report_mse)))
    print('Variance score is: {:.2f}'.format(metrics.explained_variance_score(y_test, predictions)))

In [None]:
def calculate_accuracy(ypred, ytest):
    """Print a MAPE-based accuracy score, ``100 - MAPE`` in percent.

    Both inputs are flattened to 1-D before they are compared. Without this,
    a column vector of predictions with shape (N, 1) and a flat target of
    shape (N,) would broadcast to an (N, N) matrix, which gives a wrong score
    and can exhaust memory for large N.

    Parameters
    ----------
    ypred : array-like
        Predicted values; any shape holding N values.
    ytest : array-like
        True values; any shape holding N values. Entries equal to zero make
        the percentage error infinite or undefined.

    Returns
    -------
    None
        The score is printed, not returned.

    Raises
    ------
    ValueError
        If the two inputs do not hold the same number of values.
    """
    predicted = np.asarray(ypred, dtype=float).ravel()
    actual = np.asarray(ytest, dtype=float).ravel()
    if predicted.size != actual.size:
        raise ValueError(
            'ypred and ytest must hold the same number of values, got {} and {}'.format(
                predicted.size, actual.size
            )
        )

    # Calculate absolute percentage error for each pair of values
    absolute_percentage_error = np.abs((predicted - actual) / actual)

    # Calculate mean absolute percentage error (MAPE)
    mean_absolute_percentage_error = np.mean(absolute_percentage_error) * 100

    # Calculate accuracy as 100 - MAPE
    accuracy = 100 - mean_absolute_percentage_error

    print('The Accuracy is {}%'.format(accuracy))

In [None]:
# MAPE-based accuracy of the linear-regression predictions
calculate_accuracy(y_line_pred, y_test)

In [None]:
# MAPE-based accuracy of the neural-network predictions
calculate_accuracy(y_neo_pred, y_test)