In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pprint
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline

In [3]:
from keras.models import load_model

# Load the saved model
regressor = load_model("/content/drive/MyDrive/TASK1_VERV/electricity-consumption/models/my_lstm_model.h5")


In [4]:
import pandas as pd

def read_file(file_path):
    """
    Read the CSV file and return the DataFrame.

    Parameters:
    - file_path: Path to the CSV file

    Returns:
    - df: DataFrame containing the data
    """
    df = pd.read_csv(file_path)
    return df


In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def preprocess_data(df):
    """
    Preprocess the data for LSTM model training and prediction.

    Parameters:
    - df: DataFrame containing the data

    Returns:
    - X_train: Processed training features
    - y_train: Processed training labels
    - X_test: Processed testing features
    - y_test: Testing labels
    """
    # Reformat Date Time Columns
    dataset = df
    dataset["Datetime"] = pd.to_datetime(df["Datetime"])
    dataset["Month"] = dataset["Datetime"].dt.month
    dataset["Year"] = dataset["Datetime"].dt.year
    dataset["Date"] = dataset["Datetime"].dt.date
    dataset["Time"] = dataset["Datetime"].dt.time
    dataset["Week"] = dataset["Datetime"].dt.week
    dataset["Day"] = dataset["Datetime"].dt.day_name()
    dataset = dataset.set_index("Datetime")

    # Resample data to daily frequency
    NewDataSet = dataset.resample('D').sum()

    # Splitting data
    TestData = NewDataSet.tail(100)
    Training_Set = NewDataSet.iloc[:, 0:1]
    Training_Set = Training_Set[:-60]

    # Normalization
    sc = MinMaxScaler(feature_range=(0, 1))
    Train = sc.fit_transform(Training_Set)

    X_Train = []
    Y_Train = []

    for i in range(60, Train.shape[0]):
        X_Train.append(Train[i-60:i])
        Y_Train.append(Train[i])

    X_Train = np.array(X_Train)
    Y_Train = np.array(Y_Train)
    X_Train = np.reshape(X_Train, newshape=(X_Train.shape[0], X_Train.shape[1], 1))

    Df_Total = pd.concat((NewDataSet[["AEP_MW"]], TestData[["AEP_MW"]]), axis=0)
    inputs = Df_Total[len(Df_Total) - len(TestData) - 60:].values
    inputs = inputs.reshape(-1, 1)
    inputs = sc.transform(inputs)

    X_test = []
    for i in range(60, 160):
        X_test.append(inputs[i-60:i])

    X_test = np.array(X_test)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    y_test = TestData["AEP_MW"].to_list()

    return X_Train, Y_Train, X_test, y_test,sc


In [6]:
def predict_lstm2(regressor, X_test, y_test, scaler):
    """
    Make predictions using the trained LSTM model.

    Parameters:
    - regressor: Trained LSTM model
    - X_test: Testing features
    - y_test: True labels for testing
    - scaler: Scaler object for inverse transforming predictions

    Returns:
    - Machine_Df: DataFrame with True and Predicted values
    - mse: Mean Squared Error
    - mae: Mean Absolute Error
    """
    predicted_stock_price = regressor.predict(X_test)
    predicted_stock_price = scaler.inverse_transform(predicted_stock_price)

    True_MegaWatt = y_test
    Predicted_MegaWatt = [x[0] for x in predicted_stock_price]
    dates = pd.date_range(start="2023-06-09", periods=len(True_MegaWatt), freq='D')

    Machine_Df = pd.DataFrame(data={
        "Date": dates,
        "TrueMegaWatt": True_MegaWatt,
        "PredictedMeagWatt": Predicted_MegaWatt
    })

    # Convert lists to numpy arrays
    True_MegaWatt = np.array(True_MegaWatt)
    Predicted_MegaWatt = np.array(Predicted_MegaWatt)

    mse = np.mean((True_MegaWatt - Predicted_MegaWatt) ** 2)
    mae = np.mean(np.abs(True_MegaWatt - Predicted_MegaWatt))

    return Machine_Df, mse, mae


In [7]:
# Read the file
file_path = "/content/drive/MyDrive/TASK1_VERV/electricity-consumption/AEP_hourly.csv"
df = read_file(file_path)

# Preprocess the data
X_train, Y_train, X_test, y_test ,sc= preprocess_data(df)

# Train the LSTM model (you already have this code)

# Make predictions
Machine_Df, mse, mae = predict_lstm2(regressor, X_test, y_test, sc)

# Plot and display results (you already have this code)


  dataset["Week"] = dataset["Datetime"].dt.week
  NewDataSet = dataset.resample('D').sum()




In [8]:
Machine_Df

Unnamed: 0,Date,TrueMegaWatt,PredictedMeagWatt
0,2023-06-09,315787.0,225294.890625
1,2023-06-10,311136.0,339052.031250
2,2023-06-11,293702.0,360516.218750
3,2023-06-12,291763.0,343531.093750
4,2023-06-13,322644.0,301976.812500
...,...,...,...
95,2023-09-12,368834.0,363845.562500
96,2023-09-13,364327.0,380176.843750
97,2023-09-14,363628.0,362586.218750
98,2023-09-15,376504.0,364077.718750


In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from keras.models import load_model
from sklearn.preprocessing import MinMaxScaler

# Function to preprocess new dates for prediction
def preprocess_dates_for_prediction(last_date, num_days, look_back=60):
    # Generate sequence of dates
    start_date = last_date + timedelta(days=1)
    end_date = start_date + timedelta(days=num_days)
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')

    # Preprocess dates for prediction
    X_pred = []
    for date in date_range:
        for i in range(look_back):
            date_prev = date - timedelta(days=(look_back - i))
            X_pred.append([date_prev.month, date_prev.day, date_prev.weekday()])

    X_pred = np.array(X_pred)

    # Normalize the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_pred_scaled = scaler.fit_transform(X_pred)

    return X_pred_scaled, scaler, date_range

# Function to make predictions
def make_predictions(model, X_pred_scaled, look_back=60):
    # Reshape for LSTM model
    num_samples = X_pred_scaled.shape[0] // look_back
    X_pred_reshaped = X_pred_scaled.reshape(num_samples, look_back, -1)

    # Make predictions
    predicted_values = model.predict(X_pred_reshaped)

    return predicted_values


In [9]:
df[-4:]

Unnamed: 0,Datetime,AEP_MW,Month,Year,Date,Time,Week,Day
0,2004-12-31 01:00:00,13478.0,12,2004,2004-12-31,01:00:00,53,Friday
1,2004-12-31 02:00:00,12865.0,12,2004,2004-12-31,02:00:00,53,Friday
2,2004-12-31 03:00:00,12577.0,12,2004,2004-12-31,03:00:00,53,Friday
3,2004-12-31 04:00:00,12517.0,12,2004,2004-12-31,04:00:00,53,Friday
4,2004-12-31 05:00:00,12670.0,12,2004,2004-12-31,05:00:00,53,Friday
...,...,...,...,...,...,...,...,...
121268,2018-01-01 20:00:00,21089.0,1,2018,2018-01-01,20:00:00,1,Monday
121269,2018-01-01 21:00:00,20999.0,1,2018,2018-01-01,21:00:00,1,Monday
121270,2018-01-01 22:00:00,20820.0,1,2018,2018-01-01,22:00:00,1,Monday
121271,2018-01-01 23:00:00,20415.0,1,2018,2018-01-01,23:00:00,1,Monday


In [11]:
last_available_date = pd.to_datetime(df["Datetime"].iloc[-1])

In [12]:
num_days_to_predict = (last_available_date + pd.DateOffset(days=1)).days_in_month - last_available_date.day


In [13]:
num_days_to_predict

29

In [22]:
print("Old Dataset ",df.shape )
#print("New  Dataset ",NewDataSet.shape )

Old Dataset  (121273, 8)


In [24]:
df2= df.resample('D').sum()

TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'

In [17]:
df[-(60-num_days_to_predict):].shape


(31, 8)

In [None]:
start_date = last_available_date + timedelta(days=1)
end_date = start_date + timedelta(days=num_days_to_predict)
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

In [None]:
date_df = pd.DataFrame({'Datetime': date_range})
date_df.append()
# Drop the index column
date_df.reset_index(drop=True)

date_df


Unnamed: 0,Datetime
0,2018-01-03
1,2018-01-04
2,2018-01-05
3,2018-01-06
4,2018-01-07
5,2018-01-08
6,2018-01-09
7,2018-01-10
8,2018-01-11
9,2018-01-12


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def preprocess_data_month(date_df, sc):
    """
    Preprocess the data for LSTM model prediction for a month.

    Parameters:
    - date_df: DataFrame containing the dates for the month
    - sc: Scaler object used for training data

    Returns:
    - X_pred: Processed input features for prediction
    """
    # Create a DataFrame with Datetime column
    dataset = date_df.copy()
    dataset["Datetime"] = pd.to_datetime(date_df["Datetime"])

    # Extract Year, Month, Week, Day, and Time
    dataset["Year"] = dataset["Datetime"].dt.year
    dataset["Month"] = dataset["Datetime"].dt.month
    dataset["Week"] = dataset["Datetime"].dt.isocalendar().week
    dataset["Day"] = dataset["Datetime"].dt.dayofweek
    dataset["Time"] = dataset["Datetime"].dt.hour + dataset["Datetime"].dt.minute/60

    # Drop the original Datetime column
    dataset = dataset.drop(columns=["Datetime"])

    # Normalization
    data = sc.transform(dataset)

    X_pred = []

    for i in range(60, data.shape[0]):
        X_pred.append(data[i-60:i])

    X_pred = np.array(X_pred)

    return X_pred

# Assuming you have `sc` as the scaler object from training

# Create your date_df
date_range = pd.date_range(start="2023-06-09", periods=30, freq='D')
date_df = pd.DataFrame({'Datetime': date_range})

# Preprocess the data for prediction
X_pred = preprocess_data_month(date_df, sc)
print(X_pred.shape)  # Just to check the shape, should be (number_of_samples, sequence_length, number_of_features)


(0,)


In [None]:
X_pred,sc=preprocess_data_month(date_df)

TypeError: float() argument must be a string or a real number, not 'datetime.date'

In [None]:

# Load the saved model
model = load_model("/content/drive/MyDrive/TASK1_VERV/electricity-consumption/models/my_lstm_model.h5")

# Get the last available date in the dataset
last_available_date = pd.to_datetime(df["Datetime"].iloc[-1])

# Number of days to predict (from the day after the last available date)
num_days_to_predict = (last_available_date + pd.DateOffset(days=1)).days_in_month - last_available_date.day

# Preprocess new dates for prediction
X_pred_scaled, scaler, date_range = preprocess_dates_for_prediction(last_available_date, num_days_to_predict)

# Make predictions for the new dates
predicted_values = make_predictions(model, X_pred_scaled)

# Inverse transform the predictions
predicted_values = scaler.inverse_transform(predicted_values)

# Create a DataFrame for the predictions
predictions_df = pd.DataFrame({
    'Date': date_range,
    'Predicted_MegaWatt': predicted_values.flatten()
})

# Calculate total energy consumption for the predicted month
total_energy_consumption = predictions_df['Predicted_MegaWatt'].sum()

# Print the predictions and total energy consumption
print("Predicted Energy Consumption for Each Day:")
print(predictions_df)

print("\nTotal Energy Consumption for the Predicted Month:")
print(total_energy_consumption)


ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2440, in predict_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2425, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2413, in run_step  **
        outputs = model.predict_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2381, in predict_step
        return self(x, training=False)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_3' (type Sequential).
    
    Input 0 of layer "lstm_12" is incompatible with the layer: expected shape=(None, None, 1), found shape=(None, 60, 3)
    
    Call arguments received by layer 'sequential_3' (type Sequential):
      • inputs=tf.Tensor(shape=(None, 60, 3), dtype=float32)
      • training=False
      • mask=None


# LAST CALL


In [None]:
last_available_date = pd.to_datetime(df["Datetime"].iloc[-1])
num_days_to_predict = (last_available_date + pd.DateOffset(days=1)).days_in_month - last_available_date.day
start_date = last_available_date + timedelta(days=1)
end_date = start_date + timedelta(days=num_days_to_predict)
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
date_df = pd.DataFrame({'Datetime': date_range})
# Drop the index column
date_df.reset_index(drop=True)

date_df


Unnamed: 0,Datetime
0,2018-01-03
1,2018-01-04
2,2018-01-05
3,2018-01-06
4,2018-01-07
5,2018-01-08
6,2018-01-09
7,2018-01-10
8,2018-01-11
9,2018-01-12


In [None]:
def preprocess_dates(date_df):
    """
    Preprocess the dates for LSTM model prediction.

    Parameters:
    - date_df: DataFrame containing the dates

    Returns:
    - X_pred: Processed input features for prediction
    """
    dataset = date_df.copy()
    dataset["Datetime"] = pd.to_datetime(date_df["Datetime"])

    # Extract Year, Month, Week, Day, and Time
    dataset["Year"] = dataset["Datetime"].dt.year
    dataset["Month"] = dataset["Datetime"].dt.month
    dataset["Week"] = dataset["Datetime"].dt.isocalendar().week
    dataset["Day"] = dataset["Datetime"].dt.dayofweek
    dataset["Time"] = dataset["Datetime"].dt.hour + dataset["Datetime"].dt.minute/60

    # Drop the original Datetime column
    dataset = dataset.drop(columns=["Datetime"])

    return dataset.values


In [None]:
model=load_model("/content/drive/MyDrive/TASK1_VERV/electricity-consumption/models/my_lstm_model.h5")
# Preprocess the dates
X_pred = preprocess_dates(date_df)


In [None]:
# Define the start and end date for prediction
start_date = "2023-01-01"
end_date = "2023-01-31"

# Create a date range
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

# Create a DataFrame with Datetime column
date_df = pd.DataFrame({'Datetime': date_range})

# Preprocess the dates
X_pred = preprocess_dates(date_df)

# Scale the data
X_pred_scaled = sc.transform(X_pred)

# Reshape the data for LSTM input (assuming sequence length of 60)
X_pred_reshaped = np.reshape(X_pred_scaled, (X_pred_scaled.shape[0], 1, X_pred_scaled.shape[1]))

# Make predictions
predicted_values = model.predict(X_pred_reshaped)

# Inverse transform the predictions
predicted_values = sc.inverse_transform(predicted_values)

print(predicted_values)




ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2440, in predict_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2425, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2413, in run_step  **
        outputs = model.predict_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2381, in predict_step
        return self(x, training=False)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_3" is incompatible with the layer: expected shape=(None, 60, 1), found shape=(None, 1, 5)
