<a href="https://colab.research.google.com/github/basugautam/Reproducibility-Challenge-Project/blob/main/1_Traditional_Forecast_Methods.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout
from tensorflow.keras import optimizers
from tensorflow.keras.losses import MeanSquaredError
import os

# Load your dataset (ensure the file path is correct in Google Colab, so use the upload or mount method)
from google.colab import files
uploaded = files.upload()

# Let's assume your file is named 'timeseries_dataset.csv' (adjust according to your actual file)
df = pd.read_csv('timeseries_dataset.csv')

# Check the first few rows of the dataset to understand its structure
df.head()

# If needed, convert date/time column to datetime type (adjust column names accordingly)
# df['date_column'] = pd.to_datetime(df['date_column'])
# df.set_index('date_column', inplace=True)

# Let's assume the dataset consists of a single time series column
# Adjust the column name as per your dataset
data = df['your_column_name'].values

# Scale the data using MinMaxScaler for LSTM/GRU input (recommended for neural networks)
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data.reshape(-1, 1))

# Create a function to generate input-output sequences for time series forecasting
def create_sequences(data, seq_length):
    X = []
    y = []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length, 0])
        y.append(data[i+seq_length, 0])
    return np.array(X), np.array(y)

# Set the sequence length for the time steps
seq_length = 50  # Number of time steps to predict the next step

# Create sequences for training data
X, y = create_sequences(data_scaled, seq_length)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Reshape the data to match the LSTM/GRU input format
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build the LSTM/GRU-based model with attention-like behavior (weighted loss)
model = Sequential()
model.add(LSTM(units=64, return_sequences=False, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer=optimizers.Adam(learning_rate=0.001), loss=MeanSquaredError(), metrics=['mae'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Plot the loss curves for training and validation
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss During Training')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Make predictions on the test set
y_pred = model.predict(X_test)

# Invert the scaling for the predictions and actual values
y_pred = scaler.inverse_transform(y_pred)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))

# Plot the actual vs predicted values
plt.figure(figsize=(10,6))
plt.plot(y_test, label='Actual Values')
plt.plot(y_pred, label='Predicted Values')
plt.title('Time Series Forecasting')
plt.xlabel('Time Steps')
plt.ylabel('Value')
plt.legend()
plt.show()

# Evaluate the model's performance
from sklearn.metrics import mean_absolute_error, mean_squared_error
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')

# Weighted loss function (custom to focus more on later time steps or critical points)
# Here, we'll assign more weight to the last steps of the forecast horizon (like step N)
weights = np.linspace(1, 2, len(y_test))  # Example: increasing weight towards the last time steps

# Adjust the training process with a custom weighted loss
def weighted_loss(y_true, y_pred):
    return tf.reduce_mean(weights * tf.square(y_true - y_pred))

model.compile(optimizer=optimizers.Adam(learning_rate=0.001), loss=weighted_loss, metrics=['mae'])

# Re-train the model with weighted loss if necessary (skip this if you already trained without weighted loss)
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))


Saving timeseries_data.csv.csv to timeseries_data.csv.csv


FileNotFoundError: [Errno 2] No such file or directory: 'timeseries_dataset.csv'