In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Input,Dense,LSTM,Dropout,MultiHeadAttention,LayerNormalization
from tensorflow.keras.losses import MeanSquaredError,MeanAbsoluteError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau,EarlyStopping

## Optional: changing the working directory
- The `os.chdir` cell below can be deleted. I only needed it because I run TensorFlow under WSL; on a regular Windows setup it can be removed.

In [None]:
import os

# Machine-specific project folder. Only switch to it when it actually exists,
# so the notebook still runs (from its own folder) on machines without this path.
PROJECT_DIR = '/tf-acno-projects/Project-Data-Mining'
if os.path.isdir(PROJECT_DIR):
    os.chdir(PROJECT_DIR)

In [None]:
def X_y_forecasting_splits(Datafile,time_steps,target_column="CO2 Emission"):
    """Cut a table into sliding windows for one-step-ahead forecasting.

    Every window holds `time_steps` consecutive rows (all columns) and is paired
    with the value of `target_column` in the row right after the window.

    Parameters
    ----------
    Datafile : pandas.DataFrame
        Rows in chronological order.
    time_steps : int
        Number of consecutive rows per window; must not be negative.
    target_column : str, default "CO2 Emission"
        Column whose next value is the prediction target.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        X with shape (n_windows, time_steps, n_columns) and y with shape
        (n_windows,), where n_windows = len(Datafile) - time_steps.
        Both arrays are empty when the table is too short for a single window.

    Raises
    ------
    ValueError
        If `time_steps` is negative.
    """
    if time_steps < 0:
        raise ValueError("time_steps must not be negative")

    # Convert once up front instead of re-materialising rows on every iteration.
    values = Datafile.values
    targets = Datafile[target_column].to_numpy()

    n_windows = max(len(Datafile) - time_steps, 0)
    X = [values[start:start + time_steps] for start in range(n_windows)]
    y = [targets[start + time_steps] for start in range(n_windows)]
    return np.array(X),np.array(y)

In [None]:
def months_converter(DataFile):
    """Replace the labels in the 'Month' column with integers, in place.

    When every distinct label is an English month name or three-letter
    abbreviation (case-insensitive), each label becomes its calendar number
    (Jan -> 1 ... Dec -> 12), independent of the order in which months appear.
    Otherwise the labels are numbered 1, 2, ... in order of first appearance.

    Parameters
    ----------
    DataFile : pandas.DataFrame
        Table with a 'Month' column; the column is overwritten.

    Returns
    -------
    None
    """
    import calendar

    unique_months = DataFile['Month'].unique()

    # Lookup for both "jan" and "january" style labels.
    calendar_lookup = {}
    for number in range(1, 13):
        calendar_lookup[calendar.month_abbr[number].lower()] = number
        calendar_lookup[calendar.month_name[number].lower()] = number

    def _calendar_number(label):
        return calendar_lookup.get(str(label).strip().lower())

    all_recognised = len(unique_months) > 0 and all(
        _calendar_number(month) is not None for month in unique_months
    )
    if all_recognised:
        months_dict = {month: _calendar_number(month) for month in unique_months}
    else:
        # Unknown label format: keep the order-of-appearance numbering.
        months_dict = {
            month:idx+1 for idx,month in enumerate(unique_months)
        }
    DataFile['Month'] = DataFile['Month'].map(months_dict)

In [None]:
def lstm_architecture(INPUT_SHAPE,LR):
    """Build and compile the LSTM regressor.

    The network is one LSTM layer (64 units), two ReLU dense layers (32 and 16
    units) and a single linear output unit. It is compiled with Adam at learning
    rate `LR`, mean squared error as loss and mean absolute error as metric.

    Parameters
    ----------
    INPUT_SHAPE : tuple
        Shape of one sample, passed to `Input(shape=...)`.
    LR : float
        Learning rate for the Adam optimizer.

    Returns
    -------
    The compiled Keras `Model`.
    """
    inputs = Input(shape=INPUT_SHAPE)

    features = LSTM(64)(inputs)
    for units in (32, 16):
        features = Dense(units, activation='relu')(features)
    prediction = Dense(1, activation='linear')(features)

    network = Model(inputs, prediction)
    network.compile(
        optimizer=Adam(learning_rate=LR),
        loss=MeanSquaredError(),
        metrics=[MeanAbsoluteError()],
    )
    return network

In [None]:
# Read the raw emissions table and preview its first rows.
DataFile = pd.read_csv(filepath_or_buffer="Emission.csv")
DataFile.head(n=5)

In [None]:
# Per-column count of missing values, followed by the number of fully duplicated rows.
print(DataFile.isna().sum(), DataFile.duplicated().sum(), sep="\n")

Alright, there are no null values and no duplicates, but the "Year-Month" column is a problem: it packs two values into one text field. It is better to split it into two columns and convert them to numerical values.

In [None]:
# Split "<year>-<month>" on the dash, store the two parts as their own columns,
# then remove the combined column.
year_month_parts = DataFile['Year-Month'].str.split(pat='-', expand=True)
DataFile[['Year', 'Month']] = year_month_parts
del DataFile['Year-Month']

In [None]:
# Count missing cells first so anything turned into NaN below can be reported.
missing_before = DataFile.isna().sum()

months_converter(DataFile)

# errors='coerce' replaces unparseable cells with NaN instead of raising.
for col in DataFile.columns:
    DataFile[col] = pd.to_numeric(DataFile[col],errors='coerce')

# Surface silent coercion: NaN values would otherwise flow into the models unnoticed.
newly_missing = DataFile.isna().sum() - missing_before
if newly_missing.any():
    print("Cells that could not be converted to numbers (now NaN), per column:")
    print(newly_missing[newly_missing > 0])

print(DataFile)
print(DataFile.dtypes)

In [None]:
# Basic descriptive statistics of the target column.
co2_column = DataFile["CO2 Emission"]
for stat_label, stat_value in (
    ("Min", co2_column.min()),
    ("Max", co2_column.max()),
    ("Mean", co2_column.mean()),
    ("STD", co2_column.std()),
):
    print(f'Dataset {stat_label} : {stat_value}')

Note: we should scale the data later on, before training our models, so that the loss values are on a comparable scale and overfitting or underfitting is easier to detect.

Now we need to perform visual analysis on our dataset, but first we need to create a csv of our new dataset

In [None]:
# Persist the cleaned table; the row index is not written to the file.
DataFile.to_csv(path_or_buf="New Emission.csv", index=False)

In [None]:
# Reload the cleaned table from disk and preview its first rows.
DataFile = pd.read_csv(filepath_or_buffer="New Emission.csv")
DataFile.head(n=5)

In [None]:
# Show the dtype of every column of the reloaded table.
print(DataFile.dtypes)

In [None]:
# Several rows share the same integer year, so plotting against "Year" alone
# stacks them on one vertical line. Build a fractional-year axis instead.
# NOTE(review): assumes "Month" holds calendar numbers 1-12 - confirm.
time_axis = DataFile["Year"] + (DataFile["Month"] - 1) / 12

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(time_axis, DataFile["CO2 Emission"], marker="o", linestyle="-", color="b")

# Labels and Title
ax.set_xlabel("Year")
ax.set_ylabel("CO2 Emission (ppm)")
ax.set_title("CO2 Emission Over the Years")
ax.grid(True)

# Show the plot
plt.show()

In [None]:
# Bar chart of the emission value grouped by month.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=DataFile["Month"], y=DataFile["CO2 Emission"], palette="coolwarm", ax=ax)

# Axis labels and title in one call
ax.set(
    xlabel="Month",
    ylabel="CO2 Emission (ppm)",
    title="CO2 Emission by Month",
)

# Render the figure
plt.show()

In [None]:
# Bar chart of the emission value grouped by year (one bar per year).
fig, ax = plt.subplots(figsize=(12, 5))
sns.barplot(x=DataFile["Year"], y=DataFile["CO2 Emission"], palette="coolwarm", ax=ax)

# Axis labels and title in one call
ax.set(
    xlabel="Year",
    ylabel="CO2 Emission (ppm)",
    title="CO2 Emission by Year",
)

# Tilt the tick labels so the many year labels do not overlap
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

# Render the figure
plt.show()

It's kind of a complex figure so we will group the years into ranges

In [None]:
# First and last year present in the data, printed one per line.
year_column = DataFile["Year"]
min_year, max_year = year_column.min(), year_column.max()

print(min_year, max_year, sep="\n")

In [None]:
# Build 5-year bins from the data instead of hard-coding them, so years outside
# a fixed range are never silently binned to NaN. Edges are multiples of the
# width and intervals are right-closed, e.g. (1970, 1975] is labelled "1971-1975".
YEAR_BIN_WIDTH = 5
first_year = int(DataFile["Year"].min())
last_year = int(DataFile["Year"].max())

lower_edge = ((first_year - 1) // YEAR_BIN_WIDTH) * YEAR_BIN_WIDTH
upper_edge = -(-last_year // YEAR_BIN_WIDTH) * YEAR_BIN_WIDTH  # ceiling to a multiple of the width
bins = list(range(lower_edge, upper_edge + YEAR_BIN_WIDTH, YEAR_BIN_WIDTH))

labels = [f"{low + 1}-{high}" for low, high in zip(bins[:-1], bins[1:])]

DataFile["Year Range"] = pd.cut(DataFile["Year"], bins=bins, labels=labels, right=True)
print(DataFile[["Year", "Year Range"]].head())

In [None]:
# Bar chart of the emission value grouped by year range.
fig, ax = plt.subplots(figsize=(12, 5))  # Increase width
sns.barplot(x=DataFile["Year Range"], y=DataFile["CO2 Emission"], palette="coolwarm", ax=ax)

ax.set_xlabel("Year Range")
ax.set_ylabel("CO2 Emission (ppm)")
# The x axis shows year ranges, so the title says so (it used to repeat the per-year title).
ax.set_title("CO2 Emission by Year Range")

plt.setp(ax.get_xticklabels(), rotation=45, ha="right")  # Rotate labels for better spacing

plt.show()


In [None]:
# Remove the plotting-only helper column in place.
del DataFile['Year Range']

## LSTM AND TRANSFORMERS


We have 486 rows, so:
- train 80% = int(len(DataFile)*0.8)+1 => 389
- test 10% = int(len(DataFile)*0.1) => 48
- validation 10% = int(len(DataFile)*0.1) => 48 (in the code, the validation set is simply whatever remains after the train and test slices)

In [None]:
# Number of consecutive rows in each input window.
TIME_STEP = 3
X, y = X_y_forecasting_splits(Datafile=DataFile, time_steps=TIME_STEP)

# Scale the targets with a min-max scaler fitted on them.
# NOTE(review): the scaler is fitted on the complete target series, with no
# train/validation separation at this point.
scaler = MinMaxScaler()
target_column_vector = y.reshape(-1, 1)
scaler.fit(target_column_vector)
y_scaled = scaler.transform(target_column_vector).flatten()

In [None]:
# One sample is (time steps, features). The feature count is read from the
# windowed array so it follows the table's columns instead of being hard-coded.
INPUT_SHAPE = (TIME_STEP, X.shape[-1])
LR = 0.1
EPOCHS = 80
N_SPLITS = 3
# Both callbacks monitor the validation loss: the first halves the learning rate
# on a plateau, the second stops training when the loss no longer improves.
CALLBACK = [
    ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=20,
    min_delta=0.0005,
    min_lr=1e-6,
    ),
    EarlyStopping(
    monitor="val_loss",
    patience=20,
    min_delta=0.0001,
    )]

# Split sizes are derived from the number of table rows.
# NOTE(review): X has fewer entries than DataFile (one per window), so the last
# slice below is smaller than test_val_size.
train_size = int(len(DataFile)*0.8) + 1 
test_val_size = int(len(DataFile)*0.1)

# Chronological split: first block for training, next for test, remainder for validation.
X_train,y_train = X[:train_size],y[:train_size]
X_test,y_test= X[train_size:train_size+test_val_size],y[train_size:train_size+test_val_size]
X_val,y_val = X[train_size+test_val_size:],y[train_size+test_val_size:]

print(f'train size is : {train_size}, test val size is : {test_val_size}')
print(f'train : {X_train.shape} , {y_train.shape}')
print(f'test : {X_test.shape} , {y_test.shape}')
print(f'val : {X_val.shape} , {y_val.shape}')

### LSTM

In [None]:
# Expanding-window cross-validation of the LSTM model.
# Scalers are fitted inside each fold on the training rows only, so no statistic
# of the validation rows leaks into training, and the input windows are scaled
# as well as the targets.
time_series_split_folds = TimeSeriesSplit(n_splits=N_SPLITS)
performance = []
n_features = X.shape[-1]
for fold ,(training_idx, validation_idx) in enumerate(time_series_split_folds.split(X,y)):
    lstm_model = lstm_architecture(INPUT_SHAPE,LR)

    train_windows, val_windows = X[training_idx], X[validation_idx]
    train_targets, val_targets = y[training_idx], y[validation_idx]

    # MinMaxScaler works on 2-D data: flatten the windows to (rows, features),
    # transform, then restore the (windows, time steps, features) shape.
    fold_feature_scaler = MinMaxScaler().fit(train_windows.reshape(-1, n_features))
    fold_target_scaler = MinMaxScaler().fit(train_targets.reshape(-1, 1))

    train_windows_scaled = fold_feature_scaler.transform(
        train_windows.reshape(-1, n_features)).reshape(train_windows.shape)
    val_windows_scaled = fold_feature_scaler.transform(
        val_windows.reshape(-1, n_features)).reshape(val_windows.shape)
    train_targets_scaled = fold_target_scaler.transform(train_targets.reshape(-1, 1)).flatten()
    val_targets_scaled = fold_target_scaler.transform(val_targets.reshape(-1, 1)).flatten()

    X_train_cv = tf.convert_to_tensor(train_windows_scaled, dtype=tf.float32)
    X_val_cv = tf.convert_to_tensor(val_windows_scaled, dtype=tf.float32)
    y_train_cv = tf.convert_to_tensor(train_targets_scaled, dtype=tf.float32)
    y_val_cv = tf.convert_to_tensor(val_targets_scaled, dtype=tf.float32)
    
    lstm_model.fit(X_train_cv,y_train_cv,epochs=EPOCHS,validation_data=(X_val_cv,y_val_cv),callbacks=CALLBACK,verbose=1)
    val_loss, val_mae = lstm_model.evaluate(X_val_cv, y_val_cv,verbose=0)
    
    # Map predictions back to the original units with this fold's target scaler
    # and compare against the untouched validation targets.
    y_val_preds = lstm_model.predict(X_val_cv)
    y_val_preds = fold_target_scaler.inverse_transform(y_val_preds).flatten()
    y_val_original = val_targets
    
    mae_original = np.mean(np.abs(y_val_preds - y_val_original))
    
    performance.append({
    "fold": fold,
    "val_loss": f'{val_loss:.4f}',
    "val_mae_scaled": f'{val_mae:.4f}',
    "val_mae_original": f'{mae_original:.4f}',
    })    
    
    print(f'Fold {fold} , val_loss is : {val_loss:.2f}, MAE scaled is : {val_mae:.2f}, MAE original is : {mae_original:.2f}') 

In [None]:
# Turn the per-fold result records into a table and print it.
performance = pd.DataFrame(data=performance)
print(performance)

For the LSTM model, that is the best performance we could get.

### Transformer

In [195]:
def positional_encoding(sequence_length,d_model,n=10000):
    """
    Build the sinusoidal positional-encoding table.

    For position p and pair index i the table holds
        PE[p, 2i]   = sin(p / n**(2i / d_model))
        PE[p, 2i+1] = cos(p / n**(2i / d_model))
    If d_model is odd, the last column stays zero.

    sequence_length: number of positions (time steps) in one input sequence
    d_model: the dimension of our input ( output of the embedding space )
    n: base of the geometric progression of wavelengths
    returns: float32 tensor of shape (1, sequence_length, d_model); the leading
             axis of size 1 lets it broadcast over the batch dimension
    """
    half_width = d_model // 2

    # Column vector of positions against a row vector of denominators gives the
    # whole (sequence_length, half_width) angle table in one broadcast division.
    positions = np.arange(sequence_length)[:, np.newaxis]
    denominators = np.power(n, 2 * np.arange(half_width) / d_model)
    angles = positions / denominators

    PosEnc = np.zeros((sequence_length,d_model))
    PosEnc[:, 0:2 * half_width:2] = np.sin(angles)  # even columns
    PosEnc[:, 1:2 * half_width:2] = np.cos(angles)  # odd columns

    pos_enc_tensor = tf.constant(PosEnc, dtype=tf.float32)
    pos_enc_tensor = tf.reshape(pos_enc_tensor, (1, sequence_length, d_model))
    return pos_enc_tensor

In [196]:
# Width of the embedding space used throughout the transformer.
D_MODEL = 64
# The positional encoding is added to the embedded input windows, so its length
# must equal the window length; tie it to TIME_STEP instead of repeating the literal.
SEQ_LEN = TIME_STEP
N_SIZE = 10000

# Attention heads; each head works on an equal share of the embedding width.
NUM_HEADS = 8
KEY_DIM = D_MODEL//NUM_HEADS

- We use **an embedding**, which converts the raw input into a high-dimensional vector (of size `d_model` in this case),
so the embedded tensor has shape **(batch_size, time, features_embedding)**.
- **Positional encoding** is added to this embedding so the model knows the position of each element (time step) in the sequence.

In [None]:
def transformer_embedding_positioning(input_layer):
    """Embed the input with a Dense(D_MODEL) layer and add the positional encoding.

    The encoding is built for SEQ_LEN positions and D_MODEL channels and is
    added element-wise to the embedded tensor.
    """
    position_table = positional_encoding(SEQ_LEN,D_MODEL)
    embedded = Dense(D_MODEL)(input_layer)
    return embedded + position_table

In [198]:
def transformer_encoder(x):
    """Apply one encoder block to `x` and return the result.

    The block is self-attention followed by a two-layer feed-forward network;
    each sub-layer is wrapped in a residual connection and layer normalization.
    """
    # Self-attention sub-layer: query and value are both `x`.
    attended = MultiHeadAttention(num_heads=NUM_HEADS,key_dim=KEY_DIM)(x,x)
    normed_attention = LayerNormalization(epsilon=1e-6)(x+attended)

    # Feed-forward sub-layer: widen to 2*D_MODEL with ReLU, project back to D_MODEL.
    expanded = Dense(D_MODEL*2,activation='relu')(normed_attention)
    projected = Dense(D_MODEL)(expanded)

    return LayerNormalization(epsilon=1e-6)(normed_attention+projected)

In [199]:
def transformer_architecture():   
    input_layer = Input(shape=INPUT_SHAPE)
    x_positioned_embedded = transformer_embedding_positioning(input_layer) 
    x1 = transformer_encoder(x_positioned_embedded)
    x2 = transformer_encoder(x1)
    output_layer = Dense(1)(x2)
    
    transformer = Model(input_layer,output_layer)
    transformer.compile(optimizer=Adam(learning_rate=LR),loss=MeanSquaredError(),metrics=[MeanAbsoluteError()])
    return transformer

In [None]:
time_series_split_folds = TimeSeriesSplit(n_splits=N_SPLITS)
performance = []
for fold ,(training_idx, validation_idx) in enumerate(time_series_split_folds.split(X,y)):
    transformer_model = transformer_architecture()

    X_train_cv = tf.convert_to_tensor(X[training_idx], dtype=tf.float32)
    X_val_cv = tf.convert_to_tensor(X[validation_idx], dtype=tf.float32)
    y_train_cv = tf.convert_to_tensor(y_scaled[training_idx], dtype=tf.float32)
    y_val_cv = tf.convert_to_tensor(y_scaled[validation_idx], dtype=tf.float32)
    
    transformer_model.fit(X_train_cv,y_train_cv,epochs=EPOCHS,validation_data=(X_val_cv,y_val_cv),callbacks=CALLBACK,verbose=1)
    val_loss, val_mae = transformer_model.evaluate(X_val_cv, y_val_cv,verbose=0)
    
    y_val_preds = transformer_model.predict(X_val_cv)
    y_val_preds = y_val_preds[:, -1, :]  # Shape (120, 1)
    
    # Reshape predictions
    y_val_preds_reshaped = y_val_preds.reshape(-1, 1)

    # Apply inverse transform
    y_val_preds_original = scaler.inverse_transform(y_val_preds_reshaped).flatten()

    # Reshape validation data
    y_val_reshaped = y_val_cv.numpy().reshape(-1, 1)

    # Apply inverse transform
    y_val_original = scaler.inverse_transform(y_val_reshaped).flatten() 
    
    mae_original = np.mean(np.abs(y_val_preds_original - y_val_original))
    
    performance.append({
    "fold": fold,
    "val_loss": f'{val_loss:.4f}',
    "val_mae_scaled": f'{val_mae:.4f}',
    "val_mae_original": f'{mae_original:.4f}',
    })    
    
    print(f'Fold {fold} , val_loss is : {val_loss:.2f}, MAE scaled is : {val_mae:.2f}, MAE original is : {mae_original:.2f}') 

SEQ_LEN: 3, D_MODEL: 64, INPUT_DIM: (3, 3)
Embedded shape: (None, 3, 64)
Positional encoding shape: (1, 3, 64)
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Raw predictions shape: (120, 3, 1)
Validation data shape: (120,)
Raw predictions shape: (120, 1)
Reshaped predictions shape: (120, 1)
Final predictions shape: (120,)
Reshaped validation shape: (120, 1)
Final validation shape: (120,)
Fold 0 , val_loss is : 0.03, MAE scaled is : 0.16, MAE original is : 12.99
SEQ_LEN: 3, D_MODEL: 64, INPUT_DIM: (3, 3)
Embedded shape: (None, 3, 64)
Positional encoding shape: (1, 3, 64)
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11

Raw predictions shape: (120, 3, 1)
Validation data shape: (120,)
Raw predictions shape: (120, 1)
Reshaped predictions shape: (120, 1)
Final predictions shape: (120,)
Reshaped validation shape: (120, 1)
Final validation shape: (120,)
Fold 2 , val_loss is : 0.21, MAE scaled is : 0.45, MAE original is : 38.37