In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import QuantileTransformer
from sklearn.impute import SimpleImputer 
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [2]:
d = pd.read_csv('API_EN.ATM.CO2E.PC_DS2_en_csv_v2_1627.csv')
df = d.dropna(axis=1, how='all')
df = df.drop(['Indicator Name', 'Indicator Code'], axis=1)
df = df.drop('Country Code',axis = 1)
df.head()

Unnamed: 0,Country Name,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Africa Eastern and Southern,0.982975,0.942212,0.907936,0.90955,0.913413,0.933001,0.9432,0.962203,0.963157,...,0.97684,0.989585,1.001154,1.013758,0.96043,0.941337,0.933874,0.921453,0.915294,0.79542
1,Afghanistan,0.191389,0.180674,0.126517,0.109106,0.096638,0.088781,0.082267,0.075559,0.07127,...,0.408965,0.335061,0.298088,0.283692,0.297972,0.268359,0.281196,0.299083,0.297564,0.223479
2,Africa Western and Central,0.470111,0.521084,0.558013,0.513859,0.462384,0.492656,0.554305,0.540062,0.506709,...,0.451578,0.452101,0.481623,0.493505,0.475577,0.479775,0.465166,0.475817,0.490837,0.46315
3,Angola,0.554941,0.545807,0.544413,0.710961,0.839266,0.914265,1.07363,1.086325,1.091173,...,0.983787,0.947583,1.031044,1.091497,1.125185,1.012552,0.829723,0.755828,0.753638,0.592743
4,Albania,1.844035,1.261054,0.689644,0.644008,0.649938,0.612055,0.621206,0.469831,0.576804,...,1.768109,1.565921,1.65639,1.795712,1.665219,1.590069,1.880557,1.854642,1.749462,1.54455


In [3]:
in_year = [str(i) for i in range(1990,2020)]
target = '2020'
X = df[['Country Name']+in_year]
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
numerical = in_year
categorical = ['Country Name']
enc = OneHotEncoder(handle_unknown='ignore')

In [5]:
num = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
cat = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', enc)
])

In [6]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num, numerical),
        ('cat', cat, categorical)
    ]
)

In [7]:
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)
y_train.fillna(y_train.median(), inplace=True)
y_test.fillna(y_test.median(), inplace=True)

In [8]:
a = X_train.shape[1]
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(a,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))
model.evaluate(X_test, y_test)
model.save('virtual_time_travel.h5')



Epoch 1/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 122ms/step - loss: 39.0042 - mean_absolute_error: 4.0038 - val_loss: 18.4687 - val_mean_absolute_error: 2.6848
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 22.5518 - mean_absolute_error: 3.3520 - val_loss: 13.4646 - val_mean_absolute_error: 2.5447
Epoch 3/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 12.1334 - mean_absolute_error: 2.7702 - val_loss: 7.6764 - val_mean_absolute_error: 2.1047
Epoch 4/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 5.6645 - mean_absolute_error: 1.9484 - val_loss: 3.6235 - val_mean_absolute_error: 1.4660
Epoch 5/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 4.9378 - mean_absolute_error: 1.6680 - val_loss: 3.1228 - val_mean_absolute_error: 1.3431
Epoch 6/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step 



In [9]:
from sklearn.metrics import r2_score
y_pred = model.predict(X_test)

r2 = r2_score(y_test,y_pred)
print("R² Score:", r2)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
R² Score: 0.9068654925184425
