In [14]:
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

In [15]:
df = pd.read_csv('clean.csv')
df.head()

Unnamed: 0,exports,imports,industrial production growth rate,investment,unemployement rate,gdp
0,0.126491,0.024505,0.280423,0.161121,0.351585,0.026382
1,0.181904,0.062921,0.375661,0.194396,0.110951,0.119814
2,0.146305,0.082021,0.330688,0.217163,0.040346,0.02142
3,0.085787,0.031111,0.343915,0.166375,0.053314,0.015557
4,0.353292,0.173487,0.259259,0.19965,0.092219,0.087047


In [16]:
X = df.drop('gdp', axis=1).values
y = df['gdp'].values

In [22]:
X.shape

(137, 5)

In [25]:
X[0]

array([0.12649071, 0.02450542, 0.28042328, 0.16112084, 0.35158501])

In [26]:
y[0]

0.0263822677281101

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:
class factbook_data:

    def __init__(self, X, y, scale_data=True):
        if not torch.is_tensor(X) and not torch.is_tensor(y):
            if scale_data:
                X = StandardScaler().fit_transform(X)
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]

In [33]:
obj = factbook_data(X, y, scale_data=True)
print(obj.__len__())
obj.__getitem__(0)

137


(tensor([ 0.8809, -0.0548, -0.7353, -0.6388,  1.1484], dtype=torch.float64),
 tensor(0.0264, dtype=torch.float64))

In [30]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(5, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, X):
        return self.layers(X)

In [37]:
if __name__=='__main__':
    torch.manual_seed(42)
    X,y = X, y

In [42]:
dataset = factbook_data(X, y, scale_data=False)
trainloader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True, num_workers=0)

In [40]:
mlp = MLP()

loss_function = nn.L1Loss()
optimizer = torch.optim.Adagrad(mlp.parameters(), lr=1e-4)

In [46]:
for epoch in range(0, 5):
    print(f'Starting Epoch: {epoch+1}')

    current_loss = 0.0

    for i, data in enumerate(trainloader, 0):
        inputs, targets = data
        inputs, targets = inputs.float(), targets.float()
        targets = targets.reshape((targets.shape[0]), 1)

        optimizer.zero_grad()

        outputs = mlp(inputs)

        loss = loss_function(outputs, targets)

        loss.backward()

        optimizer.step()

        current_loss += loss.item()

        if i%10==0:
            print(f'Loss after mini batch {i+1}: {current_loss/500}')
            current_loss = 0.0

    print(f'Epoch {epoch+1} finished')

print('Training has completed')

Starting Epoch: 1
Loss after mini batch 1: 6.966693699359894e-05
Loss after mini batch 11: 0.0004739650245755911
Epoch 1 finished
Starting Epoch: 2
Loss after mini batch 1: 6.283991038799286e-05
Loss after mini batch 11: 0.00041408875212073327
Epoch 2 finished
Starting Epoch: 3
Loss after mini batch 1: 7.268203794956208e-05
Loss after mini batch 11: 0.00040532084554433824
Epoch 3 finished
Starting Epoch: 4
Loss after mini batch 1: 3.436998277902603e-05
Loss after mini batch 11: 0.0004175704177469015
Epoch 4 finished
Starting Epoch: 5
Loss after mini batch 1: 2.9189512133598328e-05
Loss after mini batch 11: 0.0004847290385514498
Epoch 5 finished
Training has completed


In [47]:
test_data = torch.from_numpy(X_test).float()
test_targets = torch.from_numpy(y_test).float()

In [48]:
mlp.eval()

MLP(
  (layers): Sequential(
    (0): Linear(in_features=5, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [49]:
with torch.no_grad():
    outputs = mlp(test_data)
    predicted_labels = outputs.squeeze().tolist()

predicted_labels = np.array(predicted_labels)
test_targets = np.array(test_targets)

mse = mean_squared_error(test_targets, predicted_labels)
r2 = r2_score(test_targets, predicted_labels)

print("Mean Squared Error: ", mse)
print("R2 Score: ", r2)

Mean Squared Error:  0.003841374886917585
R2 Score:  -0.0537770242367368
