In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.nn import GCNConv, BatchNorm

# Load the dataset
file_path = 'data/MacroKAGCN.csv'
data = pd.read_csv(file_path)

# Inspect the data to understand its structure
print(data.head())

            Year Country Code  \
0  1983 [YR1983]          USA   
1  1984 [YR1984]          USA   
2  1985 [YR1985]          USA   
3  1986 [YR1986]          USA   
4  1987 [YR1987]          USA   

   Net trade in goods and services (BoP, current US$)  GDP (current US$)  \
0                                      -5.713500e+10        3.634038e+12   
1                                      -1.082770e+11        4.037613e+12   
2                                      -1.211020e+11        4.338979e+12   
3                                      -1.385270e+11        4.579631e+12   
4                                      -1.516750e+11        4.855215e+12   

   Consumer price index (2010 = 100)  \
0                          45.676445   
1                          47.640776   
2                          49.329949   
3                          50.266255   
4                          52.108294   

   Unemployment, total (% of total labor force) (national estimate)  \
0                               

In [2]:
# Assuming the dataset has columns for 'Country Code', 'Year', and the various economic indicators
features = ['Net trade in goods and services (BoP, current US$)',
            'GDP (current US$)',
            'Consumer price index (2010 = 100)',
            'Unemployment, total (% of total labor force) (national estimate)',
            'Exports of goods and services (BoP, current US$)',
            'Imports of goods and services (BoP, current US$)',
            'Foreign direct investment, net (BoP, current US$)',
            'Official exchange rate (LCU per US$, period average)']

# Create node features for each country
node_features_list = []
labels_list = []
country_codes = data['Country Code'].unique()

for country in country_codes:
    country_data = data[data['Country Code'] == country]
    
    # Create node features
    node_features_list.append(country_data[features].values)
    
    # Create labels (next year's exchange rate)
    next_year_exchange_rate = country_data['Official exchange rate (LCU per US$, period average)'].shift(-1)
    
    # Drop the last row to ensure matching lengths
    country_data = country_data.iloc[:-1]
    
    # Append the features and labels
    node_features_list[-1] = country_data[features].values
    labels_list.append(next_year_exchange_rate.iloc[:-1].values)


# Convert to tensors
node_features = torch.tensor(np.concatenate(node_features_list), dtype=torch.float)
labels = torch.tensor(labels_list, dtype=torch.float)

# Normalize node features
scaler = StandardScaler()
node_features = torch.tensor(scaler.fit_transform(node_features), dtype=torch.float)


  labels = torch.tensor(labels_list, dtype=torch.float)


In [3]:
# Create edge index and set all edge weights to 1
edge_index = []
edge_attr = []

# Create a fully connected graph with all edges having weight 1
for i in range(len(country_codes)):
    for j in range(i + 1, len(country_codes)):
        edge_index.append([i, j])
        edge_index.append([j, i])  # Assuming undirected graph
        edge_attr.append(1.0)
        edge_attr.append(1.0)

# Convert to PyTorch tensors
edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
edge_attr = torch.tensor(edge_attr, dtype=torch.float)

# Create PyTorch Geometric data object
from torch_geometric.data import Data
graph_data = Data(x=node_features, edge_index=edge_index, edge_attr=edge_attr, y=labels)

print(labels.shape)

torch.Size([6, 40])


In [4]:
# Create edge index and set all edge weights to 1
edge_index = []
edge_attr = []

# Create a fully connected graph with all edges having weight 1
for i in range(len(country_codes)):
    for j in range(i + 1, len(country_codes)):
        edge_index.append([i, j])
        edge_index.append([j, i])  # Assuming undirected graph
        edge_attr.append(1.0)
        edge_attr.append(1.0)

# Convert to PyTorch tensors
edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
edge_attr = torch.tensor(edge_attr, dtype=torch.float)


In [225]:
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GCNConv, BatchNorm, GATConv

class KAGCN(nn.Module):
    def __init__(self, num_node_features, hidden_dim, output_dim=6, dropout=0.3):
        super(KAGCN, self).__init__()
        self.conv1 = GCNConv(num_node_features, hidden_dim)
        self.bn1 = BatchNorm(hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        self.bn2 = BatchNorm(hidden_dim)
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)  # Output layer for 6 countries
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()
        
    def forward(self, x, edge_index, edge_weight):
        x = self.conv1(x, edge_index, edge_weight)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.conv2(x, edge_index, edge_weight)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc2(x)  # Final output is a vector with 6 elements

        # Reshape if needed to ensure correct output shape, depending on how your data is structured
        return x


KAGCN(
  (conv1): GCNConv(8, 128)
  (bn1): BatchNorm(128)
  (conv2): GCNConv(128, 128)
  (bn2): BatchNorm(128)
  (fc1): Linear(in_features=128, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (relu): ReLU()
)


In [None]:
# Verify input dimensions before running the model
model = KAGCN(num_node_features=node_features.shape[1], hidden_dim=128, output_dim=1)
print(model)

In [5]:
def check_gradients(model):
    for name, param in model.named_parameters():
        if param.requires_grad and param.grad is not None:
            print(f"{name} grad mean: {param.grad.mean()}")


# Apply gradient clipping during training
def train_model(model, data, labels, num_epochs=500, learning_rate=0.001):
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.SmoothL1Loss()  # Using Huber Loss here
    max_norm = 1.0  # Set max norm for gradient clipping

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        
        outputs = model(data.x, data.edge_index, data.edge_attr)
        loss = criterion(outputs.squeeze(), labels)
        
        loss.backward()
        
        # After loss.backward() and before optimizer.step()
        #check_gradients(model)
        # Clip gradients to avoid explosion
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        
        optimizer.step()
        
        if epoch % 10 == 0:
            print(f'Epoch {epoch}, Loss: {loss.item()}')

In [230]:
# Train the model
train_model(model, graph_data, num_epochs=500, learning_rate=0.001, labels=)

# Ensure the model is in evaluation mode
model.eval()

# Make predictions
with torch.no_grad():
    predictions = model(graph_data.x, graph_data.edge_index, graph_data.edge_attr)

# Reshape predictions to match the expected shape
predictions = predictions.view(-1, 6).numpy()
labels_np = labels.view(-1, 6).numpy()

# Calculate the Mean Squared Error
mse = mean_squared_error(labels_np, predictions)
print(f"Test MSE: {mse}")

# Visualize the predictions vs. actual values
plt.figure(figsize=(10, 6))
for i, country in enumerate(country_codes):
    plt.plot(labels_np[:, i], label=f'Actual {country}')
    plt.plot(predictions[:, i], label=f'Predicted {country}', linestyle='dashed')
    print(f'Predicted {country}', predictions[i])

plt.xlabel("Time")
plt.ylabel("Exchange Rate")
plt.title("Actual vs Predicted Exchange Rates")
plt.legend()
plt.show()




RuntimeError: The size of tensor a (240) must match the size of tensor b (6) at non-singleton dimension 1

In [223]:
# Example prediction for the next year
for i, country in enumerate(country_codes):
    print(f"Country: {country}, Predicted Exchange Rate for Next Year: {predictions[i].item()}")

ValueError: can only convert an array of size 1 to a Python scalar