# Import packages

In [6]:
# Install the analysis stack into the kernel's environment (%pip targets the active kernel).
%pip install -q pandas matplotlib numpy scikit-learn 
# PyTorch wheels are pulled from the cu118 wheel index (presumably the CUDA 11.8 builds — confirm it matches the local driver).
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Note: you may need to restart the kernel to use updated packages.
Looking in indexes: https://download.pytorch.org/whl/cu118
Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import numpy as np  
import matplotlib.pyplot as plt

In [23]:
import torch

# Query CUDA availability once and reuse the answer for both the report
# and the device selection below.
cuda_available = torch.cuda.is_available()
print("CUDA Available:", cuda_available)

# get_device_name(0) raises when no CUDA device is present, so only ask for
# the GPU name when one actually exists; otherwise the CPU fallback below
# would never be reached.
if cuda_available:
    print("GPU Name:", torch.cuda.get_device_name(0))

# Device used by every tensor/model in the notebook: GPU if present, else CPU.
device = torch.device("cuda" if cuda_available else "cpu")
print(f"Using device: {device}")

CUDA Available: True
GPU Name: NVIDIA GeForce RTX 2070 with Max-Q Design
Using device: cuda


In [18]:
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

# Import dataset

In [21]:
# Locations of the pre-split train/test tables, relative to this notebook.
train_csv_path = '../Data/train_test/train_df.csv'
test_csv_path = '../Data/train_test/test_df.csv'

# Read both splits into DataFrames; later cells read `train` and `test`.
train = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)

# Dummies by region

In [None]:
# Target column as (n_samples, 1) arrays, the 2-D layout StandardScaler expects.
target_col = 'tc_loss_area'
y_train = train[target_col].to_numpy().reshape(-1, 1)
y_test = test[target_col].to_numpy().reshape(-1, 1)

# Standardize the target: statistics are fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
y_train = scaler.fit_transform(y_train)
y_test = scaler.transform(y_test)

# Float32 tensors for PyTorch, moved to the selected device.
y_tensor_train, y_tensor_test = (
    torch.tensor(split_values, dtype=torch.float32).to(device)
    for split_values in (y_train, y_test)
)

### 1. Lasso

In [29]:
# Drop the region ('subnational1_') and cluster ('cluster_') dummy columns from
# both splits. Both prefixes are combined into ONE mask per frame: filtering
# twice from the original frame would discard the first filter and leave the
# 'subnational1_' columns in the feature matrix.
train_is_dummy = (
    train.columns.str.startswith('subnational1_')
    | train.columns.str.startswith('cluster_')
)
test_is_dummy = (
    test.columns.str.startswith('subnational1_')
    | test.columns.str.startswith('cluster_')
)
train1 = train.loc[:, ~train_is_dummy]
test1 = test.loc[:, ~test_is_dummy]

# X train and test: remove the target and the 'codmpio' column from the features.
X_train1 = train1.drop(columns=['tc_loss_area', 'codmpio'])
X_test1 = test1.drop(columns=['tc_loss_area', 'codmpio'])

# Normalize features with a dedicated scaler. Refitting the shared `scaler`
# here would overwrite the statistics fitted on the target, making it
# impossible to map predictions back to the original units later.
x_scaler = StandardScaler()
X_train1 = x_scaler.fit_transform(X_train1)
X_test1 = x_scaler.transform(X_test1)

# Convert to PyTorch tensors and send to the selected device.
X_tensor_train = torch.tensor(X_train1, dtype=torch.float32).to(device)
X_tensor_test = torch.tensor(X_test1, dtype=torch.float32).to(device)
# Target tensors are rebuilt here so this cell is self-contained when re-run.
y_tensor_train = torch.tensor(y_train, dtype=torch.float32).to(device)
y_tensor_test = torch.tensor(y_test, dtype=torch.float32).to(device)

In [28]:
class LassoRegression(nn.Module):
    """Single-output linear model with an L1 (Lasso) penalty on its weights.

    The penalty is not applied automatically: callers add
    ``l1_regularization_loss()`` to their data loss before calling backward.

    Args:
        input_dim: Number of input features.
        l1_lambda: Multiplier applied to the L1 norm of the weights.
    """

    def __init__(self, input_dim, l1_lambda=0.01):
        super().__init__()
        self.linear = nn.Linear(input_dim, 1)
        self.l1_lambda = l1_lambda  # Regularization strength

    def forward(self, x):
        """Return the linear prediction for ``x`` (last dim = ``input_dim``)."""
        return self.linear(x)

    def l1_regularization_loss(self):
        """Return ``l1_lambda * sum(|w|)`` over the linear weights (bias excluded)."""
        # torch.norm is deprecated; torch.linalg.vector_norm flattens the
        # weight matrix and computes the same entry-wise L1 norm.
        return self.l1_lambda * torch.linalg.vector_norm(self.linear.weight, ord=1)


In [None]:
# Define model, optimizer, and loss function.
# The input width is read from the feature tensor that is actually fed to the model.
model_lasso1 = LassoRegression(input_dim=X_tensor_train.shape[1], l1_lambda=0.01).to(device)
optimizer = optim.Adam(model_lasso1.parameters(), lr=0.01)  # Adam optimizer
criterion = nn.MSELoss()  # Mean Squared Error Loss

# Training loop (full-batch: every epoch uses the whole training set).
num_epochs = 100
for epoch in range(num_epochs):
    model_lasso1.train()
    optimizer.zero_grad()

    # Train on the device tensors: `X_train` is not defined in this notebook,
    # and `y_train` is a NumPy array that nn.MSELoss cannot consume.
    y_pred = model_lasso1(X_tensor_train)
    # Data loss plus the L1 penalty on the weights (Lasso objective).
    loss = criterion(y_pred, y_tensor_train) + model_lasso1.l1_regularization_loss()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


### 2. Ridge