In [76]:
import pandas as pd
import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

def seed_everything(seed: int = 42):
    """Seed every random number generator this notebook relies on.

    Exports ``PYTHONHASHSEED``, seeds Python's ``random`` module, NumPy's
    global generator and PyTorch (plus all CUDA devices when CUDA is
    available), and puts cuDNN into deterministic, non-benchmarking mode.

    Args:
        seed: Value handed to every generator. Defaults to 42.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The three library-level seeders all take the seed as their only argument.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Ask cuDNN for deterministic kernels and disable its auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Fix the seed once, before the data is loaded and the model is built.
seed_everything(seed=42)

In [77]:
# Load the table used for training from the working directory.
uhi = pd.read_csv(filepath_or_buffer='uhi_imputed.csv')

In [78]:
# Everything except the coordinates, the timestamp and the target is a feature.
target_name = 'UHI Index'
non_feature_columns = ['Latitude', 'Longitude', target_name, 'datetime']

X = uhi.drop(columns=non_feature_columns)
y = uhi[target_name]

In [79]:
# Features and target each get their own StandardScaler; both scalers are
# kept so they can be reused later (transforming new inputs, un-scaling
# predictions).
scaler_X = StandardScaler()
scaler_X.fit(X)
X_scaled = scaler_X.transform(X)

# The scaler expects a 2-D array, so the target is presented as one column.
y_column = y.to_numpy().reshape(-1, 1)
scaler_y = StandardScaler()
scaler_y.fit(y_column)
y_scaled = scaler_y.transform(y_column)

X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_scaled, dtype=torch.float32).view(-1, 1)

class UHIDataset(Dataset):
    """Map-style dataset pairing each feature row with its target row.

    Args:
        X: Indexable, sized collection of feature rows.
        y: Indexable, sized collection of targets, aligned with ``X`` by
            position.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same length.
    """

    def __init__(self, X, y):
        # __len__ is driven by X alone, so a mismatch would otherwise either
        # surface as an index error part-way through an epoch or silently
        # ignore trailing targets. Reject it up front instead.
        if len(X) != len(y):
            raise ValueError(
                f'X and y must have the same length, got {len(X)} and {len(y)}'
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]
    
# Wrap the scaled tensors in a dataset and serve them in shuffled batches of 32.
train = UHIDataset(X=X_tensor, y=y_tensor)
train_loader = DataLoader(dataset=train, batch_size=32, shuffle=True)

In [80]:
class ResidualBlock(nn.Module):
    """Two linear layers with a ReLU in between, wrapped in a skip connection.

    Args:
        dim: Input width. Both linear layers map ``dim`` to ``dim`` so the
            block's output can be added to its input.
    """

    def __init__(self, dim):
        super().__init__()
        # Layers are instantiated in forward order.
        layers = [nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim)]
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``x`` plus the transformation of ``x`` by the inner layers."""
        transformed = self.block(x)
        return x + transformed

class Model(nn.Module):
    """Feed-forward regressor: input -> 64 -> 32 -> 1 with residual blocks.

    A ``ResidualBlock`` follows each of the two ReLU-activated hidden
    projections; the final linear layer emits a single value per row.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Built as a list first; elements are created top to bottom, which is
        # also the order they are applied in.
        stages = [
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            ResidualBlock(64),
            nn.Linear(64, 32),
            nn.ReLU(),
            ResidualBlock(32),
            nn.Linear(32, 1),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        prediction = self.net(x)
        return prediction

In [81]:
# Size the network from the feature table, then pair it with an MSE
# objective and an Adam optimizer.
_, input_dim = X.shape
model = Model(input_dim=input_dim)

criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [82]:
num_epochs = 200

# Mini-batch training. The value logged per epoch is the plain mean of the
# per-batch losses (each batch counts equally, whatever its size).
for epoch in range(num_epochs):
    model.train()
    running_loss = 0

    for inputs, targets in train_loader:
        # Gradients accumulate by default, so reset them before each step.
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}')

# Final score. Note this uses X_tensor / y_tensor, i.e. the same rows the
# model was just trained on, so it measures fit rather than generalisation.
model.eval()
with torch.no_grad():
    preds_scaled = model(X_tensor)
    loss = criterion(preds_scaled, y_tensor)

# The loss is reported in scaled units; R2 is computed after mapping the
# predictions back through scaler_y.
y_orig = y.values.reshape(-1, 1)
preds_orig = scaler_y.inverse_transform(preds_scaled.numpy())
r2 = r2_score(y_orig, preds_orig)
print(f'Loss: {loss.item()}, R2: {r2}')

Epoch 1/200, Loss: 0.8522730394136532
Epoch 2/200, Loss: 0.8142241507004468
Epoch 3/200, Loss: 0.7978041565146541
Epoch 4/200, Loss: 0.7844151896119458
Epoch 5/200, Loss: 0.7665796387738991
Epoch 6/200, Loss: 0.7624893490745132
Epoch 7/200, Loss: 0.7532003637392637
Epoch 8/200, Loss: 0.7421954373518626
Epoch 9/200, Loss: 0.734748963701759
Epoch 10/200, Loss: 0.7230550453194187
Epoch 11/200, Loss: 0.7164804684130894
Epoch 12/200, Loss: 0.7070208484800453
Epoch 13/200, Loss: 0.6942958393667498
Epoch 14/200, Loss: 0.6852796329052699
Epoch 15/200, Loss: 0.68012200121866
Epoch 16/200, Loss: 0.6675467452941797
Epoch 17/200, Loss: 0.6585240101712382
Epoch 18/200, Loss: 0.651850471299598
Epoch 19/200, Loss: 0.6390649278958639
Epoch 20/200, Loss: 0.636316615971405
Epoch 21/200, Loss: 0.6229255146790095
Epoch 22/200, Loss: 0.6146671629784114
Epoch 23/200, Loss: 0.6090929895384699
Epoch 24/200, Loss: 0.6011796588571663
Epoch 25/200, Loss: 0.596647163741609
Epoch 26/200, Loss: 0.5876404526736322
E

In [83]:
# Features the submission file is expected to provide.
feature_columns = ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 
                   'B07', 'B08', 'B8A', 'B11', 'B12', 'LST', 'is_building', 
                   'Air Temp at Surface [degC]', 'Relative Humidity [percent]', 
                   'Avg Wind Speed [m/s]', 'Wind Direction [degrees]', 'Solar Flux [W/m^2]']

# scaler_X and the model were fitted on the columns of `X`, in that order.
# The list above is maintained by hand, so make sure it still names exactly
# the training features instead of trusting it blindly.
training_columns = list(X.columns)
if set(feature_columns) != set(training_columns):
    raise ValueError(
        'feature_columns does not match the columns used for training: '
        f'only in feature_columns={sorted(set(feature_columns) - set(training_columns))}, '
        f'only in training data={sorted(set(training_columns) - set(feature_columns))}'
    )

submission = pd.read_csv('validation_imputed.csv')

missing_columns = [col for col in training_columns if col not in submission.columns]
if missing_columns:
    raise KeyError(f'validation_imputed.csv is missing feature columns: {missing_columns}')

# Select in *training* order: the scaler's per-column statistics and the
# network's first layer are positional, so a reordered frame would either be
# rejected by scikit-learn or silently scaled with the wrong statistics.
submission_prepared = submission[training_columns].copy()

submission_prepared_scaled = scaler_X.transform(submission_prepared)

submission_tensor = torch.tensor(submission_prepared_scaled, dtype=torch.float32)

model.eval()

with torch.no_grad():
    final_predictions_tensor = model(submission_tensor)
    # The network predicts in scaled units; map back through scaler_y.
    final_predictions_np = scaler_y.inverse_transform(final_predictions_tensor.numpy())

final_prediction_series = pd.Series(final_predictions_np.flatten())

In [84]:
# Coordinates come from the template; predictions are attached by row
# position, so the template is assumed to list rows in the same order as the
# predictions -- TODO confirm against the validation file.
sub = pd.read_csv('Submission_template.csv')

submission_df = pd.DataFrame(
    {
        'Longitude': sub['Longitude'].to_numpy(),
        'Latitude': sub['Latitude'].to_numpy(),
        'UHI Index': final_prediction_series.to_numpy(),
    }
)

submission_df.to_csv('submission.csv', index=False)