In [1]:
import sys
import os
sys.path.append(os.path.abspath('..'))

In [2]:
import torch
import src.load as load
import src.utils as utils
from src.model import KiloModel,ScheduleModel,FinalModel
from src.encoder import ClimateEncoder
from torch.utils.data import DataLoader
import torch.nn.utils.weight_norm as weight_norm
from torch import nn

In [3]:
# Load the splits produced by load.separate_year from the processed-data directory.
# Only the first two of the five returned values are kept; the rest are discarded.
# NOTE(review): presumably a year-based train/test split — confirm against src/load.py.
train_year, test_year, _, _, _ = load.separate_year('../data/processed/')

In [4]:
# Load the splits produced by load.separate_prop from the processed-data directory.
# Only the first two of the five returned values are kept; these feed the DataLoaders.
# NOTE(review): presumably a split by property/site — confirm against src/load.py.
train_prop, test_prop, _, _, _ = load.separate_prop('../data/processed/')

In [5]:
# Mini-batch loaders over the split returned by load.separate_prop.
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_prop, batch_size=32, shuffle=True)
# Validation is evaluation only: a fixed batch order keeps the reported
# mean-of-batch-means loss reproducible from run to run.
val_loader = DataLoader(test_prop, batch_size=32, shuffle=False)
# Mean squared error, shared by both training functions.
criterion = nn.MSELoss()


In [6]:
# Learning rates for the kilo (k), schedule (s) and final (f) models.
# All three share one value, reduced from 1e-3 to prevent NaN.
k_lr = s_lr = f_lr = 1e-4

In [7]:
def init_weights(m):
    """Re-initialise a single module in place; intended for ``Module.apply``.

    * ``nn.Linear``: Xavier-uniform weight, zero bias (when a bias exists).
    * ``nn.GRU``: every parameter with two or more dimensions is made
      orthogonal; every other parameter is drawn from a standard normal.
    * Any other module type is left untouched.

    NOTE(review): the GRU biases end up N(0, 1) while the Linear biases are
    zeroed — confirm that asymmetry is intentional.
    """
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)
        return

    if not isinstance(m, nn.GRU):
        return

    for tensor in (p.data for p in m.parameters()):
        # Matrices get an orthogonal init, vectors (the biases) a normal one.
        initialiser = nn.init.orthogonal_ if tensor.dim() >= 2 else nn.init.normal_
        initialiser(tensor)

# Kilo model: a fresh ClimateEncoder wrapped by KiloModel. Module.apply returns
# the module itself, so the custom initialisation (for better stability) is
# chained directly onto construction.
k_encoder = ClimateEncoder()
k_model = KiloModel(k_encoder).apply(init_weights)

# Adam with weight_decay=1e-4, plus a StepLR schedule that multiplies the
# learning rate by 0.1 after every 10 scheduler steps.
k_opt = torch.optim.Adam(params=k_model.parameters(), lr=k_lr, weight_decay=1e-4)
k_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=k_opt, step_size=10, gamma=0.1)


  WeightNorm.apply(module, name, dim)


In [8]:
# Default epoch count and number of observed weeks.
num_epochs, num_weeks = 100, 10

In [9]:
def train_kilo_model(model,train_loader,val_loader,criterion,optimizer,scheduler,num_epochs,num_weeks):
    """Train ``model`` on cumulative targets, validating after every epoch.

    Every batch is a 7-tuple unpacked as ``(features, encoded_features,
    climate_data, y_kilos, y_combined, <unused>, <unused>)``. The model is fed
    the first ``num_weeks * 7`` steps of ``climate_data`` (assumes daily
    climate rows — TODO confirm) and the first ``num_weeks`` steps of
    ``y_combined``. Its output and ``y_kilos`` are both cumulatively summed
    along dim 1 before being compared.

    Training loss is ``0.5 * criterion(full sequence) + criterion(steps from
    num_weeks onward)``; validation loss is ``criterion`` on the full sequence.
    The targets must extend past ``num_weeks`` steps: with a mean-reduced
    criterion an empty tail slice yields a NaN loss.

    Args:
        model: module called as ``model(features, encoded_features, climate, inputs)``.
        train_loader: sized iterable of training batches.
        val_loader: sized iterable of validation batches.
        criterion: loss callable ``criterion(prediction, target)``.
        optimizer: optimizer over ``model``'s parameters.
        scheduler: LR scheduler, stepped once per epoch.
        num_epochs: maximum number of epochs to run.
        num_weeks: number of observed weeks given to the model.

    Returns:
        The same ``model`` instance, trained in place.
    """
    for epoch in range(num_epochs):
        train_loss = 0.0
        batches_used = 0
        model.train()

        for batch in train_loader:
            features, encoded_features, climate_data, y_kilos, y_combined, _, _ = batch

            # Batches with NaN inputs are skipped rather than poisoning the weights.
            if torch.isnan(features).any() or torch.isnan(climate_data).any() or torch.isnan(y_combined).any():
                print("NaN detected in input data, skipping batch")
                continue

            y = y_kilos.cumsum(dim=1)
            climate_data = climate_data[:,:num_weeks * 7,:]
            inputs = y_combined[:,:num_weeks,:]
            outputs = model(features, encoded_features, climate_data, inputs).cumsum(dim=1)
            # The not-yet-observed part of the sequence is weighted more heavily.
            loss = 0.5 * criterion(outputs, y) + criterion(outputs[:,num_weeks:], y[:,num_weeks:])

            # A NaN loss ends training altogether. A plain `break` would only
            # leave the batch loop and let the remaining epochs keep running.
            if torch.isnan(loss):
                print(f"NaN loss detected at epoch {epoch+1}")
                return model

            optimizer.zero_grad()
            loss.backward()

            # Gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            train_loss += loss.item()
            batches_used += 1

        val_loss = 0.0
        model.eval()
        with torch.no_grad():
            for batch in val_loader:
                features, encoded_features, climate_data, y_kilos, y_combined, _, _ = batch
                y = y_kilos.cumsum(dim=1)
                climate_data = climate_data[:,:num_weeks * 7,:]
                inputs = y_combined[:,:num_weeks,:]
                outputs = model(features, encoded_features, climate_data, inputs).cumsum(dim=1)
                loss = criterion(outputs,y)
                val_loss += loss.item()

        scheduler.step()

        # Average over the batches that actually contributed, so skipped
        # batches do not deflate the reported loss. With no usable batch the
        # average is NaN, which stops training below.
        avg_train_loss = train_loss/batches_used if batches_used else float("nan")
        avg_val_loss = val_loss/len(val_loader)

        if torch.isnan(torch.tensor(avg_train_loss)) or torch.isnan(torch.tensor(avg_val_loss)):
            print(f"NaN detected at epoch {epoch+1}. Stopping training.")
            break

        print(f"Epoch {epoch+1}/{num_epochs}, Week {num_weeks}, Train Loss: {avg_train_loss}, Validation Loss: {avg_val_loss}")
    return model

In [10]:

# Schedule model: its own ClimateEncoder instance wrapped by ScheduleModel.
# NOTE(review): unlike k_model, init_weights is not applied here — confirm intended.
s_encoder = ClimateEncoder()
s_model = ScheduleModel(s_encoder)

# Same optimiser/scheduler recipe as the kilo model: Adam with
# weight_decay=1e-4 and a 10x LR cut after every 10 scheduler steps.
s_opt = torch.optim.Adam(params=s_model.parameters(), lr=s_lr, weight_decay=1e-4)
s_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=s_opt, step_size=10, gamma=0.1)

  WeightNorm.apply(module, name, dim)


In [11]:
def train_schedule_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, num_weeks):
    """Train ``model`` to predict ``schedule``, validating after every epoch.

    Every batch is a 7-tuple unpacked as ``(features, encoded_features,
    climate_data, <unused>, y_combined, schedule, <unused>)``. The model is fed
    the first ``num_weeks * 7`` steps of ``climate_data`` (assumes daily
    climate rows — TODO confirm) and the first ``num_weeks`` steps of
    ``y_combined``. Its raw output is compared with ``schedule`` through
    ``criterion`` for both training and validation.

    Args:
        model: module called as ``model(features, encoded_features, climate, inputs)``.
        train_loader: sized iterable of training batches.
        val_loader: sized iterable of validation batches.
        criterion: loss callable ``criterion(prediction, target)``.
        optimizer: optimizer over ``model``'s parameters.
        scheduler: LR scheduler, stepped once per epoch.
        num_epochs: maximum number of epochs to run.
        num_weeks: number of observed weeks given to the model.

    Returns:
        The same ``model`` instance, trained in place.
    """
    for epoch in range(num_epochs):
        train_loss = 0.0
        batches_used = 0
        model.train()

        for batch in train_loader:
            features, encoded_features, climate_data, _, y_combined, schedule, _ = batch

            # Batches with NaN inputs are skipped rather than poisoning the weights.
            if torch.isnan(features).any() or torch.isnan(climate_data).any() or torch.isnan(schedule).any():
                print("NaN detected in input data, skipping batch")
                continue

            climate_data = climate_data[:, :num_weeks * 7, :]
            inputs = y_combined[:, :num_weeks, :]
            outputs = model(features, encoded_features, climate_data, inputs)
            loss = criterion(outputs, schedule)

            # A NaN loss ends training altogether. A plain `break` would only
            # leave the batch loop and let the remaining epochs keep running.
            if torch.isnan(loss):
                print(f"NaN loss detected at epoch {epoch+1}")
                return model

            optimizer.zero_grad()
            loss.backward()

            # Gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            train_loss += loss.item()
            batches_used += 1

        val_loss = 0.0
        model.eval()
        with torch.no_grad():
            for batch in val_loader:
                features, encoded_features, climate_data, _, y_combined, schedule, _ = batch
                climate_data = climate_data[:, :num_weeks * 7, :]
                inputs = y_combined[:, :num_weeks, :]
                outputs = model(features, encoded_features, climate_data, inputs)
                loss = criterion(outputs, schedule)
                val_loss += loss.item()

        scheduler.step()

        # Average over the batches that actually contributed, so skipped
        # batches do not deflate the reported loss. With no usable batch the
        # average is NaN, which stops training below.
        avg_train_loss = train_loss / batches_used if batches_used else float("nan")
        avg_val_loss = val_loss / len(val_loader)

        if torch.isnan(torch.tensor(avg_train_loss)) or torch.isnan(torch.tensor(avg_val_loss)):
            print(f"NaN detected at epoch {epoch+1}. Stopping training.")
            break

        print(f"Epoch {epoch+1}/{num_epochs}, Week {num_weeks}, Train Loss: {avg_train_loss}, Validation Loss: {avg_val_loss}")
    return model

In [12]:
# Curriculum stages as (observed weeks, epochs); both models are trained at each stage.
rates = [(1,50),(10,50),(14,20),(20,10),(25,5),(14,20),(10,50)]
for week, epochs in rates:
    # Restart the learning-rate schedule for every stage. One StepLR shared
    # across all stages keeps multiplying the LR by 0.1 every 10 epochs, so it
    # would already be ~1e-9 after the first stage and the remaining stages
    # would barely update the weights. The optimizers (and their Adam state)
    # are kept; only the LR and its schedule start over.
    for group in k_opt.param_groups:
        group['lr'] = k_lr
    for group in s_opt.param_groups:
        group['lr'] = s_lr
    k_scheduler = torch.optim.lr_scheduler.StepLR(k_opt, step_size=10, gamma=0.1)
    s_scheduler = torch.optim.lr_scheduler.StepLR(s_opt, step_size=10, gamma=0.1)

    train_kilo_model(k_model,train_loader,val_loader,criterion,k_opt,k_scheduler,epochs,week)
    train_schedule_model(s_model,train_loader,val_loader,criterion,s_opt,s_scheduler,epochs,week)

Epoch 1/50, Week 1, Train Loss: 78211949.53684211, Validation Loss: 52176134.166666664
Epoch 2/50, Week 1, Train Loss: 77862495.28421053, Validation Loss: 52009259.75
Epoch 3/50, Week 1, Train Loss: 77479433.07368422, Validation Loss: 51700149.75
Epoch 4/50, Week 1, Train Loss: 76822959.97894737, Validation Loss: 51708737.416666664
Epoch 5/50, Week 1, Train Loss: 76258443.38947369, Validation Loss: 50881944.125
Epoch 6/50, Week 1, Train Loss: 75403154.94736843, Validation Loss: 50144028.833333336
Epoch 7/50, Week 1, Train Loss: 74297466.21052632, Validation Loss: 49783385.083333336
Epoch 8/50, Week 1, Train Loss: 73068048.05263157, Validation Loss: 48893858.333333336
Epoch 9/50, Week 1, Train Loss: 71895232.35789473, Validation Loss: 47677357.958333336
Epoch 10/50, Week 1, Train Loss: 70913223.26315789, Validation Loss: 46735642.291666664
Epoch 11/50, Week 1, Train Loss: 70052671.2, Validation Loss: 46366102.333333336
Epoch 12/50, Week 1, Train Loss: 69981952.29473685, Validation Loss:

In [13]:
# Persist only the learned parameters (state_dict) of each model, written
# relative to the current working directory.
torch.save(obj=k_model.state_dict(), f='k_model.pth')
torch.save(obj=s_model.state_dict(), f='s_model.pth')
