In [77]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yaml

from typing import Dict
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader


# Device configuration
device = torch.device('mps')



In [78]:
with open('nb_parameters.yml') as file:
    parameters = yaml.load(file, Loader=yaml.FullLoader)

test_size = parameters['model_options']['test_size']

print(test_size)

0.2


In [79]:

# Hyper-parameters 

num_classes = parameters['model_options']['num_classes']
num_epochs = parameters['model_options']['num_epochs']
batch_size = parameters['model_options']['batch_size']
learning_rate = parameters['model_options']['learning_rate']

"""

Each feature as a time step in your sequence, you could set sequence_length to 150 and input_size to 1.
This would mean you are feeding in sequences of length 150, with each time step in the sequence having 1 feature.

"""

input_size = parameters['model_options']['input_size']
sequence_length = parameters['model_options']['sequence_length'] # the window it trains with can be selected
hidden_size = parameters['model_options']['hidden_size']
num_layers = parameters['model_options']['num_layers']
random_state = parameters['model_options']['random_state']

In [80]:
# My dataset for gas sensor
df = pd.read_csv('../data/03_primary/model_input_table.csv')

# read data in 03primary model_inpput_table.pq
# df = pd.read_parquet('../data/03_primary/model_input_table.pq')

In [81]:
def split_data(model_input_table: pd.DataFrame) -> torch.tensor:
    # print(f"Test size: {parameters['test_size']}, type: {type(parameters['test_size'])}")
    # print(f"Random state: {parameters['random_state']}, type: {type(parameters['random_state'])}")

    # Split data into features and target
    X = model_input_table[model_input_table.columns[:-1]].values  # Assuming last column is the target
    y = model_input_table[model_input_table.columns[-1]].values
    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    # Initialize StandardScaler
    scaler = StandardScaler()
    # Fit on training data
    scaler.fit(X_train)
    # Transform both training and testing data
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Ensure y_train and y_test are in the correct format
    if isinstance(y_train, pd.Series):
        y_train = y_train.values
    if isinstance(y_test, pd.Series):
        y_test = y_test.values
    # Convert to PyTorch tensors
    X_train_tensor = torch.tensor(X_train_scaled.astype(np.float32))
    y_train_tensor = torch.tensor(y_train.astype(np.float32))
    X_test_tensor = torch.tensor(X_test_scaled.astype(np.float32))
    y_test_tensor = torch.tensor(y_test.astype(np.float32))

    return X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor

In [82]:
# create X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor from split_data(df)
X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor = split_data(df)
# Create TensorDatasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
# Initialize DataLoaders
batch_size = parameters['model_options']['batch_size']  # You can adjust the batch size according to your needs
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Now, train_loader and test_loader can be used in your training loop

In [83]:
# Fully connected neural network with one hidden layer
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # -> x needs to be: (batch_size, seq, input_size)
        
        # or:
        #self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        #self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        # Set initial hidden states (and cell states for LSTM)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        
        # x: (n, 28, 28), h0: (2, n, 128)
        
        # Forward propagate RNN
        # out, _ = self.rnn(x, h0)  
        # or:
        out, _ = self.lstm(x, (h0,c0))  
        
        # out: tensor of shape (batch_size, seq_length, hidden_size)
        # out: (n, 28, 128)
        
        # Decode the hidden state of the last time step
        out = out[:, -1, :]
        # out: (n, 128)
         
        out = self.fc(out)
        # out: (n, 10)
        return out

model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)


In [89]:
# turn the block below into a function
def train_model (data: DataLoader)->():
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

# Train the model
    n_total_steps = len(train_loader)
    for epoch in range(num_epochs):
        for i, (bins, target) in enumerate(train_loader):  
            bins = bins.reshape(-1, sequence_length, input_size).to(device)
            target = target.to(device)
        
        # Forward pass
        outputs = model(bins)
        # Example of reshaping/squeezing if applicable
        outputs = outputs.squeeze()  # Removes dimensions of size 1
        outputs = outputs[:64]  # Adjust if you need to slice the outputs

        target = target.unsqueeze(1).to(device)  # Add an extra dimension to match outputs
        loss = criterion(outputs, target)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

    # Calculate RMSE at the end of each epoch
        model.eval()  # Set the model to evaluation mode
        with torch.no_grad():  # Don't calculate gradients
            total_loss = 0
            count = 0
            for bins, target in test_loader:  # Replace with your validation loader
                bins = bins.reshape(-1, sequence_length, input_size).to(device)
                target = target.unsqueeze(1).to(device)  # Add an extra dimension to match outputs
                outputs = model(bins)
                loss = criterion(outputs, target)
                total_loss += loss.item()
                count += 1
            rmse = np.sqrt(total_loss / count)
            print(f'Epoch [{epoch+1}/{num_epochs}], RMSE on validation data: {rmse}')
        model.train()  # Set the model back to training mode
    # Save the model after training
    lstm_model = torch.save(model.state_dict())
    return lstm_model

In [90]:
train_model(train_loader)

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/10], RMSE on validation data: 0.19470462299237312
Epoch [2/10], RMSE on validation data: 0.17271015611667567
Epoch [3/10], RMSE on validation data: 0.11168149516464093
Epoch [4/10], RMSE on validation data: 0.10652974043979811
Epoch [5/10], RMSE on validation data: 0.13228593485973064
Epoch [6/10], RMSE on validation data: 0.13436266413702383
Epoch [7/10], RMSE on validation data: 0.10969141055424163
Epoch [8/10], RMSE on validation data: 0.08177059063703315
Epoch [9/10], RMSE on validation data: 0.0870909988296493
Epoch [10/10], RMSE on validation data: 0.10273631967944948
Model saved to ../data/06_models/model.pth


'../data/06_models/model.pth'

In [94]:
# model inference below

inf_model = RNN(input_size=parameters['model_options']['input_size'], 
                  hidden_size=parameters['model_options']['hidden_size'], 
                  num_layers=parameters['model_options']['num_layers'], 
                  num_classes=parameters['model_options']['num_classes'])

inf_model.load_state_dict(torch.load('../data/06_models/model.pth'))
inf_model.eval()

# Example of a dummy input (replace with actual data as needed)
dummy_input = torch.randn(1, parameters['model_options']['sequence_length'], parameters['model_options']['input_size'])  # Shape: [batch_size, sequence_length, input_size]

# Determine the device where the model is
device = next(inf_model.parameters()).device

# Move the dummy_input to the same device as the model
dummy_input = dummy_input.to(device)

# Now you can pass the dummy_input to the model
with torch.no_grad():  # Disable gradient calculation
    prediction = inf_model(dummy_input)

print(prediction)


tensor([[1.3896]])


'torch.FloatTensor'