In [1]:
import os
os.chdir("../")
%pwd

'/home/ajith/projects/Spaceship_Titanic_MLOps_Project'

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    root_dir: Path
    trained_model_path: Path
    base_model_path: Path
    training_data: Path
    encoder_traindata: Path
    params_epochs: int
    params_batch_size: int

@dataclass(frozen=True)
class PrepareCallbacksConfig:
    root_dir: Path
    tensorboard_root_log_dir: Path
    checkpoint_model_filepath: Path

In [3]:
from titanicSpaceShip.constants import *
from titanicSpaceShip.utils.common import read_yaml, create_directories
import torch
import time
from torch.utils.tensorboard import SummaryWriter
from sklearn.preprocessing import OneHotEncoder

In [10]:
class ConfigurationManager:
    def __init__(
        self, 
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])


    
    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        config = self.config.prepare_callbacks
        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)
        create_directories([
            Path(model_ckpt_dir),
            Path(config.tensorboard_root_log_dir)
        ])

        prepare_callback_config = PrepareCallbacksConfig(
            root_dir=Path(config.root_dir),
            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(config.checkpoint_model_filepath)
        )

        return prepare_callback_config
    
    def get_training_config(self) -> TrainingConfig:
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        training_data = os.path.join(self.config.data_ingestion.local_data_file)
        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            base_model_path=Path(prepare_base_model.base_model_path),
            training_data=Path(training_data),
            encoder_traindata=Path(training.encoder_traindata),
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE
        )

        return training_config

In [5]:
from titanicSpaceShip import logger

class SaveBestModel:
    """
    Class to save the best model while training. If the current epoch's 
    validation acc is greater than the previous least less, then save the
    model state.
    """
    def __init__(
        self, filepath, best_valid_acc=float(0)
    ):
        self.best_valid_acc = best_valid_acc
        self.filepath = filepath
        
    def __call__(
        self, current_valid_acc, 
        epoch, model, optimizer, criterion
    ):
        if current_valid_acc > self.best_valid_acc:
            self.best_valid_acc = current_valid_acc
            logger.info(f"\nBest validation acc: {self.best_valid_acc}")
            logger.info(f"\nSaving best model for epoch: {epoch+1}\n")
            torch.save({
                'epoch': epoch+1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': criterion,
                }, self.filepath)

class PrepareCallback:
    def __init__(self, config: PrepareCallbacksConfig):
        self.config = config
        # self.save_best_model = SaveBestModel()


    
    @property
    def _create_tb_callbacks(self):
        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        tb_running_log_dir = os.path.join(
            self.config.tensorboard_root_log_dir,
            f"tb_logs_at_{timestamp}",
        )
        return SummaryWriter(log_dir=tb_running_log_dir)

    @property
    def _create_ckpt_callbacks(self):
        return SaveBestModel(
            filepath=self.config.checkpoint_model_filepath
        )


    def get_tb_ckpt_callbacks(self):
        return [
            self._create_tb_callbacks,
            self._create_ckpt_callbacks
        ]


In [6]:
# from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
# from sklearn.preprocessing import LabelEncoder, StandardScaler
import pickle

In [8]:
import shutil

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    def __len__(self):
        return len(self.features)
    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

class Training:
    def __init__(self, config: TrainingConfig):
        self.config = config
        
    def get_base_model(self):
        print(self.config.base_model_path)
        self.model = torch.load(
            self.config.base_model_path
        )
        self.criterion = nn.BCELoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
    
    
    def train_valid_generator(self):

        data = pd.read_csv(self.config.training_data)
        y = data["Transported"].astype(int)
        X = data.drop(["SNo","Transported"], axis=1)
        encoder = OneHotEncoder(handle_unknown="ignore")
        X["CryoSleep"] = X["CryoSleep"].astype(float)
        X["VIP"] = X["VIP"].astype(float)
        multicol_encoded = encoder.fit(X[["HomePlanet","Destination"]])
        with open(self.config.encoder_traindata, 'wb') as f:
            pickle.dump(multicol_encoded, f)
        multicol_encoded = encoder.transform(X[["HomePlanet","Destination"]])
        multicol_encoded = multicol_encoded.toarray()
        multicol_encoded = pd.DataFrame(multicol_encoded, columns=encoder.get_feature_names_out())
        X.drop(["HomePlanet","Destination"], axis=1, inplace=True)
        X = pd.concat([X, multicol_encoded], axis=1)
        X_tensor = torch.Tensor(X.values)
        y_tensor = torch.Tensor(y.values)
        X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.20, random_state=21)
        train_dataset = CustomDataset(X_train, y_train)
        test_dataset = CustomDataset(X_test, y_test)
        self.train_loader = DataLoader(train_dataset, batch_size= self.config.params_batch_size, shuffle=True)
        self.test_loader = DataLoader(test_dataset, batch_size= self.config.params_batch_size, shuffle=True)

    @staticmethod
    def save_model(path: Path, destpath: Path):
        shutil.copyfile(path, destpath)


    def train(self, callback_list: list):
        for epoch in range(self.config.params_epochs):
            overall_loss = 0 
            for inputs, labels in self.train_loader:
                outputs = self.model(inputs)
                loss = self.criterion(outputs[:,0], labels)
                overall_loss += loss.item()
                loss.backward()
                self.optimizer.step()
            logger.info(f"Epoch {epoch+1} completed loss {overall_loss}")
            correct_predictions = 0
            total_samples = 0
            with torch.no_grad():
                for inputs, labels in self.test_loader:
                    outputs_test = self.model(inputs)
                    outputs_test = (outputs_test >= 0.3).float()
                    correct_predictions += torch.sum(outputs_test[:,0]==labels)
                    total_samples += labels.size(0)
            test_accuracy = correct_predictions/total_samples
            logger.info(f"Test Accuracy : {test_accuracy*100:.2f}%")
            tensorboard_writer, save_checkpoint = callback_list
            tensorboard_writer.add_scalar("Loss", overall_loss, epoch+1)
            tensorboard_writer.add_scalar("Test Accuracy", test_accuracy, epoch+1)
            save_checkpoint(test_accuracy*100, epoch+1, self.model, self.optimizer, self.criterion)

        self.save_model(
            path="artifacts/prepare_callbacks/checkpoint_dir/model.pt",
            destpath=self.config.trained_model_path
        )

In [11]:
try:
    config = ConfigurationManager()
    prepare_callbacks_config = config.get_prepare_callback_config()
    prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
    callback_list = prepare_callbacks.get_tb_ckpt_callbacks()

    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.get_base_model()
    training.train_valid_generator()
    training.train(
        callback_list=callback_list
    )
    
except Exception as e:
    raise e


[2024-01-14 12:57:15,453: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-01-14 12:57:15,457: INFO: common: yaml file: params.yaml loaded successfully]
[2024-01-14 12:57:15,462: INFO: common: created directory at: artifacts]
[2024-01-14 12:57:15,471: INFO: common: created directory at: artifacts/prepare_callbacks/checkpoint_dir]
[2024-01-14 12:57:15,472: INFO: common: created directory at: artifacts/prepare_callbacks/tensorboard_log_dir]
[2024-01-14 12:57:15,504: INFO: common: created directory at: artifacts/training]


artifacts/prepare_base_model/base_model.pt
[2024-01-14 12:57:16,312: INFO: 2738894329: Epoch 1 completed loss 27.30075991153717]
[2024-01-14 12:57:16,327: INFO: 2738894329: Test Accuracy : 67.22%]
[2024-01-14 12:57:16,331: INFO: 3343904453: 
Best validation acc: 67.22254180908203]
[2024-01-14 12:57:16,332: INFO: 3343904453: 
Saving best model for epoch: 2
]
[2024-01-14 12:57:16,402: INFO: 2738894329: Epoch 2 completed loss 37.61884707212448]
[2024-01-14 12:57:16,418: INFO: 2738894329: Test Accuracy : 71.48%]
[2024-01-14 12:57:16,420: INFO: 3343904453: 
Best validation acc: 71.47785949707031]
[2024-01-14 12:57:16,420: INFO: 3343904453: 
Saving best model for epoch: 3
]
[2024-01-14 12:57:16,505: INFO: 2738894329: Epoch 3 completed loss 44.046762466430664]
[2024-01-14 12:57:16,514: INFO: 2738894329: Test Accuracy : 74.99%]
[2024-01-14 12:57:16,517: INFO: 3343904453: 
Best validation acc: 74.98562622070312]
[2024-01-14 12:57:16,518: INFO: 3343904453: 
Saving best model for epoch: 4
]
[2024