In [1]:
import os

In [2]:
os.chdir("../")

In [3]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=False)
class TrainingConfig:
    root_dir: Path
    trained_model_path: Path
    resnet_model_path: Path
    training_data: Path
    test_data: Path
    params_epochs: int
    params_batch_size: int
    params_is_augmentation: bool
    params_image_size: list

In [4]:
from IntelImageClassification.constants import *
from IntelImageClassification.utils.common import read_yaml,create_directories
import torch
import torch.nn as nn
from torchvision import transforms, datasets
from torchsummary import summary
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import os
from pathlib import Path
import random
from PIL import Image
from PIL import Image
import warnings
warnings.filterwarnings('ignore')



In [5]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_training_config(self) -> TrainingConfig:
        training = self.config.training
        prepare_resnet_model = self.config.prepare_resnet_model
        params = self.params
        training_data = os.path.join(self.config.data_ingestion.unzip_dir, "seg_train","seg_train")
        print(training_data)
        test_data=os.path.join(self.config.data_ingestion.unzip_dir, "seg_test","seg_test")
        print(test_data)
        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            resnet_model_path=Path(prepare_resnet_model.resnet_model_path),
            training_data=Path(training_data),
            test_data=Path(test_data),
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_is_augmentation=params.AUGMENTATION,
            params_image_size=params.IMAGE_SIZE
        )

        return training_config

In [None]:
class Training:
    def __init__(self,config:TrainingConfig):
        self.config=config
    
    def get_resnet_model(self):
        self.model=torch.load(self.config.resnet_model_path).to(torch.device('cuda'))
    
    def train_valid_generator(self):
        self.training_data=datasets.ImageFolder(root=self.config.training_data,transform=transforms.Compose([
        transforms.Resize(size=(150 , 150)) ,
        transforms.RandomCrop(size=(150,150)),  
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])]))
        self.test_data=datasets.ImageFolder(root=self.config.test_data , transform = transforms.Compose([
        transforms.Resize((150, 150)), 
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])]))
    
    def class_finder(self):
        self.classes = sorted(i.name for i in os.scandir(self.config.training_data) if i.is_dir())
        if not self.classes:
            raise FileNotFoundError(f'This directory dose not have any classes : {self.training_data}')
        self.class_to_inx = {name : value for name , value in enumerate(self.classes) }
    

    def dataloaders(self):
        self.number_train=len(self.training_data)
        self.indx = list(range(self.number_train))
        np.random.shuffle(self.indx)
        self.split = int(0.10 * self.number_train)
        train_idx, valid_idx = self.indx[self.split:], self.indx[:self.split]
        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)
        self.train_dataloader=DataLoader(dataset= self.training_data , 
                              batch_size= 32 ,  
                              num_workers=0,
                              sampler=train_sampler)
        self.valid_dataloader = DataLoader(dataset=self.training_data , 
                             batch_size=32 , 
                             num_workers=0,
                             sampler=valid_sampler                            
                             )
        self.test_dataloader = DataLoader(dataset=self.test_data,
                            batch_size=32,
                            num_workers=0,
                            shuffle=False)
        
    def train_model(self):
        self.criterion=nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.01)
        self.epochs=1
        self.train_samples_num = 12630
        self.val_samples_num = 1404
        self.train_costs=[]
        self.val_costs=[]
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'



        for epoch in range(self.epochs):

            train_running_loss = 0
            correct_train = 0
        
            self.model.train().cuda()
        
            for inputs, labels in self.train_dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
            
                self.optimizer.zero_grad()
                prediction = self.model(inputs)
                        
                loss = self.criterion(prediction, labels)
          
                loss.backward()         
                self.optimizer.step()
                _, predicted_outputs = torch.max(prediction.data, 1)
                correct_train += (predicted_outputs == labels).float().sum().item()
                train_running_loss += (loss.data.item() * inputs.shape[0])


            train_epoch_loss = train_running_loss / self.train_samples_num
        
            self.train_costs.append(train_epoch_loss)
        
            train_acc =  correct_train / self.train_samples_num
            val_running_loss = 0
            correct_val = 0
      
            self.model.eval().cuda()
    
            with torch.no_grad():
                for inputs, labels in self.valid_dataloader:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)
                    prediction = self.model(inputs)
                    loss = self.criterion(prediction, labels)
                    _, predicted_outputs = torch.max(prediction.data, 1)
                    correct_val += (predicted_outputs == labels).float().sum().item()

                val_running_loss += (loss.data.item() * inputs.shape[0])

                val_epoch_loss = val_running_loss / self.val_samples_num
                self.val_costs.append(val_epoch_loss)
                val_acc =  correct_val / self.val_samples_num
        
            info = "[Epoch {}/{}]: train-loss = {:0.6f} | train-acc = {:0.3f} | val-loss = {:0.6f} | val-acc = {:0.3f}"
        
            print(info.format(epoch+1, self.epochs, train_epoch_loss, train_acc, val_epoch_loss, val_acc))
        
            torch.save(self.model.state_dict(), 'checkpoint_gpu_{}'.format(epoch + 1)) 
                                                                
        torch.save(self.model.state_dict(), Path('artifacts/training/resnet-50_weights_gpu'))  
        
        return self.train_costs, self.val_costs

    
        







        


        
        


        
    
        
    
    


In [7]:
config = ConfigurationManager()
training_config = config.get_training_config()
training = Training(config=training_config)

[2025-02-15 13:03:12,528: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-02-15 13:03:12,530: INFO: common: yaml file: params.yaml loaded successfully]
[2025-02-15 13:03:12,531: INFO: common: created directory at: artifacts]
artifacts/data_ingestion\seg_train\seg_train
artifacts/data_ingestion\seg_test\seg_test
[2025-02-15 13:03:12,533: INFO: common: created directory at: artifacts\training]


In [8]:
training.get_resnet_model()

In [9]:
training.train_valid_generator()

In [10]:
training.training_data

Dataset ImageFolder
    Number of datapoints: 14034
    Root location: artifacts\data_ingestion\seg_train\seg_train
    StandardTransform
Transform: Compose(
               Resize(size=(150, 150), interpolation=bilinear, max_size=None, antialias=warn)
               RandomCrop(size=(150, 150), padding=None)
               ToTensor()
               Normalize(mean=[0.5], std=[0.5])
           )

In [11]:
training.test_data

Dataset ImageFolder
    Number of datapoints: 3000
    Root location: artifacts\data_ingestion\seg_test\seg_test
    StandardTransform
Transform: Compose(
               Resize(size=(150, 150), interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
               Normalize(mean=[0.5], std=[0.5])
           )

In [12]:
training.class_finder()

In [13]:
training.dataloaders()

In [14]:
training.dataloaders()

In [17]:
training.train_dataloader

<torch.utils.data.dataloader.DataLoader at 0x2aca9dea8d0>

In [18]:
training.valid_dataloader

<torch.utils.data.dataloader.DataLoader at 0x2aca9e1e2d0>

In [19]:
training.number_train

14034

In [22]:
training.train_model()

[Epoch 1/1]: train-loss = 1.443819 | train-acc = 0.459 | val-loss = 0.019042 | val-acc = 0.610


([1.4438191619593288], [0.019041762902186468])

In [23]:
training.train_costs

[1.4438191619593288]

In [24]:
training.val_costs

[0.019041762902186468]

In [15]:
training_config

TrainingConfig(root_dir=WindowsPath('artifacts/training'), trained_model_path=WindowsPath('artifacts/training/trained_model.pt'), resnet_model_path=WindowsPath('artifacts/prepare_resnet_model/resnet_model.pt'), training_data=WindowsPath('artifacts/data_ingestion/seg_train/seg_train'), test_data=WindowsPath('artifacts/data_ingestion/seg_test/seg_test'), params_epochs=1, params_batch_size=16, params_is_augmentation=True, params_image_size=BoxList([150, 150, 3]))

In [16]:
training.get_resnet_model()

In [None]:
mdl=training.model

In [None]:
training.train_valid_generator()

In [None]:
training.training_data

In [None]:
training.test_data

In [None]:
training.class_finder()

In [None]:
cls=training.classes

In [None]:
inx=training.class_to_inx

In [None]:
train_data_custom = training.ImageFolderCustom(target_dir=training.config.training_data, 
                                      transform=transforms.Compose([
        transforms.Resize(size=(150 , 150)) ,
        transforms.RandomCrop(size=(150,150)),  
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])]),classes=cls,ctoinx=inx)
test_data_custom = training.ImageFolderCustom(target_dir= training.config.test_data, 
                                     transform=transforms.Compose([
        transforms.Resize((150, 150)), 
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])]),classes=cls,ctoinx=inx)
train_data_custom, test_data_custom

In [None]:
len(train_data_custom)

In [None]:
len(training.training_data)

In [None]:
type(mdl)

In [None]:
mdl=mdl.to(torch.device('cuda'))

In [None]:
summary(mdl,(3,150,150),batch_size=32)

In [None]:
try:
    config = ConfigurationManager()
    training_config = config.get_training_config()
    training = Training(config=training_config)
    training.get_resnet_model()
    training.train_valid_generator()
    training.class_finder()
    training.dataloaders()
    training.train_model()
except Exception as e:
    raise e

