In [1]:
import os

# Remember the directory the kernel started in, but only the first time this
# cell runs. A bare relative chdir would climb one more level on every re-run,
# silently changing which files the later cells resolve.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()

# Work from the parent of the start directory; re-running lands in the same place.
os.chdir(os.path.join(NOTEBOOK_START_DIR, "../"))

In [2]:
# Display the current working directory to confirm the chdir above took effect.
# os.getcwd() is used instead of the bare `pwd` automagic, which only works in
# a single-line cell with automagic enabled and no variable named `pwd`.
os.getcwd()

'/home/jvdboss/workspace/ML_DL/image_classification/Lung_Xray_Classifier'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TransformDataConfig:
    """Immutable settings for the data-transformation stage.

    Instances are built by ``ConfigurationManager.get_transform_data_config``
    from the ``data_transformation`` section of the config file (paths) and
    from the params file (colour-jitter values).
    """
    # Stage directory; created when the config object is built.
    root_dir: Path
    # Taken from ``unzip_dir`` in the config section.
    unzip_dir: Path
    # Taken from ``transform_train_data`` in the config section.
    transform_train_path: Path
    # Taken from ``transform_test_data`` in the config section.
    transform_test_path: Path
    # The four values below are passed to ``transforms.ColorJitter`` for training.
    params_brightness: float
    params_contrast: float
    params_saturation: float 
    params_hue: float
    # Directory whose ``train`` and ``test`` sub-folders are read by ``ImageFolder``.
    ingested_data: Path

In [4]:
from xray.constants import *
from xray.utils import read_yaml, create_directories

In [5]:
class ConfigurationManager:
    """Load the config and params YAML files and hand out per-stage settings.

    NOTE(review): the objects returned by ``read_yaml`` are accessed by
    attribute (``.artifacts_root``, ``.BRIGHTNESS``), so they are presumably
    attribute-style mappings; confirm against ``xray.utils``.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the config YAML file.
            params_filepath: Location of the params YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_transform_data_config(self) -> TransformDataConfig:
        """Create the stage directory and return the transformation settings.

        Returns:
            A ``TransformDataConfig`` whose paths come from the
            ``data_transformation`` config section and whose colour-jitter
            values come from the params file.
        """
        section = self.config.data_transformation
        hyper_params = self.params

        # The stage directory is created as a side effect of asking for its config.
        create_directories([section.root_dir])

        return TransformDataConfig(
            root_dir=Path(section.root_dir),
            unzip_dir=Path(section.unzip_dir),
            ingested_data=Path(section.ingested_data),
            transform_train_path=Path(section.transform_train_data),
            transform_test_path=Path(section.transform_test_data),
            params_brightness=hyper_params.BRIGHTNESS,
            params_contrast=hyper_params.CONTRAST,
            params_saturation=hyper_params.SATURATION,
            params_hue=hyper_params.HUE,
        )

In [6]:
import os 
import urllib.request as request
from zipfile import ZipFile
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

class TransformData:
    """Build torchvision transforms and DataLoaders for the ingested images.

    Images are read with ``datasets.ImageFolder`` from the ``train`` and
    ``test`` sub-folders of ``config.ingested_data``.
    """

    # Side length in pixels used for both the resize and the centre crop.
    IMAGE_SIZE = 224
    # Per-channel mean / std handed to ``transforms.Normalize``.
    # NOTE(review): these look like the usual ImageNet statistics; confirm.
    NORMALIZE_MEAN = [0.485, 0.456, 0.406]
    NORMALIZE_STD = [0.229, 0.224, 0.225]

    def __init__(self, config: "TransformDataConfig"):
        """Store the stage configuration.

        Args:
            config: Settings providing ``ingested_data`` and the four
                ``params_*`` colour-jitter values.
        """
        self.config = config

    def _resize_and_crop(self):
        """Return the resize + centre-crop steps shared by both pipelines."""
        return [
            transforms.Resize(self.IMAGE_SIZE),
            transforms.CenterCrop(self.IMAGE_SIZE),
        ]

    def _to_normalized_tensor(self):
        """Return the tensor conversion + normalisation steps shared by both pipelines."""
        return [
            transforms.ToTensor(),
            transforms.Normalize(self.NORMALIZE_MEAN, self.NORMALIZE_STD),
        ]

    def transforming_training_data(self):
        """Build the training pipeline, including random augmentation.

        Returns:
            A ``transforms.Compose`` that resizes, centre-crops, applies
            colour jitter (strengths taken from the config), a random
            horizontal flip and a random rotation of up to 10 degrees, then
            converts to a tensor and normalises.
        """
        cfg = self.config
        return transforms.Compose([
            *self._resize_and_crop(),
            transforms.ColorJitter(
                brightness=cfg.params_brightness,
                contrast=cfg.params_contrast,
                saturation=cfg.params_saturation,
                hue=cfg.params_hue,
            ),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            *self._to_normalized_tensor(),
        ])

    def transforming_testing_data(self):
        """Build the deterministic test pipeline (no augmentation).

        Returns:
            A ``transforms.Compose`` that resizes, centre-crops, converts to a
            tensor and normalises.
        """
        return transforms.Compose([
            *self._resize_and_crop(),
            *self._to_normalized_tensor(),
        ])

    def data_loader(self, batch_size: int = 2):
        """Create the train and test DataLoaders.

        Args:
            batch_size: Number of images per batch for both loaders.

        Returns:
            A ``(train_loader, test_loader)`` tuple. Only the training loader
            shuffles.
        """
        data_root = self.config.ingested_data
        train_dir = os.path.join(data_root, 'train')
        test_dir = os.path.join(data_root, 'test')

        # Kept from the original: make sure both folders exist before reading them.
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(test_dir, exist_ok=True)

        # The transform builders must be *called*: ImageFolder needs the
        # composed pipeline, not the bound method that creates it (passing the
        # method raises TypeError as soon as the first image is loaded).
        train_data = datasets.ImageFolder(
            train_dir, transform=self.transforming_training_data())
        test_data = datasets.ImageFolder(
            test_dir, transform=self.transforming_testing_data())

        train_loader = DataLoader(
            train_data, batch_size=batch_size, shuffle=True, pin_memory=True)
        test_loader = DataLoader(
            test_data, batch_size=batch_size, shuffle=False, pin_memory=True)

        class_names = train_data.classes
        print(class_names)
        print(f'Number of train images: {len(train_data)}')
        print(f'Number of test images: {len(test_data)}')
        return train_loader, test_loader

    def run_transformation_data(self, batch_size: int = 2):
        """Run the stage and return ``(train_loader, test_loader)``.

        Args:
            batch_size: Forwarded to ``data_loader``.
        """
        # data_loader builds the transforms itself, so there is nothing to
        # prepare here.
        return self.data_loader(batch_size=batch_size)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Build the stage configuration, then create the train/test DataLoaders.
# Exceptions propagate unchanged, so no try/except re-raise wrapper is needed.
config = ConfigurationManager()
transform_data_config = config.get_transform_data_config()
transformation_data = TransformData(config = transform_data_config)
# Keep the loaders so later cells can iterate over them.
train_loader, test_loader = transformation_data.run_transformation_data()

[2022-10-15 16:14:35,942: INFO: common]: yaml file: configs/config.yaml loaded successfully
[2022-10-15 16:14:35,945: INFO: common]: yaml file: params.yaml loaded successfully
[2022-10-15 16:14:35,946: INFO: common]: created directory at: artifacts
[2022-10-15 16:14:35,947: INFO: common]: created directory at: artifacts/transform_data
['NORMAL', 'PNEUMONIA']
Number of train images: 252
Number of test images: 197


In [11]:
# def get_list_files(dirName):
#     '''
#     input - directory location
#     output - list the files in the directory
#     '''
#     files_list = os.listdir(dirName)

#     return files_list

In [12]:
# transform_data_config = config.get_transform_data_config()

In [13]:
# datapath =transform_data_config.root_dir
# get_list_files(str(datapath)+'/train/NORMAL')

In [14]:
# get_list_files()

In [None]:
# transform_data_config

In [None]:
# data_path = self.config.unzip_dir

In [None]:
# class_name = ['NORMAL','PNEUMONIA']
# def get_list_files(dirName):
#     '''
#     input - directory location
#     output - list the files in the directory
#     '''
#     files_list = os.listdir(dirName)
#     return files_list

# files_list_normal_train = get_list_files(data_path+'/train/'+self.config["artifacts"]['data_transformation']['class_name']['NORMAL'])
# files_list_pneu_train = get_list_files(data_path+'/train/'+self.config["artifacts"]['data_transformation']['class_name']['PNEUMONIA'])
# files_list_normal_test = get_list_files(data_path+'/test/'+self.config["artifacts"]['data_transformation']['class_name']['NORMAL'])
# files_list_pneu_test = get_list_files(data_path+'/test/'+self.config["artifacts"]['data_transformation']['class_name']['PNEUMONIA'])

In [None]:
# train_transform = transforms.Compose([
#     transforms.Resize(224),
#     transforms.CenterCrop(224),
#     transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomRotation(10),
#     transforms.ToTensor(),
#     transforms.Normalize([0.485, 0.456, 0.406],
#                           [0.229, 0.224, 0.225])
# ])
# test_transform = transforms.Compose([
#     transforms.Resize(224),
#     transforms.CenterCrop(224),
#     transforms.ToTensor(),
#     transforms.Normalize([0.485, 0.456, 0.406],
#                           [0.229, 0.224, 0.225])
# ])


In [None]:
# train_data = datasets.ImageFolder(os.path.join(data_path, 'train'), transform= train_transform)
# test_data = datasets.ImageFolder(os.path.join(data_path, 'test'), transform= test_transform)
# train_loader = DataLoader(train_data,
#                           batch_size= 2, shuffle= True, pin_memory= True)
# test_loader = DataLoader(test_data,
#                          batch_size= 2, shuffle= False, pin_memory= True)
# class_names = train_data.classes
# print(class_names)
# print(f'Number of train images: {len(train_data)}')
# print(f'Number of test images: {len(test_data)}')

In [None]:

# def data_loader(self):
#     train_data = datasets.ImageFolder(os.path.join(data_path, 'train'), transform= train_transform)
#     test_data = datasets.ImageFolder(os.path.join(data_path, 'test'), transform= test_transform)
#     train_loader = DataLoader(train_data,
#                               batch_size= 2, shuffle= True, pin_memory= True)
#     test_loader = DataLoader(test_data,
#                              batch_size= 2, shuffle= False, pin_memory= True)
#     class_names = train_data.classes
#     print(class_names)
#     print(f'Number of train images: {len(train_data)}')
#     print(f'Number of test images: {len(test_data)}')