In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
import pandas as pd
import PIL

In [2]:
train_path = '/home/kannika/codes_AI/Rheology2023/MSDT_datatrain.csv'
print(train_path)
test_path = '/home/kannika/codes_AI/Rheology2023/MSDT_datatest.csv'
print(test_path)

/home/kannika/codes_AI/Rheology2023/MSDT_datatrain.csv
/home/kannika/codes_AI/Rheology2023/MSDT_datatest.csv


In [3]:
train = pd.read_csv(train_path)
print(train.shape)
train.head()

(23635, 8)


Unnamed: 0.1,Unnamed: 0,GLY,folderName,fileName,pathimg,idt,MSDT,subset
0,0,GLY10,GLY10P100_D0_30HZ_20XINF_UWELL_20221229_213944,GLY10P100_D0_30HZ_20XINF_UWELL_20221229_213944...,/media/SSD/rheology2023/Glycerol_FFT/GLY10/GLY...,39,1.32,train
1,1,GLY10,GLY10P100_D0_30HZ_20XINF_UWELL_20221229_213944,GLY10P100_D0_30HZ_20XINF_UWELL_20221229_213944...,/media/SSD/rheology2023/Glycerol_FFT/GLY10/GLY...,12,0.42,train
2,2,GLY10,GLY10P100_D0_30HZ_20XINF_UWELL_20221229_213944,GLY10P100_D0_30HZ_20XINF_UWELL_20221229_213944...,/media/SSD/rheology2023/Glycerol_FFT/GLY10/GLY...,398,14.37,train
3,3,GLY10,GLY10P100_D0_30HZ_20XINF_UWELL_20221229_213944,GLY10P100_D0_30HZ_20XINF_UWELL_20221229_213944...,/media/SSD/rheology2023/Glycerol_FFT/GLY10/GLY...,31,1.04,train
4,4,GLY10,GLY10P100_D0_30HZ_20XINF_UWELL_20221229_213944,GLY10P100_D0_30HZ_20XINF_UWELL_20221229_213944...,/media/SSD/rheology2023/Glycerol_FFT/GLY10/GLY...,100,3.53,train


In [4]:
test = pd.read_csv(test_path)
print(test.shape)
test.head()

(870, 8)


Unnamed: 0.1,Unnamed: 0,GLY,folderName,fileName,pathimg,idt,MSDT,subset
0,0,GLY10,GLY10P100_D0_30HZ_20XINF_UWELL_20221230_072204,GLY10P100_D0_30HZ_20XINF_UWELL_20221230_072204...,/media/SSD/rheology2023/Glycerol_FFT_test/GLY1...,158,5.63,test
1,1,GLY10,GLY10P100_D0_30HZ_20XINF_UWELL_20221230_072204,GLY10P100_D0_30HZ_20XINF_UWELL_20221230_072204...,/media/SSD/rheology2023/Glycerol_FFT_test/GLY1...,25,0.84,test
2,2,GLY10,GLY10P100_D0_30HZ_20XINF_UWELL_20221230_072204,GLY10P100_D0_30HZ_20XINF_UWELL_20221230_072204...,/media/SSD/rheology2023/Glycerol_FFT_test/GLY1...,7,0.25,test
3,3,GLY10,GLY10P100_D0_30HZ_20XINF_UWELL_20221230_072204,GLY10P100_D0_30HZ_20XINF_UWELL_20221230_072204...,/media/SSD/rheology2023/Glycerol_FFT_test/GLY1...,125,4.45,test
4,4,GLY10,GLY10P100_D0_30HZ_20XINF_UWELL_20221230_072204,GLY10P100_D0_30HZ_20XINF_UWELL_20221230_072204...,/media/SSD/rheology2023/Glycerol_FFT_test/GLY1...,19,0.63,test


In [5]:
# data constants
BATCH_SIZE = 64
NUM_WORKERS = 1
IMG_SIZE = 384

## Function Create dataset list
def create_listdataset(df):
    img_list = df['pathimg'].tolist()
    label_list = df['MSDT'].tolist()
    return img_list, label_list

def split_datasets(train_path, test_path,  VALID_SPLIT): 
    train = pd.read_csv(train_path)
    print(f"Dataset Train Set : {train.shape[0]} images")
    test = pd.read_csv(test_path)
    print(f"Dataset Test Set : {test.shape[0]} images")
    print(f"*"*125)
    ## Split train, validation set
    trainset, validset = train_test_split(train, test_size=VALID_SPLIT, random_state=42, shuffle=True)
    print(f"Train set : {trainset.shape[0]} images")
    print(f"Validation set : {validset.shape[0]} images")
    ## Crate data list
    train_img_list, train_label_list = create_listdataset(trainset)
    print(f"For train set ; Images {len(train_img_list)} , Label; {len(train_label_list)}")
    val_img_list, val_label_list = create_listdataset(validset)
    print(f"For Validation set ; Images {len(val_img_list)} , Label; {len(val_label_list)}")
    test_img_list, test_label_list =  create_listdataset(test)
    print(f"For test set ; Images {len(test_img_list)} , Label; {len(test_label_list)}")
    
    return train_img_list, train_label_list, val_img_list, val_label_list, test_img_list, test_label_list

In [6]:
train_img_list, train_label_list, val_img_list, val_label_list, test_img_list, test_label_list = split_datasets(train_path, test_path,  0.2)

Dataset Train Set : 23635 images
Dataset Test Set : 870 images
*****************************************************************************************************************************
Train set : 18908 images
Validation set : 4727 images
For train set ; Images 18908 , Label; 18908
For Validation set ; Images 4727 , Label; 4727
For test set ; Images 870 , Label; 870


In [7]:
#The Training and Validation Transforms
# create image augmentations
def trans_train():
    transforms_train = transforms.Compose(
        [
            transforms.Resize((IMG_SIZE, IMG_SIZE)),
            transforms.RandomHorizontalFlip(p=0.3),
            transforms.RandomVerticalFlip(p=0.3),
            transforms.RandomResizedCrop(IMG_SIZE),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ]
    )
    return transforms_train 

def trans_valid():    
    transforms_valid = transforms.Compose(
        [
            transforms.Resize((IMG_SIZE, IMG_SIZE)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ]
    )
    return transforms_valid 

In [8]:
class create_datasets(torch.utils.data.Dataset):
    """
    Helper Class to create the pytorch dataset
    """

    def __init__(self, img_list, label_list, transforms=None):
        self.img_list = img_list
        self.label_list = label_list
        #self.mode = mode  # train 
        self.transforms = transforms

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, index):
        label = self.label_list[index]
        image = self.img_list[index]
        img = PIL.Image.open(image).convert('RGB')
        
        if self.transforms is not None:
            transformed_img = self.transforms(img)
        
        return transformed_img, label

In [21]:
## Data Loader    
def create_data_loaders(dataset_train, dataset_valid, dataset_test):
    """
    Function to build the data loaders.
    Parameters:
    :param dataset_train: The training dataset.
    :param dataset_valid: The validation dataset.
    :param dataset_test: The test dataset.
    """
    train_loader = torch.utils.data.DataLoader(dataset=dataset_train, batch_size=BATCH_SIZE, shuffle=True, 
                                                   num_workers=NUM_WORKERS)
    valid_loader = torch.utils.data.DataLoader(dataset=dataset_valid, batch_size=BATCH_SIZE, shuffle=False, 
                                                   num_workers=NUM_WORKERS)
    test_loader = torch.utils.data.DataLoader(dataset=dataset_test, batch_size=BATCH_SIZE, shuffle=False, 
                                                  num_workers=NUM_WORKERS)
  
    return train_loader, valid_loader, test_loader

In [22]:
dataset_train = create_datasets(img_list=train_img_list, label_list=train_label_list, transforms=trans_train())
dataset_valid = create_datasets(img_list=val_img_list, label_list=val_label_list, transforms=trans_train())
dataset_test = create_datasets(img_list=test_img_list, label_list=test_label_list, transforms=trans_valid())
## Data loader
train_loader, valid_loader, test_loader = create_data_loaders(dataset_train, dataset_valid, dataset_test)

In [23]:
dataset_train

<__main__.create_datasets at 0x7f7f63a70490>

In [24]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7f7f63a85220>

In [26]:
for i, data in tqdm(enumerate(train_loader), total=len(train_loader)):
    image, labels = data
    image = image.to(device)
    labels = labels.to(device)

TypeError: 'module' object is not callable

In [15]:
test_loader

<torch.utils.data.dataloader.DataLoader at 0x7f7f31371880>

In [13]:
import tqdm

for i, data in tqdm(enumerate(test_loader), total=len(test_loader)):
    print(data)

TypeError: 'module' object is not callable

In [16]:
def modify_model(pretrained, fine_tune, dropout):
    """
    Function to build the neural network model. Returns the final model.
    Parameters
    :param pretrained (bool): Whether to load the pre-trained weights or not.
    :param fine_tune (bool): Whether to train the hidden layers or not.
    :param num_classes (int): Number of classes in the dataset. 
    :dropout :Dropout to be True for all the different layers with prob == 0.1.
    """
    if pretrained:
        print('[INFO]: Loading pre-trained weights')
    elif not pretrained:
        print('[INFO]: Not loading pre-trained weights')
    model = timm.create_model("vit_large_patch32_384", pretrained=pretrained)
    if fine_tune:
        print('[INFO]: Fine-tuning all layers...')
        for params in model.parameters():
            params.requires_grad = True
    elif not fine_tune:
        print('[INFO]: Freezing hidden layers...')
        for params in model.parameters():
            params.requires_grad = False
            
    # Remove the final classification layer
    model.head = nn.Identity()
    # Add a new regression head
    model.regressor = nn.Linear(in_features=model.embed_dim, out_features=1)
    
    if dropout:
        print('[INFO]: Dropout to be True for all the different layers ...')
        model.apply(lambda m: set_dropout_p(m, p=0.1))
  
    return model 

In [21]:
# import torch
# import torch.nn as nn
# import torchvision.models as models

# model = models.vit_base_patch16_224(pretrained=True)
# model

In [19]:
model = timm.create_model("vit_large_patch32_384", pretrained=True)
print(model)

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(32, 32), stride=(32, 32))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1024, out_features=3072, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1024, out_features=1024, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=1024, out_features=4096, bias=True)
        (act): GELU()
        (drop1): Dropout(p=0.0, inplace=False)
        (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        (drop2): Dropout(p=0.0, inplace=False)
    

In [18]:
import timm

model_regress = modify_model(pretrained=True, fine_tune=False, dropout=False)
print(model_regress)

[INFO]: Loading pre-trained weights


Downloading:   0%|          | 0.00/1.23G [00:00<?, ?B/s]

[INFO]: Freezing hidden layers...
VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(32, 32), stride=(32, 32))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=1024, out_features=3072, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1024, out_features=1024, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=1024, out_features=4096, bias=True)
        (act): GELU()
        (drop1): Dropout(p=0.0, inplace=False)
        (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        (drop2): 

In [22]:
# total parameters and trainable parameters
total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.\n")

306,632,680 total parameters.
306,632,680 training parameters.



In [23]:
# total parameters and trainable parameters
total_params = sum(p.numel() for p in model_regress.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(p.numel() for p in model_regress.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.\n")

305,608,705 total parameters.
1,025 training parameters.

