# EDA DATA

# DATA FORMAT PATH

In [42]:
import os 
import random
paths = os.listdir('datasets/train')
labels = [random.choice([0,1]) for x in range(len(paths))]

In [43]:
import pandas as pd
df = pd.DataFrame([paths,labels])

In [44]:
df = df.transpose()
df = df.rename(columns={0:"paths",1:"labels"})

In [49]:
df.to_csv('datasets/train.csv')

In [50]:
def dataformat(path,name):
    paths = os.listdir(path)
    labels = [random.choice([0,1]) for x in range(len(paths))]
    df = pd.DataFrame([paths,labels])
    df = df.transpose()
    df = df.rename(columns={0:"paths",1:"labels"})
    df.to_csv(f'datasets/{name}.csv')

In [52]:
dataformat('datasets/test',name = 'test')
dataformat('datasets/valid',name = 'valid')

# CUSTOM DATASETS

In [13]:
import torch,cv2
from torch.utils.data import Dataset,DataLoader
import pandas as pd
class CustomDataset(Dataset):
    def __init__(self, path_csv,absolute_path="datasets/train"):
        self.path_csv = path_csv
        self.data = pd.read_csv(path_csv)
        self.absolute_path = absolute_path
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_path = self.data['paths'][idx]
        label = self.data['labels'][idx]
        image = cv2.imread(self.absolute_path+"/"+image_path)
        image = cv2.resize(image,(224,224))
        return torch.tensor(image), torch.tensor(label, dtype=torch.long)

# DATA LOADER

In [14]:
data_train = CustomDataset('datasets/train.csv')
data_test = CustomDataset('datasets/test.csv')
BATCH_SIZE = 32
TRAINLOADER = DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
TESTLOADER = DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=True)


In [15]:
# for inputs, labels in TRAINLOADER:
#     print(inputs.shape)
#     print(labels.shape)
#     exit()

# Build model

In [16]:
import torch
import torch.nn as nn

input_features = (224,224,3)
class Custom_Model(nn.Module):
    def __init__(self, num_classes=2):
        super(Custom_Model, self).__init__()
        self.conv3x3 = nn.Conv2d(in_channels=224,out_channels=112,kernel_size=3)
        self.conv1x1 = nn.Conv2d(in_channels=112,out_channels=64,kernel_size=1)
        self.linear = nn.Linear(in_features= 64*222, out_features= num_classes)
    def forward(self,x):
        b = x.shape[0]
        x = self.conv3x3(x)
        x = self.conv1x1(x)
        x = x.view(b,-1)
        x = self.linear(x)
        return x

### Test model

In [17]:
input_test = torch.rand((8,224,224,3))
model = Custom_Model()
output_test = model(input_test)
output_test.shape

torch.Size([8, 2])

# Custom loss

In [18]:
import torch
import torch.nn as nn

class CustomCrossEntropyLoss(nn.Module):
    def __init__(self):
        super(CustomCrossEntropyLoss, self).__init__()

    def forward(self, input, target):
        log_probs = torch.log_softmax(input, dim=1)
        loss = -log_probs.gather(dim=1, index=target.view(-1, 1))
        
        loss = loss.mean()
        return loss

### Test loss

In [19]:
input_tensor = torch.randn(3, 5)  # Example input tensor with batch size 3 and 5 classes
target_tensor = torch.tensor([1, 0, 3])  # Example target tensor with class indices

# Instantiate the custom loss
custom_loss = CustomCrossEntropyLoss()

# Compute the loss
loss = custom_loss(input_tensor, target_tensor)

print(loss)

tensor(2.2939)


# Train

### config

In [20]:
import torch.optim as optim

model = Custom_Model()
criterion = CustomCrossEntropyLoss()

device = torch.device('cpu')

model.to(device)
criterion.to(device)

EPOCHS = 10
lr = 0.001
optimizer = optim.Adam(model.parameters(), lr=float(lr))
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5)


### run 

In [21]:
from tqdm import tqdm


for epoch in range(EPOCHS):
    loop = tqdm(enumerate(TRAINLOADER), total=len(TRAINLOADER), leave=True)
    
    for batch_idx, (inputs, targets) in loop:
        inputs = inputs.to(device).to(torch.float32)
        targets = targets.to(device)
        optimizer.zero_grad()
        out = model(inputs)
        loss = criterion(out, targets)
        loss.backward()
        optimizer.step()
        loop.set_postfix(loss=loss.item())

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss
    }
    torch.save(checkpoint, 'checkpoint.pth')
    print("Checkpoint saved for epoch:", epoch)

100%|██████████| 2/2 [00:00<00:00,  6.99it/s, loss=530] 


Checkpoint saved for epoch: 0


100%|██████████| 2/2 [00:00<00:00,  7.09it/s, loss=120]    


Checkpoint saved for epoch: 1


100%|██████████| 2/2 [00:00<00:00,  7.43it/s, loss=1.59e+3]


Checkpoint saved for epoch: 2


100%|██████████| 2/2 [00:00<00:00,  7.41it/s, loss=3.34e+3]


Checkpoint saved for epoch: 3


100%|██████████| 2/2 [00:00<00:00,  5.35it/s, loss=2.6e+3] 


Checkpoint saved for epoch: 4


100%|██████████| 2/2 [00:00<00:00,  7.43it/s, loss=738]


Checkpoint saved for epoch: 5


100%|██████████| 2/2 [00:00<00:00,  6.41it/s, loss=149]


Checkpoint saved for epoch: 6


100%|██████████| 2/2 [00:00<00:00,  7.30it/s, loss=1.72e+3]


Checkpoint saved for epoch: 7


100%|██████████| 2/2 [00:00<00:00,  7.33it/s, loss=122]


Checkpoint saved for epoch: 8


100%|██████████| 2/2 [00:00<00:00,  6.92it/s, loss=361]

Checkpoint saved for epoch: 9





### Load ckpt continues training

In [22]:
checkpoint = torch.load('checkpoint.pth')
# Load the model state dict
model.load_state_dict(checkpoint['model_state_dict'])
# Load the optimizer state dict
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Load the loss
loss = checkpoint['loss']

In [23]:
additional_epochs = 5
start_epoch = checkpoint['epoch'] + 1  # Start from the next epoch after the last checkpoint

# Continue training
for epoch in range(start_epoch,start_epoch+additional_epochs):
    loop = tqdm(enumerate(TRAINLOADER), total=len(TRAINLOADER), leave=True)
    
    for batch_idx, (inputs, targets) in loop:
        inputs = inputs.to(device).to(torch.float32)
        targets = targets.to(device)
        optimizer.zero_grad()
        out = model(inputs)
        # loss = criterion(out, targets)
        loss.backward()
        optimizer.step()
        loop.set_postfix(loss=loss.item())

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss
    }
    torch.save(checkpoint, 'checkpoint.pth')
    print("Checkpoint saved for epoch:", epoch)

100%|██████████| 2/2 [00:00<00:00,  4.83it/s, loss=361]


Checkpoint saved for epoch: 10


100%|██████████| 2/2 [00:00<00:00,  8.73it/s, loss=361]


Checkpoint saved for epoch: 11


100%|██████████| 2/2 [00:00<00:00,  8.85it/s, loss=361]


Checkpoint saved for epoch: 12


100%|██████████| 2/2 [00:00<00:00,  8.66it/s, loss=361]


Checkpoint saved for epoch: 13


100%|██████████| 2/2 [00:00<00:00,  8.49it/s, loss=361]

Checkpoint saved for epoch: 14





In [88]:
torch.save(model,'model.pth')

### inference

In [91]:
model = torch.load('model.pth')
input = torch.rand(1,224,224,3)
out = model(input)
out

tensor([[0.0061, 0.0617]], grad_fn=<AddmmBackward0>)