In [1]:
import torch
import torch.nn as nn 
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [2]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark  =False

seed = 59
set_seed(seed)

In [3]:
root_dir = '/kaggle/input/weather-dataset/weather-dataset/dataset'
img_paths = []
labels = []
classes = {
    label_idx: class_name for label_idx, class_name in enumerate(sorted(os.listdir(root_dir)))
}

for label_idx, class_name in classes.items():
    class_dir = os.path.join(root_dir, class_name)
    for img_filename in os.listdir(class_dir):
        img_path = os.path.join(class_dir, img_filename)
        img_paths.append(img_path)
        labels.append(label_idx)

In [4]:
val_size = 0.2
test_size = 0.125
is_shuffle = True

X_train, X_val, y_train, y_val = train_test_split(
    img_paths, labels,
    test_size=val_size,
    random_state=seed,
    shuffle=is_shuffle
)

X_train, X_test, y_train, y_test = train_test_split(
    X_train, y_train,
    test_size=val_size,
    random_state=seed,
    shuffle=is_shuffle
)

In [5]:
class WeatherDataset(Dataset):
    def __init__(self, X, y, transform=None):
        self.transform = transform
        self.img_paths = X
        self.labels = y
    
    def __len__(self):
        return len(self.img_paths)
    
    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        img = Image.open(img_path).convert("RGB")
        
        if self.transform:
            img = self.transform(img)
            
        return img, self.labels[idx]

In [6]:
def transform(img, img_size=(224, 224)):
    img = img.resize(img_size)
    img = np.array(img)[..., :3]
    img = torch.tensor(img).permute(2, 0, 1).float()
    normalized_img = img/255.0
    
    return normalized_img

In [7]:
train_dataset = WeatherDataset(X_train, y_train, transform=transform)
val_dataset = WeatherDataset(X_val, y_val, transform=transform)
test_dataset = WeatherDataset(X_test, y_test, transform=transform)

train_batch_size = 512
test_batch_size = 8

train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=train_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=train_batch_size, shuffle=False)

In [8]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)
        self.downsample = nn.Sequential()
        
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )
        
        self.relu = nn.ReLU()
        
    def forward(self, x):
        shortcut = x.clone()
        x = self.conv1(x)
        x = self.batch_norm1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.batch_norm2(x)
        x += self.downsample(shortcut)
        x = self.relu(x)
        
        return x

In [9]:
class Resnet(nn.Module):
    def __init__(self, residual_block, n_block_lst, n_classes):
        super(Resnet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.batch_nomr1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = self.create_layer(residual_block, 64, 64, n_block_lst[0], 1)
        self.conv3 = self.create_layer(residual_block, 64, 128, n_block_lst[1], 2)
        self.conv4 = self.create_layer(residual_block, 128, 256, n_block_lst[2], 2)
        self.conv5 = self.create_layer(residual_block, 256, 512, n_block_lst[3], 2)
        self.avgpool = nn.AdaptiveMaxPool2d(1)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(512, n_classes)
    
    def create_layer(self, residual_block, in_channels, out_channels, n_blocks, stride):
        blocks = []
        first_block = residual_block(in_channels, out_channels, stride)
        blocks.append(first_block)
        
        for idx in range(1, n_blocks):
            block = residual_block(out_channels, out_channels, stride)
            blocks.append(block)
        
        block_sequential = nn.Sequential(*blocks)
        return block_sequential
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.batch_nomr1(x)
        x = self.maxpool(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        
        return x

In [10]:
n_classes = len(list(classes.keys()))
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Resnet(
    residual_block=ResidualBlock,
    n_block_lst=[2, 2, 2, 2],
    n_classes=n_classes
).to(device)

In [11]:
def evaluate(model, dataloader, criterion, device):
    model.eval()
    correct = 0
    total = 0
    losses = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            losses.append(loss.item())
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
    loss = sum(losses) / len(losses)
    acc = correct / total
    
    return correct, acc

In [12]:
from tqdm import tqdm

def fit(model, train_loader, val_loader, criterion, optimizer, device, epochs):
    train_losses = []
    val_losses = []
    
    for epoch in range(epochs):
        batch_train_losses = []
        model.train()
        
        for idx, (inputs, labels) in enumerate(tqdm(train_loader)):
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            batch_train_losses.append(loss.item())
        
        train_loss = sum(batch_train_losses) / len(batch_train_losses)
        train_losses.append(train_loss)
        
        val_loss, val_acc = evaluate(model, val_loader, criterion, device)
        val_losses.append(val_loss)
        
        print (f'EPOCH { epoch + 1}:\t Train loss :{train_loss:.4f}\t Val loss :{val_loss:.4f}')
    return train_losses, val_losses

In [13]:
lr = 1e-2
epochs = 25
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(), lr=lr
)

train_losses, val_losses = fit(
    model, train_loader, val_loader, criterion, optimizer, device, epochs
)

100%|██████████| 9/9 [00:55<00:00,  6.16s/it]


EPOCH 1:	 Train loss :1.9178	 Val loss :249.0000


100%|██████████| 9/9 [00:39<00:00,  4.35s/it]


EPOCH 2:	 Train loss :1.2990	 Val loss :201.0000


100%|██████████| 9/9 [00:38<00:00,  4.29s/it]


EPOCH 3:	 Train loss :1.0173	 Val loss :191.0000


100%|██████████| 9/9 [00:38<00:00,  4.30s/it]


EPOCH 4:	 Train loss :0.8343	 Val loss :368.0000


100%|██████████| 9/9 [00:38<00:00,  4.29s/it]


EPOCH 5:	 Train loss :0.6812	 Val loss :753.0000


100%|██████████| 9/9 [00:37<00:00,  4.19s/it]


EPOCH 6:	 Train loss :0.5065	 Val loss :860.0000


100%|██████████| 9/9 [00:37<00:00,  4.16s/it]


EPOCH 7:	 Train loss :0.4336	 Val loss :826.0000


100%|██████████| 9/9 [00:37<00:00,  4.14s/it]


EPOCH 8:	 Train loss :0.2878	 Val loss :869.0000


100%|██████████| 9/9 [00:38<00:00,  4.24s/it]


EPOCH 9:	 Train loss :0.2375	 Val loss :892.0000


100%|██████████| 9/9 [00:36<00:00,  4.10s/it]


EPOCH 10:	 Train loss :0.1791	 Val loss :938.0000


100%|██████████| 9/9 [00:37<00:00,  4.16s/it]


EPOCH 11:	 Train loss :0.1145	 Val loss :941.0000


100%|██████████| 9/9 [00:37<00:00,  4.12s/it]


EPOCH 12:	 Train loss :0.1041	 Val loss :957.0000


100%|██████████| 9/9 [00:36<00:00,  4.10s/it]


EPOCH 13:	 Train loss :0.0734	 Val loss :968.0000


100%|██████████| 9/9 [00:37<00:00,  4.16s/it]


EPOCH 14:	 Train loss :0.0570	 Val loss :970.0000


100%|██████████| 9/9 [00:37<00:00,  4.18s/it]


EPOCH 15:	 Train loss :0.0503	 Val loss :962.0000


100%|██████████| 9/9 [00:37<00:00,  4.12s/it]


EPOCH 16:	 Train loss :0.0382	 Val loss :962.0000


100%|██████████| 9/9 [00:37<00:00,  4.11s/it]


EPOCH 17:	 Train loss :0.0344	 Val loss :963.0000


100%|██████████| 9/9 [00:36<00:00,  4.10s/it]


EPOCH 18:	 Train loss :0.0318	 Val loss :966.0000


100%|██████████| 9/9 [00:37<00:00,  4.12s/it]


EPOCH 19:	 Train loss :0.0264	 Val loss :970.0000


100%|██████████| 9/9 [00:37<00:00,  4.11s/it]


EPOCH 20:	 Train loss :0.0245	 Val loss :970.0000


100%|██████████| 9/9 [00:37<00:00,  4.16s/it]


EPOCH 21:	 Train loss :0.0218	 Val loss :961.0000


100%|██████████| 9/9 [00:37<00:00,  4.22s/it]


EPOCH 22:	 Train loss :0.0187	 Val loss :969.0000


100%|██████████| 9/9 [00:38<00:00,  4.30s/it]


EPOCH 23:	 Train loss :0.0192	 Val loss :967.0000


100%|██████████| 9/9 [00:37<00:00,  4.21s/it]


EPOCH 24:	 Train loss :0.0171	 Val loss :965.0000


100%|██████████| 9/9 [00:37<00:00,  4.19s/it]


EPOCH 25:	 Train loss :0.0155	 Val loss :960.0000


In [14]:
val_loss, val_acc = evaluate(
    model, val_loader, criterion, device
)

test_loss, test_acc = evaluate(
    model, test_loader, criterion, device
)
print('Evaluation on val/test dataset')
print('Val accuracy: ', val_acc)
print('Test accuracy: ', test_acc)

Evaluation on val/test dataset
Val accuracy:  0.6991988346686089
Test accuracy:  0.7003642987249544
