In [7]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.utils.data as Data
from torchvision import transforms
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
#hyperparameter
BatchSize = 128
lr = 0.001
EPOCH = 10

In [3]:
def df_dataloader(df,BatchSize,is_shuffle=True):
    df_target = torch.from_numpy(df['label'].values).float()
    df_data = torch.from_numpy(((df.iloc[:,1:].values)/255.).reshape(-1,1,28,28)).float()
    df = Data.TensorDataset(df_data,df_target)
    df_dataloader = DataLoader(df,batch_size=BatchSize,shuffle=is_shuffle)
    
    return df_dataloader

In [8]:
train = pd.read_csv('/kaggle/input/fashionmnist/fashion-mnist_train.csv')
test = pd.read_csv('/kaggle/input/fashionmnist/fashion-mnist_test.csv')
train_dataloader = df_dataloader(train,BatchSize)
test_dataloader = df_dataloader(test,BatchSize)

In [10]:
class_names = ['T_shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

In [11]:
class ChannelAttention(nn.Module):
    def __init__(self,input_nc,rotio = 16):
        super().__init__()
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.maxpool = nn.AdaptiveMaxPool2d(1)
        self.share_mlp = nn.Sequential(nn.Conv2d(in_channels=input_nc,out_channels=input_nc//rotio,kernel_size=1,bias=False),
                                      nn.ReLU(inplace=True),
                                      nn.Conv2d(in_channels=input_nc//rotio,out_channels=input_nc,kernel_size=1,bias=False))
        self.sigmoid = nn.Sigmoid()
        
    def forward(self,x):
        avg_x = self.share_mlp(self.avgpool(x))
        max_x = self.share_mlp(self.maxpool(x))
        return self.sigmoid(avg_x+max_x)


class SpatialAttention(nn.Module):
    def __init__(self,kernel=7):
        super().__init__()
        assert kernel in (3,7),"Kernel size must be in 3 or 7"
        padding = 1 if kernel == 3 else 3
        self.cnn = nn.Conv2d(2,1,kernel_size=kernel,padding=padding,bias=False)
        self.sigmoid = nn.Sigmoid()
    def forward(self,x):
        avg_out = torch.mean(x,dim=1,keepdim=True)
        max_out = torch.max(x,dim=1,keepdim=True).values
        x = torch.cat([avg_out,max_out],dim=1)
        x = self.cnn(x)
        return self.sigmoid(x)
    
class cbam(nn.Module):
    def __init__(self,input_nc):
        super().__init__()
        self.ca = ChannelAttention(input_nc)
        self.sa = SpatialAttention()
        
    def forward(self,x):
        x = self.ca(x) * x
        x = self.sa(x) * x
        return x
    
    def get_spatial(self,x):
        return self.sa(x)

In [12]:
class Net(nn.Module):
    def __init__(self,is_cbam = True):
        super().__init__()
        self.is_cbam = is_cbam
        self.cnn1 = nn.Conv2d(1,128,3,1,1)
        self.relu1 = nn.ReLU(True)
        self.maxpool1 = nn.MaxPool2d(2)
        self.cnn2 = nn.Conv2d(128,64,3,1,1)
        self.relu2 = nn.ReLU(True)
        self.maxpool2 = nn.MaxPool2d(2)
        self.cbam = cbam(128)
        self.linear = nn.Linear(64*7*7,10)
    
    def forward(self,x):
        x = self.relu1(self.cnn1(x))
        x = self.maxpool1(x)
        if self.is_cbam:
            x = self.cbam(x)
            spatial_mask = self.cbam.get_spatial(x)
        x = self.relu2(self.cnn2(x))
        x = self.maxpool2(x)
        x = self.linear(x.view(-1,64*7*7))
        return x,spatial_mask

In [13]:
net = Net(is_cbam=True).to(device)
optimizer = optim.Adam(net.parameters(),lr = lr)
loss_fn = nn.CrossEntropyLoss()

In [14]:
def train_fn(model,optimizer,loader,loss_fn):
    model.train()
    for batch,(x,y) in enumerate(loader):
        x = x.to(device)
        y = y.to(device)
        pred,_ = model(x)
        loss = loss_fn(pred,y.long())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        loss_pred = loss.item()
        
        if batch % 50 == 0:
            print(f"{batch*len(x)}/{len(loader.dataset)}   Loss:{loss_pred}")

def valid_fn(model,loader,loss_fn):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for x,y in loader:
            x = x.to(device)
            y = y.to(device)
            valid_pred,_ = model(x)
            test_loss += loss_fn(valid_pred,y.long()).item()
            correct += (valid_pred.argmax(1)).eq(y).sum()
    
    test_loss/= len(loader.dataset)
    print(f"\nTest Result: Average Loss = {test_loss}, Accuracy = {correct}/{len(loader.dataset)} ({(correct/len(loader.dataset))*100}%)")

In [15]:
for epoch in range(EPOCH):
    print(f'\nEpoch: {epoch+1}')
    train_fn(net,optimizer,train_dataloader,loss_fn)
    valid_fn(net,test_dataloader,loss_fn)


Epoch: 1
0/60000   Loss:2.3027820587158203
6400/60000   Loss:0.9047666788101196
12800/60000   Loss:0.5710929036140442
19200/60000   Loss:0.3339461088180542
25600/60000   Loss:0.423809289932251
32000/60000   Loss:0.4011962115764618
38400/60000   Loss:0.4907625615596771
44800/60000   Loss:0.3648237884044647
51200/60000   Loss:0.3380748927593231
57600/60000   Loss:0.2888743281364441

Test Result: Average Loss = 0.0027547064200043677, Accuracy = 8795/10000 (87.94999694824219%)

Epoch: 2
0/60000   Loss:0.3701173961162567
6400/60000   Loss:0.2976725697517395
12800/60000   Loss:0.28099820017814636
19200/60000   Loss:0.30544421076774597
25600/60000   Loss:0.29877766966819763
32000/60000   Loss:0.3416312336921692
38400/60000   Loss:0.3143169581890106
44800/60000   Loss:0.24030272662639618
51200/60000   Loss:0.3140158951282501
57600/60000   Loss:0.3208971917629242

Test Result: Average Loss = 0.0024399831756949423, Accuracy = 8891/10000 (88.90999603271484%)

Epoch: 3
0/60000   Loss:0.1985483765