In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import torchmetrics

In [None]:
# Directory holding the competition CSV files (relative to the notebook's working directory).
DATA_DIR = pathlib.Path("../input/sdaia-kaust-ai-august-school-2022")

# Both CSVs are read with every column stored as uint8.
train_df, test_df = (
    pd.read_csv(DATA_DIR / csv_name, dtype="uint8")
    for csv_name in ("train.csv", "test.csv")
)

# Separate the "labels" column (target) from the remaining columns (features).
train_target = train_df["labels"]
train_features = train_df.drop(columns="labels")

In [None]:
class DataSetWithTransforms(Dataset):
    """Labelled dataset (MNIST-style) with an optional per-sample feature transform.

    Parameters
    ----------
    features :
        Indexable collection of samples whose first axis enumerates the
        samples (e.g. a NumPy array of shape ``(n_samples, ...)``).
    target :
        NumPy array with one integer label per sample; it is converted once,
        up front, to an int64 torch tensor.
    feature_transforms : callable, optional
        Applied to a single sample's features each time it is fetched.
        When ``None`` the raw features are returned unchanged.
    """

    def __init__(self, features, target, feature_transforms=None):
        super().__init__()
        self._features = features
        self._target = torch.from_numpy(target).long()
        self._feature_transforms = feature_transforms

    def __getitem__(self, index):
        """Return ``(features, target)`` for the sample at ``index``."""
        features = self._features[index]
        if self._feature_transforms is not None:
            features = self._feature_transforms(features)
        return (features, self._target[index])

    def __len__(self):
        """Number of samples, i.e. the length of the first axis of the features."""
        # len() works for feature arrays of any rank, not only 2-D ones.
        return len(self._features)

In [None]:
class DataSetTest(Dataset):
    """Unlabelled dataset with an optional per-sample feature transform.

    Parameters
    ----------
    features :
        Indexable collection of samples whose first axis enumerates the
        samples (e.g. a NumPy array of shape ``(n_samples, ...)``).
    feature_transforms : callable, optional
        Applied to a single sample's features each time it is fetched.
        When ``None`` the raw features are returned unchanged.
    """

    def __init__(self, features, feature_transforms=None):
        super().__init__()
        self._features = features
        self._feature_transforms = feature_transforms

    def __getitem__(self, index):
        """Return the (optionally transformed) features of the sample at ``index``."""
        features = self._features[index]
        if self._feature_transforms is not None:
            features = self._feature_transforms(features)
        return features

    def __len__(self):
        """Number of samples, i.e. the length of the first axis of the features."""
        # len() works for feature arrays of any rank, not only 2-D ones.
        return len(self._features)

In [None]:
# Training-time preprocessing. Random augmentation belongs here only; the test
# pipeline must not use it. Each flat sample is reshaped to 32x32, turned into
# a PIL image, perturbed with a random affine transform and converted to a tensor.
_train_steps = [
    transforms.Lambda(lambda array: array.reshape((32, 32))),
    transforms.ToPILImage(),
    transforms.RandomAffine(
        degrees=15,
        translate=(0.1, 0.1),
        shear=15,
        scale=(1.0, 1.1),
    ),
    transforms.ToTensor(),
]
_feature_transforms = transforms.Compose(_train_steps)

train_dataset = DataSetWithTransforms(
    features=train_features.values,
    target=train_target.values,
    feature_transforms=_feature_transforms,
)


In [None]:
# Mini-batches of 4 training samples. The order is reshuffled at the start of
# every epoch so that SGD does not see the samples in the same fixed order on
# each pass over the data.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)

In [None]:
# Sanity check: show the target and feature tensor shapes of the first batch only.
for batch_features, batch_target in train_loader:
    print(batch_target.shape, batch_features.shape)
    break

In [None]:
# Test-time preprocessing: no augmentation. Each flat sample is only reshaped
# to 32x32, passed through a PIL image and converted to a tensor.
_test_steps = [
    transforms.Lambda(lambda array: array.reshape((32, 32))),
    transforms.ToPILImage(),
    transforms.ToTensor(),
]
_feature_transforms_test = transforms.Compose(_test_steps)

test_dataset = DataSetTest(
    features=test_df.values,
    feature_transforms=_feature_transforms_test,
)

In [None]:
# Mini-batches of 4 test samples, served in dataset order.
test_loader = DataLoader(dataset=test_dataset, batch_size=4)

In [None]:
#for batch in test_loader:
   # print(batch.shape)

In [None]:
# Hold-out split settings.
valid_size = 0.2      # fraction of the training samples set aside for validation
shuffle = True        # permute the sample indices before splitting
random_seed = 2000    # seed for NumPy's global random number generator
batch_sz = 4

num_train = len(train_dataset)
split = int(np.floor(valid_size * num_train))
indices = list(range(num_train))

if shuffle:
    # Seeds NumPy's *global* generator, then permutes the index list in place.
    np.random.seed(random_seed)
    np.random.shuffle(indices)

# The first `split` indices form the validation part, the remainder the training part.
# NOTE(review): train_idx, valid_idx and batch_sz are not consumed by any of the
# cells visible here - confirm whether the loaders were meant to use them.
valid_idx = indices[:split]
train_idx = indices[split:]

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 32x32 inputs.

    Layout: two ``5x5`` convolutions (1 -> 5 -> 32 channels), each followed by
    ReLU and ``2x2`` max pooling, then two fully connected layers
    (``32*5*5 -> 100 -> 29``). For a 32x32 input the spatial size goes
    32 -> 28 -> 14 -> 10 -> 5, which is where the ``32*5*5`` input width of
    the first linear layer comes from.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 5, 5, 1)
        self.conv2 = nn.Conv2d(5, 32, 5, 1)
        self.L1 = nn.Linear(32 * 5 * 5, 100)
        self.L2 = nn.Linear(100, 29)

    def forward(self, x):
        """Return the 29 raw class scores (logits) for each sample in ``x``.

        ``x`` is a batch of shape ``(batch, 1, 32, 32)``; a single unbatched
        ``(1, 32, 32)`` sample is also accepted and treated as a batch of one.
        Inputs with a different spatial size raise a shape error in the first
        linear layer.
        """
        if x.dim() == 3:
            # Unbatched sample: add the batch axis so flattening keeps one row per sample.
            x = x.unsqueeze(0)

        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))

        # Flatten everything except the batch axis. Unlike view(-1, 32*5*5),
        # this can never silently regroup values across different samples when
        # the input size is not the expected one.
        x = torch.flatten(x, 1)
        x = F.relu(self.L1(x))
        return self.L2(x)

In [None]:
# Model and optimiser set-up: the network lives on the CPU and is trained
# with plain SGD (learning rate 0.01, no momentum).
device = torch.device("cpu")
net = CNN().to(device)

optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0)

num_epochs = 43
ls = []  # one entry per epoch: the sum of that epoch's mini-batch losses

for _epoch in range(num_epochs):
    total_loss = 0
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Clear stale gradients, then forward pass, backward pass and parameter update.
        optimizer.zero_grad()
        loss = F.cross_entropy(net(inputs), targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    ls.append(total_loss)

# Summed loss of the final epoch, followed by the per-epoch loss curve.
print(total_loss)
plt.plot(ls)

In [None]:
# Count correct predictions over `train_loader`.
# NOTE: despite the `_val` names, this iterates the *training* loader, so the
# resulting ratio is accuracy on the training data, not on a held-out set.
acc_val = 0    # number of correctly classified samples
total_val = 0  # number of samples seen

net.eval()
with torch.no_grad():  # inference only: no autograd graph is needed
    for batch in train_loader:
        X = batch[0].to(device)
        label = batch[1].to(device)

        logits = net(X)
        out = torch.argmax(logits, dim=1)
        # .item() keeps the running count a plain Python int rather than a tensor.
        acc_val += (out == label).sum().item()
        total_val += logits.shape[0]


In [None]:
# Fraction of correct predictions among all samples counted by the accuracy loop.
accuracy = acc_val / total_val
print(accuracy)

In [None]:
# Predict a class index for every test sample, batch by batch.
prediction_list = list()
net.eval()
with torch.no_grad():  # inference only: no autograd graph is needed
    for batch in test_loader:
        logits = net(batch.to(device))
        prediction_list.append(torch.argmax(logits, dim=1))
prediction_list = torch.cat(prediction_list).cpu()

# Write the submission file: one row per test sample, with the row index of
# test.csv as "Id" and the predicted class index as "Category".
test_features = pd.read_csv(DATA_DIR / "test.csv")
submission = pd.DataFrame({
    "Id": test_features.index,
    # Hand pandas a NumPy array rather than a torch tensor, so the column
    # holds plain integers regardless of how pandas treats tensor objects.
    "Category": prediction_list.numpy(),
})
submission.to_csv("submission.csv", index=False)

## alternatively you could replace test_features.index by torch.arange(3360) 