In [58]:
import pandas as pd
import os
# os.chdir("..")
# os.chdir("data")
# Load the training metadata table, then print pandas' summary statistics
# (count / unique / top / freq) for its columns.
df = pd.read_csv(filepath_or_buffer='./data/train_metadata.csv')
print(pd.DataFrame.describe(df))

         Class Injured Gender  Item                      Filename
count      500     500    500   500                           500
unique       2       2      2     4                           500
top     Zombie   False   Male  None  train_dataset/test_00000.png
freq       257     312    260   207                             1


# train cnn example

In [59]:
import torch, timm, PIL,os, sklearn, gc
import sklearn.preprocessing
import sklearn.model_selection
from timm.scheduler import CosineLRScheduler
from tqdm import tqdm
import cv2
import pandas as pd
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
%matplotlib inline
import matplotlib.pyplot as plt

# define image transformations and augmentations (if you want)

In [60]:
def _make_tensor_normalize_pipeline():
    """Build a fresh pipeline: PIL image -> tensor -> per-channel normalisation.

    NOTE(review): the mean/std constants look like the usual ImageNet channel
    statistics used with pretrained backbones -- confirm against the model card.
    """
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    return transforms.Compose([to_tensor, normalize])


# Training and validation currently share the same steps (no augmentation),
# but each gets its own Compose instance so they can diverge independently.
train_transform = _make_tensor_normalize_pipeline()
val_transform = _make_tensor_normalize_pipeline()

# define dataset

In [61]:

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a metadata frame.

    Every row of ``df`` must provide a ``'Filename'`` column (image path
    relative to ``base_dir``) and a ``'state'`` column whose value can be
    turned into an integer tensor.

    Args:
        df: Metadata table with one row per image.
        transforms: Callable applied to the loaded PIL image; whatever it
            returns is handed back as the sample's image.
        base_dir: Directory that the ``'Filename'`` paths are relative to.
    """

    def __init__(self, df, transforms, base_dir ="./data",):
        self.df = df
        self.base_dir = base_dir
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows (samples) in the metadata frame."""
        return len(self.df)

    def __getitem__(self, i):
        """Load sample ``i`` and return ``(transformed_image, label)``.

        The label is returned as a 0-dim ``torch.int64`` tensor.
        """
        row = self.df.iloc[i]
        img_path = os.path.join(self.base_dir, row['Filename'])

        # Image.open is lazy; convert() forces the pixel data to be read so the
        # file can be closed right away, and pins the mode to three channels,
        # which is what the 3-value Normalize mean/std used in this notebook needs.
        with Image.open(img_path) as handle:
            image = handle.convert('RGB')

        # Use the pipeline this dataset was constructed with. Previously the
        # global `train_transform` was applied here regardless of the
        # `transforms` argument, so the validation dataset silently used the
        # training pipeline.
        image = self.transforms(image)
        return image, torch.tensor(row['state']).long()

# read data and get holdout set
Ideally, set a random state here so that the splits are consistent between runs.

In [62]:
# Read the train and test metadata tables; each row describes one image.
train_df, test_df = map(
    pd.read_csv,
    ('./data/train_metadata.csv', './data/test_metadata.csv'),
)

In [63]:
import sys; sys.path.append('../')
from endpoints.data_parser import datarow_to_state
from gameplay.enums import State
# Step 1: derive a gameplay state for every metadata row (train first, then test).
for frame in (train_df, test_df):
    frame['state'] = frame.apply(datarow_to_state, axis=1)

# Step 2: fit a label encoder on the training states ...
enc = sklearn.preprocessing.LabelEncoder()
enc.fit(train_df['state'])

# ... then replace the fitted class list with an explicit order, so the integer
# codes are always 0=CORPSE, 1=ZOMBIE, 2=INJURED, 3=HEALTHY.
enc.classes_ = np.array(
    [
        state.value
        for state in (State.CORPSE, State.ZOMBIE, State.INJURED, State.HEALTHY)
    ],
    dtype=object,
)

# Step 3: overwrite the state column of both frames with the integer codes.
for frame in (train_df, test_df):
    frame['state'] = enc.transform(frame['state'])

# begin setting up training pipeline
Use class weights, since this is an imbalanced classification problem.

In [64]:
# --- Hyperparameters -------------------------------------------------------

# timm architecture name (alternative: 'maxvit_tiny_tf_512').
model_name = 'resnet10t'

# Optimisation schedule.
epochs = 25
epochs_warmup = 1   # epochs of learning-rate warmup before the cosine phase
batch_size = 16

# Optimiser settings.
learning_rate = 2e-4
weight_decay = 1e-8   # weight decay handed to the optimiser
max_grad_norm = 100   # upper bound on the total gradient norm (norm clipping)

# Prefer the GPU when one is visible to torch.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [65]:
# Wrap the metadata frames in datasets. The frame read from test_metadata.csv
# serves as the validation split here.
train_ds = Dataset(df=train_df, transforms=train_transform)
val_ds = Dataset(df=test_df, transforms=val_transform)

# Training batches are reshuffled every epoch and the last incomplete batch is dropped.
loader_train = torch.utils.data.DataLoader(
    dataset=train_ds,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
    num_workers=0,
    pin_memory=False,
)

# Validation keeps the frame order and every sample.
loader_val = torch.utils.data.DataLoader(
    dataset=val_ds,
    batch_size=batch_size,
    num_workers=0,
    pin_memory=False,
)

In [67]:
# Per-class loss weights using sklearn's 'balanced' mode over the encoded
# training labels, moved to the training device as a float tensor.
# NOTE(review): only classes present in train_df.state get a weight, and the
# model's output size is later taken from len(class_weights).
class_weights = torch.tensor(
    sklearn.utils.class_weight.compute_class_weight(
        'balanced',
        classes=np.unique(train_df.state),
        y=train_df.state,
    ),
    dtype=torch.float,
).to(device)

In [68]:
# Instantiate the timm architecture with pretrained weights and one output
# logit per entry in class_weights.
model = timm.create_model(
    model_name,
    pretrained=True,
    num_classes=len(class_weights),
)
# Create a neural net class

# class Net(torch.nn.Module):
#     # Defining the Constructor
#     def __init__(self, num_classes=7):
#         super(Net, self).__init__()
#         self.conv1 = torch.nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
#         self.conv2 = torch.nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
#         self.conv3 = torch.nn.Conv2d(in_channels=24, out_channels=8, kernel_size=3, stride=1, padding=1)
#         self.pool = torch.nn.MaxPool2d(kernel_size=2)
#         self.drop = torch.nn.Dropout2d(p=0.2)
#         self.fc = torch.nn.Linear(in_features=(8* 64* 64), out_features=num_classes)

#     def forward(self, x):
#         x = torch.nn.functional.relu(self.pool(self.conv1(x))) 
#         x = torch.nn.functional.relu(self.pool(self.conv2(x)))  
#         x = torch.nn.functional.relu(self.pool(self.conv3(x)))  
#         x = torch.nn.functional.dropout(self.drop(x), training=self.training)
# #         print(x.shape)
#         x = x.view(-1, 8* 64* 64)
#         x = self.fc(x)
#         return x
# model = Net()
model.to(device)

# Class-weighted cross-entropy loss and the AdamW optimiser.
criterion = torch.nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

# The scheduler is driven per optimiser step, so all durations are in steps.
nbatch = len(loader_train)        # steps per epoch
warmup = nbatch * epochs_warmup   # warmup steps
nsteps = nbatch * epochs          # total steps
scheduler = CosineLRScheduler(
    optimizer,
    t_initial=nsteps - warmup,    # cosine phase: the (epochs - epochs_warmup) remaining epochs
    lr_min=1e-6,
    warmup_t=warmup,              # epochs_warmup epoch(s) of warmup
    warmup_lr_init=1e-6,
    warmup_prefix=True,
)

gc.collect()

91

# validation loop

In [69]:
def evaluate(model, loader_val):
    """Run ``model`` over ``loader_val`` and collect loss, labels and probabilities.

    Uses the notebook-level ``device`` and ``criterion`` globals. The model is
    switched to eval mode for the duration of the call and is put back into
    whatever mode (train/eval) it was in on entry.

    Args:
        model: Module to evaluate; called as ``model(img)``.
        loader_val: Iterable yielding ``(img, y)`` batches of tensors.

    Returns:
        dict with:
            'loss'   -- mean of the per-batch criterion values, weighted by batch size;
            'y'      -- numpy array of all labels, concatenated in loader order;
            'y_pred' -- numpy array of softmax (over dim 1) of the model outputs,
                        concatenated in loader order.
    """
    was_training = model.training
    model.eval()

    loss_sum = 0.0
    n_sum = 0
    y_all = []
    y_pred_all = []
    try:
        # No graph is needed anywhere in evaluation, so the forward pass, the
        # loss and the softmax all run under no_grad.
        with torch.no_grad():
            for img, y in loader_val:
                n = y.size(0)
                img = img.to(device)
                y = y.to(device)

                y_pred = model(img)
                loss = criterion(y_pred, y)

                # Weight each batch's loss by its size so a smaller final
                # batch does not skew the average.
                n_sum += n
                loss_sum += n * loss.item()

                y_all.append(y.detach().cpu().numpy())
                y_pred_all.append(
                    torch.nn.functional.softmax(y_pred, 1).detach().cpu().numpy()
                )
    finally:
        # Restore the caller's mode; previously `was_training` was recorded but
        # never used, leaving the model in eval mode after the call.
        model.train(was_training)

    loss_val = loss_sum / n_sum

    ret = {'loss': loss_val,
           'y': np.concatenate(y_all),
           'y_pred': np.concatenate(y_pred_all),}
    # One collection at the end instead of one per batch.
    gc.collect()
    return ret

# training loop

In [70]:
# out = model(torch.zeros(2,3,512,512))
# out

In [71]:
# torch.nn.functional.softmax(out,1)

In [72]:
# Train for `epochs` epochs, validating after each one and checkpointing the
# weights whenever the validation loss matches or beats the best seen so far.
best_loss = float('inf')  # a finite sentinel (was 1000) would block checkpointing if the loss exceeded it
torch.cuda.empty_cache()
for iepoch in range(epochs):
    model.train()
    # Wrap the loader itself (not enumerate(...)) so tqdm can read len(loader)
    # and show a total / ETA instead of a bare iteration counter.
    progress = tqdm(loader_train, desc=f'epoch {iepoch + 1}/{epochs}')
    for ibatch, (img, y) in enumerate(progress):
        img = img.to(device)
        y = y.to(device)

        optimizer.zero_grad()

        y_pred = model(img)
        loss = criterion(y_pred, y)
        loss_train = loss.item()

        loss.backward()

        # Rescale gradients so their total norm does not exceed max_grad_norm.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        optimizer.step()

        # The scheduler is stepped per optimiser update with a global
        # 1-based step index.
        scheduler.step(iepoch * nbatch + ibatch + 1)

        progress.set_postfix(loss=loss_train)

    validation_results = evaluate(model, loader_val)
    # Report only the scalar loss; the full label / probability arrays stay
    # available in `validation_results` for the analysis cells below.
    print(f"epoch {iepoch + 1}/{epochs}: validation loss = {validation_results['loss']:.4f}")

    # `<=` keeps the original tie behaviour (an equal loss also overwrites the checkpoint).
    if validation_results['loss'] <= best_loss:
        best_loss = validation_results['loss']
        ofilename = 'test_trained'
        torch.save(model.state_dict(), ofilename)
    gc.collect()

0it [00:00, ?it/s]

Class                            Default
Injured                            False
Gender                              Male
Item                                 Bow
Filename    train_dataset/test_00301.png
state                                  3
Name: 301, dtype: object
Class                             Zombie
Injured                            False
Gender                              Male
Item                                None
Filename    train_dataset/test_00220.png
state                                  1
Name: 220, dtype: object
Class                             Zombie
Injured                            False
Gender                              Male
Item                                 Bow
Filename    train_dataset/test_00160.png
state                                  1
Name: 160, dtype: object
Class                             Zombie
Injured                            False
Gender                            Female
Item                               Staff
Filename    train_datas

1it [00:07,  7.30s/it]

Class                             Zombie
Injured                            False
Gender                            Female
Item                                None
Filename    train_dataset/test_00388.png
state                                  1
Name: 388, dtype: object
Class                             Zombie
Injured                            False
Gender                              Male
Item                                 Bow
Filename    train_dataset/test_00291.png
state                                  1
Name: 291, dtype: object
Class                            Default
Injured                             True
Gender                              Male
Item                                None
Filename    train_dataset/test_00118.png
state                                  2
Name: 118, dtype: object
Class                            Default
Injured                            False
Gender                              Male
Item                              Hammer
Filename    train_datas

2it [00:14,  7.31s/it]

Class                            Default
Injured                            False
Gender                            Female
Item                                None
Filename    train_dataset/test_00253.png
state                                  3
Name: 253, dtype: object
Class                             Zombie
Injured                             True
Gender                              Male
Item                                None
Filename    train_dataset/test_00159.png
state                                  0
Name: 159, dtype: object
Class                            Default
Injured                             True
Gender                              Male
Item                              Hammer
Filename    train_dataset/test_00230.png
state                                  2
Name: 230, dtype: object
Class                            Default
Injured                            False
Gender                              Male
Item                                 Bow
Filename    train_datas

2it [00:21, 10.83s/it]


KeyboardInterrupt: 

In [None]:
# Hard prediction per validation sample: the index of the largest probability
# in each row of the softmax output.
y_pred = np.argmax(validation_results['y_pred'], axis=1)
y_pred

In [None]:
# Spot-check: predicted class index of the first validation sample.
y_pred[0]

In [None]:
# Ground-truth labels collected by evaluate(), in loader order.
validation_results['y']

# view confusion matrix of predicted values

In [None]:
# Pin the label set to every encoded class (0 .. len(enc.classes_) - 1).
# Without `labels=`, a class that appears in neither the ground truth nor the
# predictions is dropped from the matrix, which then no longer matches the
# `enc.classes_` labels used when the matrix is plotted.
matrix = sklearn.metrics.confusion_matrix(
    validation_results['y'],
    y_pred,
    labels=np.arange(len(enc.classes_)),
)

In [None]:
# Show the raw confusion-matrix counts.
matrix

In [None]:
# Draw the confusion matrix with the encoder's class values as tick labels,
# write the figure to disk, then show it inline.
confusion_display = sklearn.metrics.ConfusionMatrixDisplay(
    confusion_matrix=matrix,
    display_labels=enc.classes_,
)
confusion_display.plot()
plt.savefig('foo.png')
plt.show()