In [1]:
import numpy as np
import pandas as pd
# import seaborn as sns
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

from sklearn.preprocessing import MinMaxScaler, LabelEncoder    
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
df = pd.read_csv("train.csv")
test_df = pd.read_csv('test.csv')
df.head()

Unnamed: 0,id,Hit Point,Attack Point,Recovery,Total,Attribute,Race
0,977,1687,1656,489,3832,Water,Demon
1,72,185,87,159,431,Light,Elf
2,69,584,313,430,1327,Fire,Elf
3,493,1880,859,373,3112,Earth,Elf
4,443,905,364,49,1318,Water,Beast


In [3]:
race_encoder = LabelEncoder()
attr_encoder = LabelEncoder()
race_encoder.fit(df['Race'].to_numpy())
attr_encoder.fit(df['Attribute'].to_numpy())
print(list(race_encoder.classes_))
print(list(attr_encoder.classes_))
df['Race'] = race_encoder.transform(df['Race'])
df['Attribute'] = attr_encoder.transform(df['Attribute'])
test_df['Attribute'] = attr_encoder.transform(test_df['Attribute'])
df

['Beast', 'Demon', 'Dragon', 'Elf', 'God', 'Human', 'Machina', 'Material']
['Dark', 'Earth', 'Fire', 'Light', 'Water']


Unnamed: 0,id,Hit Point,Attack Point,Recovery,Total,Attribute,Race
0,977,1687,1656,489,3832,4,1
1,72,185,87,159,431,3,3
2,69,584,313,430,1327,2,3
3,493,1880,859,373,3112,1,3
4,443,905,364,49,1318,4,0
...,...,...,...,...,...,...,...
995,1050,1473,1898,356,3727,2,1
996,999,2600,100,100,2800,1,7
997,27,1133,475,128,1736,2,0
998,789,3063,1241,355,4659,1,4


In [4]:
X = df.iloc[:, 1:-1]
y = df.iloc[:, -1]
X_test = test_df.iloc[:,1:]
test_id = test_df.iloc[:,0]

In [5]:
X_test

Unnamed: 0,Hit Point,Attack Point,Recovery,Total,Attribute
0,1200,800,300,2300,0
1,1708,859,823,3390,1
2,1259,651,251,2161,3
3,896,397,48,1341,0
4,2545,1625,380,4550,0
...,...,...,...,...,...
495,3322,1519,374,5215,1
496,309,158,55,522,3
497,293,129,47,469,1
498,506,136,4,646,2


In [6]:
NUM_FEATURE = X.shape[1]
NUM_CLASS = len(np.unique(y))
# Split into train+val 
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

print(X_train.shape)
print(X_val.shape)


(800, 5)
(200, 5)


In [7]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

X_train, y_train = np.array(X_train), np.array(y_train)
X_val, y_val = np.array(X_val), np.array(y_val)
X_test = np.array(X_test)

In [8]:
class ClassifierDataset(Dataset):
    
    def __init__(self, X_data, y_data):
        self.X_data = torch.from_numpy(X_data)
        self.y_data = torch.from_numpy(y_data)
        
    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]
        
    def __len__ (self):
        return len(self.X_data)

In [9]:
train_data = ClassifierDataset(X_train, y_train)
val_data = ClassifierDataset(X_val, y_val)
train_loader = DataLoader(train_data, batch_size = 64, shuffle = True)
valid_loader = DataLoader(train_data, batch_size = 64, shuffle = False)

In [10]:
class DNN(nn.Module):
    def __init__(self, num_feature, num_class):
        super(DNN, self).__init__()
        
        self.layer_1 = nn.Linear(num_feature, 512)
        self.layer_2 = nn.Linear(512, 128)
        self.layer_3 = nn.Linear(128, 64)
        self.layer_out = nn.Linear(64, num_class) 
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.batchnorm2 = nn.BatchNorm1d(128)
        self.batchnorm3 = nn.BatchNorm1d(64)
        
    def forward(self, x):
        x = self.layer_1(x)
        x = self.batchnorm1(x)
        x = self.relu(x)
        
        x = self.layer_2(x)
        x = self.batchnorm2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.layer_3(x)
        x = self.batchnorm3(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.layer_out(x)
        
        return x

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [12]:
model = DNN(NUM_FEATURE, NUM_CLASS)
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model

DNN(
  (layer_1): Linear(in_features=5, out_features=512, bias=True)
  (layer_2): Linear(in_features=512, out_features=128, bias=True)
  (layer_3): Linear(in_features=128, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=8, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (batchnorm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [13]:
def multi_acc(y_pred, y_test):
    y_pred_softmax = torch.log_softmax(y_pred, dim = 1)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)    
    
    correct_pred = (y_pred_tags == y_test).float()
    acc = correct_pred.sum() / len(correct_pred)

    return acc

In [14]:
import datetime
print("start time:", datetime.datetime.now().strftime("%D:%H:%M:%S"))
for epoch in tqdm(range(500)):
    
    # TRAINING
    train_epoch_loss = 0
    train_epoch_acc = 0
    model.train()
    for i, (X_train, y_train) in enumerate(train_loader):
        X_train, y_train = X_train.to(device), y_train.to(device)
        optimizer.zero_grad()
        
        y_pred = model(X_train.float())
        train_loss = criterion(y_pred, y_train.long())
        train_acc = multi_acc(y_pred, y_train)
        
        train_loss.backward()
        optimizer.step()
        
        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()
    train_epoch_acc = train_epoch_acc/(i+1)
        
    # VALIDATION    
    with torch.no_grad():
        
        val_epoch_loss = 0
        val_epoch_acc = 0

        model.eval()
        for i,(X_val, y_val) in enumerate(valid_loader):
            X_val, y_val = X_val.to(device), y_val.to(device)

            y_pred = model(X_val.float())

            val_loss = criterion(y_pred, y_val.long())
            val_acc = multi_acc(y_pred, y_val)
            val_epoch_loss += val_loss.item()
            val_epoch_acc += val_acc.item()
        val_epoch_acc = val_epoch_acc/(i+1)
    print("epoch %3d: training loss: %.3f | training accuracy: %.3f | validation loss: %.3f | validation accuracy: %.3f"%(epoch, train_epoch_loss, train_epoch_acc,val_epoch_loss, val_epoch_acc))

print("end time:", datetime.datetime.now().strftime("%D:%H:%M:%S"))


start time: 08/02/21:17:37:55


  0%|          | 0/500 [00:00<?, ?it/s]

epoch   0: training loss: 24.753 | training accuracy: 0.298 | validation loss: 26.127 | validation accuracy: 0.272
epoch   1: training loss: 19.286 | training accuracy: 0.554 | validation loss: 22.928 | validation accuracy: 0.444
epoch   2: training loss: 16.687 | training accuracy: 0.615 | validation loss: 18.247 | validation accuracy: 0.608
epoch   3: training loss: 15.312 | training accuracy: 0.618 | validation loss: 14.966 | validation accuracy: 0.662
epoch   4: training loss: 14.026 | training accuracy: 0.639 | validation loss: 13.008 | validation accuracy: 0.702
epoch   5: training loss: 12.970 | training accuracy: 0.665 | validation loss: 11.793 | validation accuracy: 0.737
epoch   6: training loss: 12.510 | training accuracy: 0.663 | validation loss: 11.040 | validation accuracy: 0.714
epoch   7: training loss: 11.851 | training accuracy: 0.659 | validation loss: 10.539 | validation accuracy: 0.731
epoch   8: training loss: 11.554 | training accuracy: 0.685 | validation loss: 1

epoch  73: training loss: 6.796 | training accuracy: 0.786 | validation loss: 5.146 | validation accuracy: 0.857
epoch  74: training loss: 8.354 | training accuracy: 0.748 | validation loss: 5.184 | validation accuracy: 0.849
epoch  75: training loss: 7.439 | training accuracy: 0.769 | validation loss: 5.176 | validation accuracy: 0.867
epoch  76: training loss: 7.355 | training accuracy: 0.792 | validation loss: 5.184 | validation accuracy: 0.855
epoch  77: training loss: 6.396 | training accuracy: 0.803 | validation loss: 5.069 | validation accuracy: 0.861
epoch  78: training loss: 6.968 | training accuracy: 0.796 | validation loss: 4.963 | validation accuracy: 0.875
epoch  79: training loss: 7.377 | training accuracy: 0.782 | validation loss: 5.236 | validation accuracy: 0.843
epoch  80: training loss: 6.915 | training accuracy: 0.794 | validation loss: 4.992 | validation accuracy: 0.849
epoch  81: training loss: 7.206 | training accuracy: 0.787 | validation loss: 5.006 | validation

epoch 147: training loss: 5.860 | training accuracy: 0.828 | validation loss: 4.354 | validation accuracy: 0.864
epoch 148: training loss: 5.840 | training accuracy: 0.828 | validation loss: 4.149 | validation accuracy: 0.871
epoch 149: training loss: 5.994 | training accuracy: 0.825 | validation loss: 3.948 | validation accuracy: 0.883
epoch 150: training loss: 5.872 | training accuracy: 0.821 | validation loss: 3.890 | validation accuracy: 0.893
epoch 151: training loss: 6.247 | training accuracy: 0.814 | validation loss: 4.019 | validation accuracy: 0.871
epoch 152: training loss: 5.894 | training accuracy: 0.812 | validation loss: 3.930 | validation accuracy: 0.892
epoch 153: training loss: 6.081 | training accuracy: 0.828 | validation loss: 3.990 | validation accuracy: 0.882
epoch 154: training loss: 6.328 | training accuracy: 0.816 | validation loss: 4.057 | validation accuracy: 0.881
epoch 155: training loss: 5.419 | training accuracy: 0.826 | validation loss: 3.891 | validation

epoch 221: training loss: 5.925 | training accuracy: 0.819 | validation loss: 3.648 | validation accuracy: 0.892
epoch 222: training loss: 4.941 | training accuracy: 0.856 | validation loss: 3.591 | validation accuracy: 0.889
epoch 223: training loss: 4.851 | training accuracy: 0.861 | validation loss: 3.422 | validation accuracy: 0.901
epoch 224: training loss: 5.694 | training accuracy: 0.831 | validation loss: 3.419 | validation accuracy: 0.907
epoch 225: training loss: 5.466 | training accuracy: 0.834 | validation loss: 3.547 | validation accuracy: 0.898
epoch 226: training loss: 6.690 | training accuracy: 0.781 | validation loss: 3.597 | validation accuracy: 0.901
epoch 227: training loss: 6.043 | training accuracy: 0.814 | validation loss: 3.615 | validation accuracy: 0.891
epoch 228: training loss: 5.693 | training accuracy: 0.837 | validation loss: 3.431 | validation accuracy: 0.913
epoch 229: training loss: 5.911 | training accuracy: 0.826 | validation loss: 3.566 | validation

epoch 294: training loss: 5.378 | training accuracy: 0.841 | validation loss: 3.317 | validation accuracy: 0.911
epoch 295: training loss: 4.909 | training accuracy: 0.844 | validation loss: 3.317 | validation accuracy: 0.897
epoch 296: training loss: 4.744 | training accuracy: 0.845 | validation loss: 3.186 | validation accuracy: 0.909
epoch 297: training loss: 5.039 | training accuracy: 0.829 | validation loss: 3.074 | validation accuracy: 0.918
epoch 298: training loss: 5.780 | training accuracy: 0.808 | validation loss: 3.125 | validation accuracy: 0.918
epoch 299: training loss: 5.603 | training accuracy: 0.820 | validation loss: 3.264 | validation accuracy: 0.898
epoch 300: training loss: 5.257 | training accuracy: 0.853 | validation loss: 3.227 | validation accuracy: 0.900
epoch 301: training loss: 5.006 | training accuracy: 0.840 | validation loss: 3.310 | validation accuracy: 0.889
epoch 302: training loss: 5.116 | training accuracy: 0.839 | validation loss: 3.252 | validation

epoch 368: training loss: 5.483 | training accuracy: 0.840 | validation loss: 3.152 | validation accuracy: 0.903
epoch 369: training loss: 5.317 | training accuracy: 0.841 | validation loss: 3.219 | validation accuracy: 0.900
epoch 370: training loss: 5.341 | training accuracy: 0.828 | validation loss: 3.092 | validation accuracy: 0.907
epoch 371: training loss: 5.257 | training accuracy: 0.820 | validation loss: 3.115 | validation accuracy: 0.904
epoch 372: training loss: 4.365 | training accuracy: 0.874 | validation loss: 3.021 | validation accuracy: 0.906
epoch 373: training loss: 4.737 | training accuracy: 0.856 | validation loss: 2.983 | validation accuracy: 0.922
epoch 374: training loss: 5.844 | training accuracy: 0.816 | validation loss: 2.884 | validation accuracy: 0.929
epoch 375: training loss: 5.390 | training accuracy: 0.846 | validation loss: 3.089 | validation accuracy: 0.923
epoch 376: training loss: 5.188 | training accuracy: 0.833 | validation loss: 3.083 | validation

epoch 441: training loss: 4.296 | training accuracy: 0.875 | validation loss: 2.715 | validation accuracy: 0.918
epoch 442: training loss: 4.752 | training accuracy: 0.852 | validation loss: 2.799 | validation accuracy: 0.916
epoch 443: training loss: 4.539 | training accuracy: 0.862 | validation loss: 2.793 | validation accuracy: 0.929
epoch 444: training loss: 4.559 | training accuracy: 0.853 | validation loss: 2.747 | validation accuracy: 0.929
epoch 445: training loss: 4.658 | training accuracy: 0.865 | validation loss: 2.726 | validation accuracy: 0.928
epoch 446: training loss: 4.812 | training accuracy: 0.850 | validation loss: 2.765 | validation accuracy: 0.933
epoch 447: training loss: 4.442 | training accuracy: 0.847 | validation loss: 2.706 | validation accuracy: 0.930
epoch 448: training loss: 4.315 | training accuracy: 0.876 | validation loss: 2.790 | validation accuracy: 0.917
epoch 449: training loss: 5.181 | training accuracy: 0.823 | validation loss: 2.771 | validation

In [16]:
model.eval()
with torch.no_grad():
    y_pred = model(torch.from_numpy(X_test).float().to(device))

y_pred_softmax = torch.log_softmax(y_pred, dim = 1)
_, y_pred_tags = torch.max(y_pred_softmax, dim = 1) 
ans = pd.DataFrame({'id':test_id, 'Race':race_encoder.inverse_transform(y_pred_tags.cpu())})
ans

Unnamed: 0,id,Race
0,504,Material
1,828,Elf
2,907,Human
3,775,Beast
4,955,Human
...,...,...
495,1476,God
496,972,Human
497,323,Human
498,78,Beast


In [17]:
ans.to_csv("./submission.csv", index=False)