In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder

In [2]:
# Kaggle competition data: a training set and a test set, both indexed by `id`.
train = pd.read_csv(
    'datasets/train.csv',
    index_col='id',
)
test = pd.read_csv(
    'datasets/test.csv',
    index_col='id',
)

In [3]:
# One-hot encode the categorical `color` column.
object_train = train.select_dtypes(include='object')['color']
object_test = test.select_dtypes(include='object')['color']
# Every non-object column is kept as a numeric feature.
num_train = train.select_dtypes(exclude='object')
num_test = test.select_dtypes(exclude='object')
one_hot_train = pd.get_dummies(object_train, dtype='float')
# Encode the test colours, then align them to the training columns: a colour
# absent from the test split becomes an all-zero column, and a colour never
# seen in training is dropped. This keeps the feature layout (and width)
# identical for both splits; when the colour sets already match it is a no-op.
one_hot_test = pd.get_dummies(object_test, dtype='float').reindex(
    columns=one_hot_train.columns,
    fill_value=0.0,
)

In [4]:
# Build the feature frames: numeric columns joined (on the index) with the
# one-hot encoded colour columns.
X_train = num_train.join(other=one_hot_train)
X_test = num_test.join(other=one_hot_test)
# Target column, taken as-is from the training frame.
y_train = train.loc[:, 'type']

In [5]:
# Encode the string target labels as integer class ids.
y_train_categorical = train.select_dtypes(include='object')['type']
label_encoder_train = LabelEncoder()
# Fit the encoder and encode the labels in a single step.
label_encoded_y = label_encoder_train.fit_transform(y_train_categorical)
# Re-wrap the encoded ids as a Series that keeps the original `id` index.
y_train = pd.Series(data=label_encoded_y, index=train.index, name='type')

In [6]:
# Convert the pandas objects to tensors for the neural network:
# float32 features, int64 class ids.
X_train = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
X_test = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
y_train = torch.tensor(y_train.to_numpy(), dtype=torch.int64)

In [7]:
# Sanity check: shape of the training feature tensor (rows, feature columns).
X_train.shape

torch.Size([371, 10])

In [8]:
class MonsterNet(torch.nn.Module):
    """Single-hidden-layer classifier: Linear -> Sigmoid -> Linear.

    `forward` returns raw logits (the form `torch.nn.CrossEntropyLoss`
    expects); `inference` additionally applies a softmax over the class
    dimension to turn the logits into probabilities.

    Args:
        n_input: Number of input features per sample.
        n_hidden_neurons: Width of the hidden layer.
        n_classes: Number of output classes. Defaults to 3, the value that
            was previously hard-coded.
    """

    def __init__(self, n_input, n_hidden_neurons, n_classes=3):
        super().__init__()
        self.fc1 = torch.nn.Linear(n_input, n_hidden_neurons)
        self.activ1 = torch.nn.Sigmoid()
        self.fc2 = torch.nn.Linear(n_hidden_neurons, n_classes)
        # Softmax over dim=1, i.e. across the class scores of each sample.
        self.sm = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch `x`."""
        x = self.fc1(x)
        x = self.activ1(x)
        x = self.fc2(x)
        return x

    def inference(self, x):
        """Return per-class probabilities for a batch `x` (each row sums to 1)."""
        x = self.forward(x)
        x = self.sm(x)
        return x
    
# Seed torch's global RNG so the initial layer weights are the same on every run.
torch.manual_seed(42)

# Number of input neurons == number of features; derived from the data so the
# model keeps matching the feature matrix if the feature set changes.
n_input = X_train.shape[1]
n_hidden = 10
monster_net = MonsterNet(n_input, n_hidden)

In [9]:
# Cross-entropy criterion; it is applied to the network's raw logits.
loss = torch.nn.CrossEntropyLoss()
# Adam over all trainable parameters of the network, learning rate 1e-3.
optimizer = torch.optim.Adam(
    params=monster_net.parameters(),
    lr=1e-3,
)

In [10]:
batch_size = 16   # samples per optimisation step (mini-batch size)
n_epochs = 2000   # full passes over the training set

# Seeded generator so the per-epoch shuffling order is reproducible.
rng = np.random.default_rng(42)

for epoch in range(n_epochs):
    # Fresh random visiting order for every epoch.
    order = rng.permutation(len(X_train))
    for start_index in range(0, len(X_train), batch_size):
        optimizer.zero_grad()

        # The final slice of an epoch may hold fewer than `batch_size` samples.
        batch_indexes = order[start_index:start_index + batch_size]

        x_batch = X_train[batch_indexes]
        y_batch = y_train[batch_indexes]

        # Call the module itself rather than `.forward()` so that any
        # registered hooks run as PyTorch intends.
        preds = monster_net(x_batch)

        loss_value = loss(preds, y_batch)
        loss_value.backward()

        optimizer.step()
        

In [11]:
# Predict class probabilities for the test set. Gradient tracking is switched
# off because no backward pass follows, so no autograd graph is built.
with torch.no_grad():
    predict_monsters = monster_net.inference(X_test)

In [12]:
# Put the predicted probabilities into a DataFrame indexed by the test ids
# (one column per class id).
predict_net = pd.DataFrame(
    data=predict_monsters.detach().numpy(),
    index=test.index,
)

In [13]:
# Preview the per-class probabilities (columns are class ids, rows are test ids).
predict_net

Unnamed: 0_level_0,0,1,2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,0.000036,0.906082,0.093883
6,0.130025,0.349295,0.520680
9,0.001514,0.718692,0.279794
10,0.259773,0.288508,0.451719
13,0.980591,0.000491,0.018918
...,...,...,...
893,0.003819,0.192505,0.803677
894,0.996694,0.000022,0.003284
895,0.000038,0.914268,0.085695
898,0.999279,0.000005,0.000716


In [14]:
# For every row pick the column label (class id) with the highest probability;
# the resulting Series is named 'type'.
predict_net = predict_net.idxmax(axis=1).rename('type')


In [15]:
# Preview the predicted class id for each test id.
predict_net

id
3      1
6      2
9      1
10     2
13     0
      ..
893    2
894    0
895    1
898    0
899    0
Name: type, Length: 529, dtype: int64

In [16]:
# Convert the predicted class ids back to the categorical labels.
# The id -> label mapping is taken from the fitted encoder (position in
# `classes_` is the encoded id) instead of being typed out by hand, so it
# cannot drift from the encoding used for training.
monst = {
    class_id: str(label)
    for class_id, label in enumerate(label_encoder_train.classes_)
}
predict_net = predict_net.map(monst)

In [17]:
# Write the predictions to a CSV file (the `id` index becomes a regular column).
output_path = Path(r'predicted/predict_monsters_neuron.csv')
# Create the output directory if needed; `to_csv` fails when it is missing.
output_path.parent.mkdir(parents=True, exist_ok=True)
predict_net.reset_index().to_csv(output_path, index=False)