In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
from matplotlib import pyplot as plt

In [2]:
import pandas as pd
import numpy as np

In [3]:
X_train = pd.read_csv("x_train.csv")
y_train = pd.read_csv("y_train.csv")
X_test = pd.read_csv("x_test.csv")
X_train.head()

Unnamed: 0,id,timestamp_first_active,age,signup_flow,month_account_created,gender_FEMALE,gender_MALE,gender_OTHER,signup_method_basic,signup_method_facebook,...,first_browser_Silk,first_browser_SiteKiosk,first_browser_SlimBrowser,first_browser_Sogou Explorer,first_browser_Stainless,first_browser_TenFourFox,first_browser_TheWorld Browser,first_browser_UC Browser,first_browser_Yandex.Browser,first_browser_wOSBrowser
0,gxn3p5htnn,0.0,0.241611,0.0,0.454545,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
1,820tgsjxq7,0.004057,0.248322,0.0,0.363636,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,4ft3gnwmtx,0.005768,0.369128,0.12,0.727273,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,bjjt8pjhuk,0.014152,0.275168,0.0,1.0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,87mebub9p4,0.01767,0.268456,0.0,0.727273,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
y_train.head()

Unnamed: 0,country_AU,country_CA,country_DE,country_ES,country_FR,country_GB,country_IT,country_NDF,country_NL,country_PT,country_US,country_other
0,0,0,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,1,0


In [5]:
id_user_train = X_train['id']
id_user_test = X_test['id']
X_train = X_train.drop(['id'], axis = 1)
X_test = X_test.drop(['id'], axis = 1)

In [6]:
X_train = np.array(X_train)
y_train = np.array(y_train)

In [7]:
X_train.shape

(213451, 138)

In [8]:
c = list(zip(X_train, y_train))
import random
random.shuffle(c)
X_train, y_train = zip(*c)

X_valid = X_train[170760:]
y_valid = y_train[170760:]
X_train = X_train[:170760]
y_train = y_train[:170760]

In [9]:
transform_x_train = torch.tensor(X_train, dtype=torch.float)
transform_y_train = torch.tensor(y_train, dtype=torch.float)

transform_x_valid = torch.tensor(X_valid, dtype = torch.float)
transform_y_valid = torch.tensor(y_valid, dtype = torch.float)

In [10]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device.".format(device))

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.linear_chain = nn.Sequential(
            nn.Linear(138, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 12),
            nn.Softmax(dim = -1)
        )
    def forward(self, x):
        result = self.linear_chain(x)
        return result

model = NeuralNetwork().to(device)
print(model)

Using cpu device.
NeuralNetwork(
  (linear_chain): Sequential(
    (0): Linear(in_features=138, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=256, bias=True)
    (5): ReLU()
    (6): Linear(in_features=256, out_features=128, bias=True)
    (7): ReLU()
    (8): Linear(in_features=128, out_features=12, bias=True)
    (9): Softmax(dim=-1)
  )
)


In [11]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
batch_size = 64

def train(x_train, y_train, model, loss_fn, optimizer):
    size = x_train.shape[0]
    for t in range(size // batch_size):
        X = x_train[t*batch_size:min((t+1)*batch_size, size)]
        y = y_train[t*batch_size:min((t+1)*batch_size, size)]
        
        pred = model(X)
        loss = loss_fn(pred, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if t % 100 == 0:
            current = min((t+1)*batch_size, size)
            loss= loss.item()
            print(f"loss: {loss:>7f}, [ current: {current}/{size} ]")

In [12]:
epochs = 100
y_true = torch.argsort(transform_y_valid, dim = 1, descending=True)[:, 0]
history_accuracy = [0]
t = 0
while True:
    print(f"Epochs {t+1}\n.....................................")
    t+=1
    train(transform_x_train, transform_y_train, model, loss_fn, optimizer)
    pred = model(transform_x_valid)
    pred = torch.argsort(pred, dim = 1, descending = True)[:, 0]
    accuracy = (pred == y_true).sum() / y_true.shape[0]
    history_accuracy.append(accuracy.item())
    print(f"Accuracy on validation set: {accuracy.item():>7f}")
    
    if(history_accuracy[-1] - history_accuracy[-2] < 0.0001 or t > 100):
        break
print("Done!")

Epochs 1
.....................................
loss: 0.075677, [ current: 64/170760 ]
loss: 0.044078, [ current: 6464/170760 ]
loss: 0.045263, [ current: 12864/170760 ]
loss: 0.042304, [ current: 19264/170760 ]
loss: 0.044173, [ current: 25664/170760 ]
loss: 0.047586, [ current: 32064/170760 ]
loss: 0.046748, [ current: 38464/170760 ]
loss: 0.048328, [ current: 44864/170760 ]
loss: 0.039852, [ current: 51264/170760 ]
loss: 0.040523, [ current: 57664/170760 ]
loss: 0.044186, [ current: 64064/170760 ]
loss: 0.049223, [ current: 70464/170760 ]
loss: 0.042356, [ current: 76864/170760 ]
loss: 0.040456, [ current: 83264/170760 ]
loss: 0.037371, [ current: 89664/170760 ]
loss: 0.033693, [ current: 96064/170760 ]
loss: 0.053879, [ current: 102464/170760 ]
loss: 0.038571, [ current: 108864/170760 ]
loss: 0.048038, [ current: 115264/170760 ]
loss: 0.040418, [ current: 121664/170760 ]
loss: 0.043215, [ current: 128064/170760 ]
loss: 0.033959, [ current: 134464/170760 ]
loss: 0.045778, [ current: 

loss: 0.043334, [ current: 121664/170760 ]
loss: 0.043174, [ current: 128064/170760 ]
loss: 0.035846, [ current: 134464/170760 ]
loss: 0.044750, [ current: 140864/170760 ]
loss: 0.036968, [ current: 147264/170760 ]
loss: 0.036815, [ current: 153664/170760 ]
loss: 0.051179, [ current: 160064/170760 ]
loss: 0.033933, [ current: 166464/170760 ]
Accuracy on validation set: 0.611464
Epochs 8
.....................................
loss: 0.043895, [ current: 64/170760 ]
loss: 0.042710, [ current: 6464/170760 ]
loss: 0.042627, [ current: 12864/170760 ]
loss: 0.042200, [ current: 19264/170760 ]
loss: 0.045192, [ current: 25664/170760 ]
loss: 0.045946, [ current: 32064/170760 ]
loss: 0.043796, [ current: 38464/170760 ]
loss: 0.046380, [ current: 44864/170760 ]
loss: 0.041369, [ current: 51264/170760 ]
loss: 0.038016, [ current: 57664/170760 ]
loss: 0.040904, [ current: 64064/170760 ]
loss: 0.048250, [ current: 70464/170760 ]
loss: 0.042463, [ current: 76864/170760 ]
loss: 0.037643, [ current: 832

In [13]:
for ind, item in enumerate(history_accuracy):
    print(f"Epoch {ind}:\n \t Accuracy: {item}")

Epoch 0:
 	 Accuracy: 0
Epoch 1:
 	 Accuracy: 0.6021878123283386
Epoch 2:
 	 Accuracy: 0.6026094555854797
Epoch 3:
 	 Accuracy: 0.6028202772140503
Epoch 4:
 	 Accuracy: 0.6034995913505554
Epoch 5:
 	 Accuracy: 0.6036167144775391
Epoch 6:
 	 Accuracy: 0.6073645353317261
Epoch 7:
 	 Accuracy: 0.6114637851715088
Epoch 8:
 	 Accuracy: 0.6123304963111877
Epoch 9:
 	 Accuracy: 0.6120962500572205


In [14]:
label = pd.read_csv("y_train.csv")

label = list(label.columns)

In [15]:
label

['country_AU',
 'country_CA',
 'country_DE',
 'country_ES',
 'country_FR',
 'country_GB',
 'country_IT',
 'country_NDF',
 'country_NL',
 'country_PT',
 'country_US',
 'country_other']

In [16]:
transform_x_test = torch.tensor(X_test.values, dtype = torch.float)

id_user_test = np.array(id_user_test)

In [17]:
pred = model(transform_x_test)
ids = []
result = []
for i in range(len(id_user_test)):
    idx = id_user_test[i]
    ids += [idx] * 5
    temp = list(torch.argsort(pred[i], descending = True)[:5])
    for x in temp:
        result.append(label[x][8:])

In [18]:
len(ids), len(result)

(310480, 310480)

In [19]:
result

['NDF',
 'US',
 'other',
 'FR',
 'IT',
 'NDF',
 'US',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'NDF',
 'US',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'NDF',
 'US',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'NDF',
 'US',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',
 'FR',
 'IT',
 'US',
 'NDF',
 'other',


In [20]:
result_ = {'id': ids, 'country': result}

submission = pd.DataFrame(result_)

submission.to_csv("submission.csv", index=False)