In [49]:
# Load the Titanic dataset and import the necessary packages
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import os
from sklearn import preprocessing

def csv2npy(data):
    """Convert a DataFrame into a 2-D NumPy array with one row per record.

    Every column is extracted as a 1 x n_rows array and the pieces are
    stacked one after another, so the dtype of the result follows NumPy's
    promotion rules for the column dtypes. The stacked array is transposed
    before it is returned, so ``result[i]`` holds all attributes of row ``i``.

    Args:
        data: DataFrame with at least one column.

    Returns:
        Array of shape (n_rows, n_columns).

    Raises:
        IndexError: if ``data`` has no columns.
    """
    names = list(data.columns)
    # Wrapping the column in a list keeps the seed 2-D (1 x n_rows)
    # instead of a flat vector.
    stacked = np.array([data[names[0]]])
    for name in names[1:]:
        row = np.array([data[name]])
        stacked = np.vstack((stacked, row))
    # Rows of `stacked` are columns of `data`; flip so rows are records.
    return stacked.T

#titanic_data = pd.read_csv('../input/titanic/train.csv')

def dataprocess(titanic_data):
    """Turn a raw Titanic DataFrame into feature and label tensors.

    Steps:
      1. Drop the 'PassengerId', 'Cabin', 'Name', 'Ticket' and 'Embarked'
         columns.
      2. Add an all-zero 'Survived' column when it is absent (test data), so
         both files go through the same code path.
      3. Convert the remaining feature columns to an array with ``csv2npy``
         and label-encode the second feature column with scikit-learn's
         ``LabelEncoder``.
      4. Convert to tensors, fill missing values and scale every feature
         column by its maximum.

    Args:
        titanic_data: DataFrame containing the columns dropped in step 1.
            It is not modified; ``drop`` returns a new frame.

    Returns:
        A two-element list ``[features, labels]``: ``features`` is a float
        tensor with one row per passenger, ``labels`` is a long tensor
        (all zeros when the input had no 'Survived' column).
    """
    titanic_data = titanic_data.drop(['PassengerId','Cabin','Name','Ticket','Embarked'],axis = 1)
    # When evaluating test data, we make this column to avoid error
    if 'Survived' not in titanic_data.columns:
        titanic_data['Survived'] = 0
    titnp_s = np.array(titanic_data['Survived']) # Survived data
    titnp_d = csv2npy(titanic_data.drop(['Survived'],axis = 1)) # Pure information

    # Label-encode the second feature column (gender): each distinct value
    # is replaced by an integer code. Transposing makes the column
    # addressable as a single row.
    titnp_d = titnp_d.T
    le = preprocessing.LabelEncoder()
    titnp_d[1] = le.fit_transform(titnp_d[1])
    titnp_d = titnp_d.T

    #'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare'

    # The flow is csv -> numpy array -> list -> tensor.
    tit_d = torch.Tensor(titnp_d.tolist()).type(torch.FloatTensor)
    tit_s = torch.Tensor(titnp_s.tolist()).type(torch.LongTensor)

    # Washing data, one feature column at a time. An input with no rows
    # yields a 1-D empty tensor, which has nothing to clean.
    if tit_d.dim() == 2:
        for att in range(tit_d.shape[1]):
            column = tit_d[:, att]
            missing = torch.isnan(column)
            if missing.any():
                # Fill gaps with the first known value of the column. This is
                # the first passenger's value whenever that one is present,
                # and it still works when the first passenger's value is
                # itself missing. A column with no known value becomes zeros.
                known = column[~missing]
                fill = known[0].item() if known.numel() > 0 else 0.0
                column = torch.where(missing, torch.full_like(column, fill), column)
            # Take the maximum ONCE, before any element is rescaled.
            # Recomputing it while dividing in place would scale later
            # elements by a different, already-shrunk maximum.
            peak = torch.max(column)
            if peak != 0:
                column = column / peak
            tit_d[:, att] = column

    return [tit_d,tit_s]

# Read and preprocess the training file once, then unpack the
# [features, labels] pair instead of repeating the whole pipeline per element.
tit_d, tit_s = dataprocess(pd.read_csv('../input/titanic/train.csv'))

class Net(torch.nn.Module):
    """Fully connected network: three sigmoid hidden layers and a linear head."""

    def __init__(self,n_input,n_hidden,n_output):
        """Create the layers.

        Args:
            n_input: number of input features.
            n_hidden: width shared by all three hidden layers.
            n_output: number of output units.
        """
        super().__init__()
        # Three hidden layers followed by the output layer.
        self.hidden1 = nn.Linear(n_input, n_hidden)
        self.hidden2 = nn.Linear(n_hidden, n_hidden)
        self.hidden3 = nn.Linear(n_hidden, n_hidden)
        self.predict = nn.Linear(n_hidden, n_output)

    def forward(self, input):
        """Return the output layer's raw values for ``input``.

        Each hidden layer is followed by a sigmoid; the output layer has no
        activation applied.
        """
        activations = input
        for layer in (self.hidden1, self.hidden2, self.hidden3):
            activations = F.sigmoid(layer(activations))
        return self.predict(activations)

# Build a network with 6 inputs, 20 units per hidden layer and 2 outputs,
# then train it with SGD on the full training set at every iteration.
net = Net(6,20,2)
optimizer = torch.optim.SGD(net.parameters(),lr=0.02)
loss_func = torch.nn.CrossEntropyLoss()
for t in range(100000):
    out = net(tit_d)
    loss = loss_func(out,tit_s)
    # Print the current state from time to time
    if t % 10000 == 0:
        print('Train Epoch: {} \tLoss: {:.6f};'.format(
                    t, loss.item()))
        # Softmax is monotonic, so the arg-max of the raw outputs along the
        # class dimension is the predicted class. This also avoids calling
        # softmax without an explicit `dim`.
        prediction = torch.argmax(out.detach(), dim = 1)
        # Fraction of training samples classified correctly
        accuracy = (prediction == tit_s).float().mean().item()
        print('Accuracy: {:.4f}.'.format(accuracy))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Make file
# Read the test CSV once so its own PassengerId column can be written out,
# rather than assuming the ids start at 892 and are consecutive.
test_df = pd.read_csv('../input/titanic/test.csv')
test_d = dataprocess(test_df)[0]
# Inference only: no gradients needed. The arg-max of the raw outputs along
# the class dimension equals the arg-max of their softmax.
with torch.no_grad():
    pred_test = torch.argmax(net(test_d), dim = 1)
surv_pred = pred_test.tolist()
pass_id = test_df['PassengerId'].tolist()
dataframe = pd.DataFrame({'PassengerId':pass_id,'Survived':surv_pred})
dataframe.to_csv("./prediction.csv",index = False,sep = ',')