In [54]:
import numpy as np 
import torch
import torch.nn as nn
import torch.nn.functional as F 
import os 
from scipy import stats
import pandas as pd 

In [55]:
titanic_data =pd.read_csv('/kaggle/input/titanic/train.csv')
titanic_data.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [56]:
df=pd.concat([titanic_data,
             pd.get_dummies(titanic_data['Sex']).astype(int),
             pd.get_dummies(titanic_data['Embarked'],prefix='Embarked').astype(int),
             pd.get_dummies(titanic_data['Pclass'],prefix='class').astype(int)],axis=1)

In [58]:
df['Age']=df['Age'].fillna(df.Age.mean())
df['Fare']=df['Fare'].fillna(df.Fare.mean())

In [59]:
df_clean=df.drop(['PassengerId','Name','Ticket','Cabin','Sex','Embarked','Pclass'],axis=1)
df_clean.head()

Unnamed: 0,Survived,Age,SibSp,Parch,Fare,female,male,Embarked_C,Embarked_Q,Embarked_S,class_1,class_2,class_3
0,0,22.0,1,0,7.25,0,1,0,0,1,0,0,1
1,1,38.0,1,0,71.2833,1,0,1,0,0,1,0,0
2,1,26.0,0,0,7.925,1,0,0,0,1,0,0,1
3,1,35.0,1,0,53.1,1,0,0,0,1,1,0,0
4,0,35.0,0,0,8.05,0,1,0,0,1,0,0,1


In [61]:
labels=df_clean['Survived'].to_numpy()

df_clean=df_clean.drop(['Survived'],axis=1)
data=df_clean.to_numpy()

In [63]:
feature_names=list(df_clean.columns)

In [64]:
np.random.seed(10)
train_indices=np.random.choice(len(labels),int(0.7*len(labels)),replace=False)
test_indices=list(set(range(len(labels)))-set(train_indices))

train_features=data[train_indices]
train_labels=labels[train_indices]

test_features=data[test_indices]
test_labels=labels[test_indices]

len(test_labels)

268

In [65]:
class Mish(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self,x):
        x=x*(torch.tanh(F.softplus(x))) # 每层网络层使用Mish作为激活函数
        return x
    
torch.manual_seed(0)

class ThreelinearModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1=nn.Linear(12,12)
        self.mish1=Mish()
        self.linear2=nn.Linear(12,8)
        self.mish2=Mish()
        self.linear3=nn.Linear(8,2)
        self.softmax=nn.Softmax(dim=1)
        self.criterion=nn.CrossEntropyLoss()
        
    def forward(self,x):
        lin1_out=self.linear1(x)
        out1=self.mish1(lin1_out)
        out2=self.mish2(self.linear2(out1))
        return self.softmax(self.linear3(out2))
    
    def getloss(self,x,y):
        y_pred=self.forward(x)
        loss=self.criterion(y_pred,y)
        return loss
        


In [68]:
if __name__=='__main__':
    net=ThreelinearModel()
    num_epochs=200
    optimizer =torch.optim.Adam(net.parameters(),lr=0.04)
    
    input_tensor=torch.from_numpy(train_features).type(torch.FloatTensor)
    
    label_tensor =torch.from_numpy(train_labels)
    losses=[]
    for epoch in range(num_epochs):
        loss=net.getloss(input_tensor,label_tensor)
        losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch %20==0:
            print('Epoch{}/{} => Loss: {:.2f}'.format(epoch+1,num_epochs,loss.item()))
    os.makedirs('models',exist_ok=True)
    torch.save(net.state_dict(),'models/titanic_model.pt')
    
    out_probs=net(input_tensor).detach().numpy()
    out_classes=np.argmax(out_probs,axis=1)
    print("accuracy：",sum(out_classes==train_labels)/len(train_labels))
    
    test_input_tensor=torch.from_numpy(test_features).type(torch.FloatTensor)
    out_probs=net(test_input_tensor).detach().numpy()
    out_classes=np.argmax(out_probs,axis=1)
    print("accuracy：",sum(out_classes==test_labels)/len(test_labels))

Epoch1/200 => Loss: 0.70
Epoch21/200 => Loss: 0.51
Epoch41/200 => Loss: 0.51
Epoch61/200 => Loss: 0.50
Epoch81/200 => Loss: 0.50
Epoch101/200 => Loss: 0.50
Epoch121/200 => Loss: 0.50
Epoch141/200 => Loss: 0.50
Epoch161/200 => Loss: 0.49
Epoch181/200 => Loss: 0.49
accuracy： 0.8025682182985554
accuracy： 0.8134328358208955


In [79]:
test=pd.read_csv('/kaggle/input/titanic/test.csv')
test_df=pd.concat([test,
             pd.get_dummies(test['Sex']).astype(int),
             pd.get_dummies(test['Embarked'],prefix='Embarked').astype(int),
             pd.get_dummies(test['Pclass'],prefix='class').astype(int)],axis=1)

test_df['Age']=test_df['Age'].fillna(df.Age.mean())
test_df['Fare']=test_df['Fare'].fillna(df.Fare.mean())


Id=test_df['PassengerId']
test_df_clean=test_df.drop(['PassengerId','Name','Ticket','Cabin','Sex','Embarked','Pclass'],axis=1)
pred_features=test_df_clean.to_numpy()

In [80]:
pred_input_tensor=torch.from_numpy(pred_features).type(torch.FloatTensor)
pred_out_probs=net(pred_input_tensor).detach().numpy()
pred_classes=np.argmax(pred_out_probs,axis=1)

In [83]:
submission= pd.DataFrame({
    'PassengerId': Id,
    'Survived': pred_classes[:],
})

In [84]:
# Save the submission file
submission.to_csv('submission.csv', index=False)