使用神经网络解决Titanic问题，主要用两种实现，一种是纯pytorch实现神经网络的搭建，另一种是使用skorch包装好的以pytorch为后端的API实现。

In [73]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import time
import copy
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import warnings
import torch
torch.manual_seed(0)
warnings.filterwarnings("ignore")
print(torch.__version__)

0.4.1


读入数据文件，这里省去了特征工程的部分，特征采用之前特征工程提取出的特征。

In [74]:
train=pd.read_csv('train_features.csv')
test=pd.read_csv('test_features.csv')
train.head()

Unnamed: 0,PassengerId,Pclass,Sex,Title,Family_size,Family_Survival,FareBin_Code,AgeBin_Code,Embarked_C,Embarked_None,Embarked_Q,Embarked_S,Survived
0,1,3,0,0,2,0.5,0,2,0,0,0,1,0.0
1,2,1,1,2,2,0.5,4,3,1,0,0,0,1.0
2,3,3,1,1,1,0.5,1,2,0,0,0,1,1.0
3,4,1,1,2,2,0.0,4,3,0,0,0,1,1.0
4,5,3,0,0,1,0.5,1,3,0,0,0,1,0.0


In [75]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 13 columns):
PassengerId        891 non-null int64
Pclass             891 non-null int64
Sex                891 non-null int64
Title              891 non-null int64
Family_size        891 non-null int64
Family_Survival    891 non-null float64
FareBin_Code       891 non-null int64
AgeBin_Code        891 non-null int64
Embarked_C         891 non-null int64
Embarked_None      891 non-null int64
Embarked_Q         891 non-null int64
Embarked_S         891 non-null int64
Survived           891 non-null float64
dtypes: float64(2), int64(11)
memory usage: 90.6 KB


In [76]:
train.describe()

Unnamed: 0,PassengerId,Pclass,Sex,Title,Family_size,Family_Survival,FareBin_Code,AgeBin_Code,Embarked_C,Embarked_None,Embarked_Q,Embarked_S,Survived
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,446.0,2.308642,0.352413,0.686869,1.904602,0.519641,1.98541,2.433221,0.188552,0.002245,0.08642,0.722783,0.383838
std,257.353842,0.836071,0.47799,0.97245,1.613459,0.323961,1.411355,1.370957,0.391372,0.047351,0.281141,0.447876,0.486592
min,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,223.5,2.0,0.0,0.0,1.0,0.5,1.0,2.0,0.0,0.0,0.0,0.0,0.0
50%,446.0,3.0,0.0,0.0,1.0,0.5,2.0,2.0,0.0,0.0,0.0,1.0,0.0
75%,668.5,3.0,1.0,1.0,2.0,0.5,3.0,3.0,0.0,0.0,0.0,1.0,1.0
max,891.0,3.0,1.0,4.0,11.0,1.0,4.0,7.0,1.0,1.0,1.0,1.0,1.0


In [77]:
X_train=train.drop(['PassengerId','Survived'],axis=1).as_matrix()
Y_train=train['Survived'].astype(int).as_matrix()

X_test=test.drop(['PassengerId','Survived'],axis=1).as_matrix()
IDtest=test['PassengerId']

# scalar
scaler=MinMaxScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

# 使用Pytorch搭建模型

## 数据载入和处理

In [78]:
class TitanicFeaturesDataset(Dataset):
    def __init__(self,X,y=None,transform=None):
        self.X=X
        self.y=y
        self.transform=transform
    
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self,idx):
        if self.y is not None:
            sample={'X':self.X[idx],'y':self.y[idx]}
        else:
            sample={'X':self.X[idx]}
        if self.transform is not None:
            sample=self.transform(sample)
        return sample

In [79]:
# transform class
class ToTensor(object):
    def __call__(self,sample):
        if 'y' in sample.keys():
            X,y=sample['X'],sample['y']
            return {
                'X':torch.from_numpy(X.astype(np.float32)),
                'y':torch.squeeze(torch.from_numpy(np.array([y.astype(np.int64)])))
            }
        else:
            X=sample['X']
            return {
                'X':torch.from_numpy(X.astype(np.float32))
            }

In [80]:
# Model class
# 自定义Model需要继承nn.Module
class ClassifierModule(nn.Module):
    def __init__(self,D_in=11,D_out=2,num_units=20,nonlin=F.relu,dropout=0.5):
        super(ClassifierModule,self).__init__()
        self.num_units=num_units
        self.nonlin=nonlin
        self.dropout=nn.Dropout(dropout)
        
        self.linear1=nn.Linear(D_in,num_units)
        self.linear2=nn.Linear(num_units,10)
        self.output=nn.Linear(10,2)
        
    def forward(self,X):
        X=self.nonlin(self.linear1(X))
        X=self.dropout(X)
        X=self.nonlin(self.linear2(X))
        X=self.dropout(X)
        X=self.output(X)
        return X

In [81]:
# train model
def train_model(model,criterion,optimiizer,scheduler,dataset_sizes,num_epochs=100,device='cpu'):
    start=time.time()
    best_model_wts=copy.deepcopy(model.state_dict())
    best_acc=0.0
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch,num_epochs-1))
        print('-'*10)
        
        for phase in ['train','val']:
            if phase == 'train':
                scheduler.step()
                model.train()
            else:
                model.eval()
            running_loss=0.0
            running_corrects=0
            for sample_batches in dataloaders[phase]:
                inputs=sample_batches['X'].to(device)
                labels=sample_batches['y'].to(device)
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase=='train'):
                    outputs=model(inputs)
                    _,preds=torch.max(outputs,1)
                    loss=criterion(outputs,labels)
                    
                    if phase=='train':
                        loss.backward()
                        optimizer.step()
                    running_loss+=loss.item()*inputs.size(0)
                    running_corrects+=torch.sum(preds==labels.data)
            epoch_loss=running_loss/dataset_sizes[phase]
            epoch_acc=running_corrects.double()/dataset_sizes[phase]
            print('{} loss: {:.4f} Acc: {:.4f}'.format(phase,epoch_loss,epoch_acc))
            
            if phase=='val' and epoch_acc>best_acc:
                best_acc=epoch_acc
                best_model_wts=copy.deepcopy(model.state_dict())
    time_elapsed=time.time()-start
    print('Trainning complete in {:.0f}m {:.0f}s'.format(
            time_elapsed//60,time_elapsed%60))
    print('Best val Acc: {:4f}'.format(best_acc))
    model.load_state_dict(best_model_wts)
    return model              
                

In [82]:
# load data and train
train_dataset_len=X_train.shape[0]
train_len=train_dataset_len*4//5

transformed_datasets = {
        'train': TitanicFeaturesDataset(X_train[:train_len],Y_train[:train_len], transform=transforms.Compose([ToTensor()])),
        'val': TitanicFeaturesDataset(X_train[train_len:], Y_train[train_len:], transform=transforms.Compose([ToTensor()])),
        'test':TitanicFeaturesDataset(X_test,transform=transforms.Compose([ToTensor()]))
        }

dataloaders = {x: DataLoader(transformed_datasets[x], batch_size=16,
                                 shuffle=True, num_workers=0)
                   for x in ['train', 'val']}
dataloaders['test']=DataLoader(transformed_datasets['test'],batch_size=16,shuffle=False,num_workers=0)

dataset_sizes = {x: len(transformed_datasets[x]) for x in ['train', 'val','test']}
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


In [83]:
model=ClassifierModule()
criterion=nn.CrossEntropyLoss()
optimizer=optim.SGD(model.parameters(),lr=2e-2,momentum=0.9)
exp_lr_scheduler=lr_scheduler.StepLR(optimizer,step_size=7,gamma=0.1)

model=train_model(model,criterion,optimizer,exp_lr_scheduler,dataset_sizes,num_epochs=100,device=device)


Epoch 0/99
----------
train loss: 0.6776 Acc: 0.5688
val loss: 0.6443 Acc: 0.6425
Epoch 1/99
----------
train loss: 0.6423 Acc: 0.6096
val loss: 0.5799 Acc: 0.6425
Epoch 2/99
----------
train loss: 0.6237 Acc: 0.6180
val loss: 0.5448 Acc: 0.8268
Epoch 3/99
----------
train loss: 0.6081 Acc: 0.6840
val loss: 0.4879 Acc: 0.8436
Epoch 4/99
----------
train loss: 0.5794 Acc: 0.7008
val loss: 0.4660 Acc: 0.8547
Epoch 5/99
----------
train loss: 0.5622 Acc: 0.7205
val loss: 0.4254 Acc: 0.8659
Epoch 6/99
----------
train loss: 0.5367 Acc: 0.7275
val loss: 0.3823 Acc: 0.8771
Epoch 7/99
----------
train loss: 0.5045 Acc: 0.7570
val loss: 0.3828 Acc: 0.8715
Epoch 8/99
----------
train loss: 0.5299 Acc: 0.7542
val loss: 0.3919 Acc: 0.8492
Epoch 9/99
----------
train loss: 0.5182 Acc: 0.7528
val loss: 0.3942 Acc: 0.8547
Epoch 10/99
----------
train loss: 0.5024 Acc: 0.7683
val loss: 0.3924 Acc: 0.8492
Epoch 11/99
----------
train loss: 0.5073 Acc: 0.7626
val loss: 0.3918 Acc: 0.8492
Epoch 12/99
--

val loss: 0.3836 Acc: 0.8603
Epoch 99/99
----------
train loss: 0.4981 Acc: 0.7626
val loss: 0.3836 Acc: 0.8603
Trainning complete in 0m 9s
Best val Acc: 0.877095


In [84]:
# save model and reload it
torch.save(model.state_dict(),'pytorch_model.pth')
reloaded_model=ClassifierModule()
reloaded_model.load_state_dict(torch.load('pytorch_model.pth'))
# predicts on  test dataset

reloaded_model.eval()
final_predicts=[]
with torch.no_grad():
    for samples in dataloaders['test']:
        inputs=samples['X'].to(device)
        outputs=reloaded_model(inputs)
        _,preds=torch.max(outputs,1)
        for y_predict in list(preds.numpy()):
            final_predicts.append(y_predict)
final_predicts=np.array(final_predicts).reshape(-1,)
predict_survived_pytorch=pd.Series(final_predicts,name='Survived')
pytorch_result=pd.concat([IDtest,predict_survived_pytorch],axis=1)
pytorch_result.to_csv('pytorch_result.csv',index=False)
pytorch_result.head()
            

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1


# 使用skorch搭建模型

In [96]:
from skorch import NeuralNetClassifier
from skorch.dataset import CVSplit
from sklearn.model_selection import GridSearchCV
import skorch
print(skorch.__version__)

class skorchModule(nn.Module):
    def __init__(self,num_units=20):
        super(skorchModule,self).__init__()
        self.net=ClassifierModule(num_units=num_units)
    
    def forward(self,X):
        X=self.net(X)
        X=F.softmax(X,dim=-1)
        return X
        
        
model = NeuralNetClassifier(
        skorchModule,
        max_epochs=200,
        lr=0.02,
        train_split=CVSplit(5),
        #     device='cuda',  # uncomment this to train with CUDA
    )
   

X = X_train.astype(np.float32)
Y = Y_train.astype(np.int64)

params = {
        'lr': [0.02],
        'max_epochs': [200],
        'module__num_units': [20],
}

# 用scoring='neg_log_loss'训练不正确？不知道为啥，只好在Module层加入softmax
gs = GridSearchCV(model, params, refit=True, cv=2, scoring='accuracy')
gs.fit(X, Y)
print(gs.best_score_, gs.best_params_)
gs.estimator.set_params(**gs.best_params_).fit(X, Y)

predict_Survived_skorch = pd.Series(gs.predict(X_test.astype(np.float32)), name='Survived')
skorch_result = pd.concat([IDtest, predict_Survived_skorch], axis=1)
skorch_result.to_csv('skorch_result.csv', index=False)
skorch_result.head()

0.3.0
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        0.6876       0.6404        0.6841  0.0120
      2        0.6834       0.6404        0.6816  0.0110
      3        0.6788       0.6404        0.6791  0.0165
      4        0.6802       0.6404        0.6769  0.0140
      5        0.6817       0.6404        0.6748  0.0140
      6        0.6749       0.6404        0.6727  0.0120
      7        0.6718       0.6404        0.6708  0.0135
      8        0.6759       0.6404        0.6690  0.0135
      9        0.6729       0.6404        0.6673  0.0135
     10        0.6675       0.6404        0.6657  0.0150
     11        0.6768       0.6404        0.6643  0.0175
     12        0.6694       0.6404        0.6628  0.0135
     13        0.6613       0.6404        0.6614  0.0175
     14        0.6581       0.6404        0.6600  0.0175
     15        0.6718       0.6404        0.6590  0.0155
     16        0.6615    

    142        0.5872       0.7640        0.5410  0.0135
    143        0.5785       0.7753        0.5397  0.0160
    144        0.5877       0.7753        0.5389  0.0180
    145        0.5702       0.7753        0.5378  0.0130
    146        0.5794       0.7865        0.5369  0.0160
    147        0.5627       0.8090        0.5355  0.0105
    148        0.5617       0.8090        0.5343  0.0140
    149        0.5665       0.8090        0.5330  0.0100
    150        0.5657       0.8090        0.5320  0.0105
    151        0.5763       0.8090        0.5309  0.0095
    152        0.5792       0.8090        0.5300  0.0140
    153        0.5647       0.8090        0.5289  0.0105
    154        0.5547       0.8090        0.5274  0.0115
    155        0.5838       0.8202        0.5264  0.0130
    156        0.5688       0.8202        0.5252  0.0155
    157        0.5574       0.8202        0.5238  0.0115
    158        0.5820       0.8202        0.5231  0.0135
    159        0.5476       0.8

     83        0.6385       0.5667        0.6534  0.0110
     84        0.6407       0.5667        0.6528  0.0125
     85        0.6330       0.5667        0.6523  0.0105
     86        0.6390       0.5667        0.6517  0.0150
     87        0.6364       0.5667        0.6513  0.0120
     88        0.6320       0.5667        0.6508  0.0120
     89        0.6397       0.5667        0.6505  0.0120
     90        0.6207       0.5667        0.6497  0.0130
     91        0.6261       0.5667        0.6490  0.0145
     92        0.6372       0.5667        0.6482  0.0135
     93        0.6330       0.5667        0.6478  0.0105
     94        0.6280       0.5667        0.6471  0.0135
     95        0.6265       0.5667        0.6460  0.0130
     96        0.6316       0.5667        0.6454  0.0110
     97        0.6313       0.5667        0.6449  0.0110
     98        0.6410       0.5667        0.6445  0.0130
     99        0.6269       0.5667        0.6438  0.0155
    100        0.6259       0.5

     24        0.6613       0.6704        0.6375  0.0215
     25        0.6668       0.6704        0.6371  0.0230
     26        0.6695       0.6704        0.6368  0.0255
     27        0.6639       0.6704        0.6363  0.0205
     28        0.6628       0.6704        0.6357  0.0230
     29        0.6657       0.6704        0.6353  0.0240
     30        0.6667       0.6704        0.6349  0.0249
     31        0.6686       0.6704        0.6344  0.0225
     32        0.6656       0.6704        0.6340  0.0225
     33        0.6663       0.6704        0.6336  0.0220
     34        0.6668       0.6704        0.6332  0.0190
     35        0.6614       0.6704        0.6327  0.0230
     36        0.6646       0.6704        0.6322  0.0235
     37        0.6615       0.6704        0.6317  0.0259
     38        0.6627       0.6704        0.6311  0.0205
     39        0.6620       0.6704        0.6306  0.0240
     40        0.6599       0.6704        0.6301  0.0259
     41        0.6639       0.6

    167        0.5534       0.8380        0.4986  0.0250
    168        0.5576       0.8436        0.4982  0.0245
    169        0.5423       0.8436        0.4973  0.0250
    170        0.5360       0.8436        0.4962  0.0235
    171        0.5464       0.8436        0.4952  0.0235
    172        0.5358       0.8436        0.4940  0.0235
    173        0.5571       0.8436        0.4937  0.0235
    174        0.5468       0.8436        0.4931  0.0255
    175        0.5529       0.8436        0.4922  0.0250
    176        0.5292       0.8380        0.4912  0.0255
    177        0.5411       0.8436        0.4907  0.0235
    178        0.5390       0.8436        0.4898  0.0235
    179        0.5353       0.8436        0.4888  0.0245
    180        0.5409       0.8436        0.4880  0.0250
    181        0.5518       0.8436        0.4873  0.0255
    182        0.5510       0.8436        0.4874  0.0250
    183        0.5312       0.8436        0.4867  0.0190
    184        0.5312       0.8

    107        0.5557       0.8212        0.5131  0.0274
    108        0.5600       0.8212        0.5121  0.0250
    109        0.5585       0.8268        0.5108  0.0225
    110        0.5592       0.8212        0.5089  0.0269
    111        0.5648       0.8212        0.5073  0.0264
    112        0.5494       0.8436        0.5056  0.0250
    113        0.5690       0.8380        0.5053  0.0240
    114        0.5704       0.8380        0.5050  0.0215
    115        0.5583       0.8380        0.5036  0.0220
    116        0.5451       0.8380        0.5018  0.0250
    117        0.5537       0.8380        0.5004  0.0235
    118        0.5651       0.8268        0.5002  0.0269
    119        0.5580       0.8380        0.4987  0.0210
    120        0.5492       0.8268        0.4979  0.0215
    121        0.5618       0.8268        0.4975  0.0245
    122        0.5500       0.8268        0.4963  0.0230
    123        0.5571       0.8268        0.4956  0.0260
    124        0.5459       0.8

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1


# 参考
1、[Pytorch data_loading tutorial](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html#)

2、[Pytorch_transfer_learning_tutorial](https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html)

3、[skorch basic usage](https://nbviewer.jupyter.org/github/dnouri/skorch/blob/master/notebooks/Basic_Usage.ipynb)