In [1]:
import torch
import numpy as np
import pandas as pd
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import LabelEncoder

---
## Classifying Diabetes

In [2]:
xy = np.loadtxt('data-diabetes.csv', delimiter=',', dtype=np.float32)

x_data = Variable(torch.from_numpy(xy[:, 0:-1]))
y_data = Variable(torch.from_numpy(xy[:, [-1]]))

#imput data shape: 8*8
print(x_data.data.shape) # torch.Size([759, 8])

#result shape: 8*1
print(y_data.data.shape) # torch.Size([759, 1])

torch.Size([8, 8])
torch.Size([8, 1])


In [3]:
class Model(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate two nn.Linear module
        """
        super(Model, self).__init__()
        self.l1 = torch.nn.Linear(8, 6)
        self.l2 = torch.nn.Linear(6, 4)
        self.l3 = torch.nn.Linear(4, 1)

        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """
        In the forward function we accept a Variable of input data and we must return
        a Variable of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Variables.
        """
        out1 = self.sigmoid(self.l1(x))
        out2 = self.sigmoid(self.l2(out1))
        y_pred = self.sigmoid(self.l3(out2))
        return y_pred

# our model
model = Model()

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
criterion = torch.nn.BCELoss(size_average=True)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Training loop
for epoch in range(100):
        # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x_data)

    # Compute and print loss
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

0 0.6941694021224976
1 0.6940499544143677
2 0.6939440965652466
3 0.6938505172729492
4 0.6937676668167114
5 0.6936941742897034
6 0.6936291456222534
7 0.6935715079307556
8 0.6935203075408936
9 0.6934749484062195
10 0.6934346556663513
11 0.6933989524841309
12 0.6933671832084656
13 0.6933388710021973
14 0.6933136582374573
15 0.6932913064956665
16 0.6932712197303772
17 0.6932533383369446
18 0.6932373642921448
19 0.6932229399681091
20 0.6932101249694824
21 0.693198561668396
22 0.6931881904602051
23 0.6931787729263306
24 0.6931702494621277
25 0.6931625008583069
26 0.6931554675102234
27 0.6931490302085876
28 0.6931432485580444
29 0.6931378245353699
30 0.6931328177452087
31 0.6931282877922058
32 0.6931240558624268
33 0.6931200623512268
34 0.693116307258606
35 0.6931129097938538
36 0.6931096315383911
37 0.693106472492218
38 0.693103551864624
39 0.6931008100509644
40 0.6930980682373047
41 0.6930955648422241
42 0.6930930614471436
43 0.6930906772613525
44 0.6930884122848511
45 0.6930861473083496
46



---
## Note. Titanic dataset Classifier

In [4]:
#read data
raw = pd.read_csv('./train.csv')
raw.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
#read data
raw = pd.read_csv('./train.csv')

#preprocess

#raw: training data
raw = raw.iloc[:,[2, 4, 5, 6, 7, 9, -1]]
raw['Age'] = raw['Age'].fillna(raw['Age'].mean())
raw['Embarked'] = raw['Embarked'].fillna('unknown')

#encording
#sex: male=1, female=2
#embarked: C=0, Q=1, S=2, unknown=3
for col in range(raw.shape[1]):
    raw.iloc[:,col] = LabelEncoder().fit_transform(raw.iloc[:,col])

In [6]:
raw.to_csv('./train_p.csv', index = False, header = False)

In [7]:
#dataloader
class TitanicDataset(Dataset):

    # Initialize your data, download, etc.
    def __init__(self):
        xy = np.loadtxt('train_p.csv', delimiter=',', dtype=np.float32)
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy[:, 1:])
        self.y_data = torch.from_numpy(xy[:, [0]])

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len
    
dataset = TitanicDataset()
train_loader = DataLoader(dataset = dataset, batch_size = 32, shuffle = True, num_workers = 0)

In [8]:
class Model(torch.nn.Module):

    def __init__(self):
        """
        In the constructor we instantiate two nn.Linear module
        """
        super(Model, self).__init__()
        self.l1 = torch.nn.Linear(6, 3)
        self.l2 = torch.nn.Linear(3, 2)
        self.l3 = torch.nn.Linear(2, 1)

        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        out1 = self.sigmoid(self.l1(x))
        out2 = self.sigmoid(self.l2(out1))
        y_pred = self.sigmoid(self.l3(out2))
        return y_pred


model = Model()

criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

In [9]:
# training loop
for epoch in range(2):
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data

        inputs, labels = Variable(inputs), Variable(labels)

        y_pred = model(inputs)

        loss = criterion(y_pred, labels)
        print(epoch, i, loss.data)
        
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

0 0 tensor(0.6740)
0 1 tensor(0.5924)
0 2 tensor(0.5732)
0 3 tensor(0.4511)
0 4 tensor(0.4555)
0 5 tensor(0.2856)
0 6 tensor(0.1101)
0 7 tensor(0.2092)
0 8 tensor(0.3540)
0 9 tensor(0.0295)
0 10 tensor(0.0191)
0 11 tensor(-0.1912)
0 12 tensor(0.1454)
0 13 tensor(-0.3460)
0 14 tensor(-0.1108)
0 15 tensor(-0.3569)
0 16 tensor(-0.4172)
0 17 tensor(0.0041)
0 18 tensor(-0.6998)
0 19 tensor(-0.2007)
0 20 tensor(-0.5498)
0 21 tensor(-0.3682)
0 22 tensor(-0.3959)
0 23 tensor(-0.7920)
0 24 tensor(-0.5262)
0 25 tensor(0.2453)
0 26 tensor(-0.6932)
0 27 tensor(0.1029)
1 0 tensor(-0.5997)
1 1 tensor(-0.1224)
1 2 tensor(-0.8610)
1 3 tensor(-0.7518)
1 4 tensor(-0.7045)
1 5 tensor(-0.9743)
1 6 tensor(-1.2668)
1 7 tensor(-0.7198)
1 8 tensor(-1.2745)
1 9 tensor(-1.0529)
1 10 tensor(-1.5612)
1 11 tensor(-1.2340)
1 12 tensor(-0.9732)
1 13 tensor(-0.1705)
1 14 tensor(-0.7939)
1 15 tensor(-0.8149)
1 16 tensor(-0.7242)
1 17 tensor(-1.0668)
1 18 tensor(-0.8683)
1 19 tensor(-0.4296)
1 20 tensor(-1.1247)
1 21 t