In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import pandas as pd
import numpy as np

In [2]:
# ---- Data ------------------------------------------------------------
# Tab-separated text file without a header row, loaded as a plain NumPy array.
data = pd.read_csv('diabetes.txt', sep='\t', header=None).values

# ---- Network shape ---------------------------------------------------
input_size = 9          # width of the first linear layer's input
hidden_size = [10, 10]  # one entry per hidden layer
num_classes = 2         # width of the output layer; also the number of trailing label columns

# ---- Optimisation ----------------------------------------------------
learning_rate = 0.01
lr_decay = 0.01         # used by the inverse-time decay schedule
total_epochs = 2000
batch_size = 300

# ---- Compute target --------------------------------------------------
# Use the first CUDA device when one is available, otherwise the CPU.
# (Assign device = 'cpu' by hand to force CPU execution.)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
dtype = torch.cuda.FloatTensor  # NOTE(review): not referenced by any visible cell

# Display the array dimensions as the cell output.
data.shape

(768, 11)

In [3]:
class MyDataset(torch.utils.data.Dataset):
    """Row-wise view over a 2-D array whose trailing columns are the labels.

    Each item is a ``(features, labels)`` pair taken from one row: the last
    ``n_label_cols`` columns are the labels and everything before them the
    features.

    Args:
        dataset: 2-D array-like supporting ``dataset[i, a:b]`` indexing and
            exposing ``.shape``.
        n_label_cols: number of trailing label columns. When ``None`` (the
            default) the module-level ``num_classes`` is looked up on every
            access, which is the original behaviour.
    """

    def __init__(self, dataset, n_label_cols=None):
        self.data = dataset
        self.n_label_cols = n_label_cols

    def _split_index(self):
        """Return the column index where features end and labels begin."""
        width = num_classes if self.n_label_cols is None else self.n_label_cols
        # Use an explicit positive index instead of negative slicing:
        # ``[:-0]`` would yield *no* features and ``[-0:]`` *all* columns.
        return self.data.shape[1] - width

    def __getitem__(self, index):
        split = self._split_index()
        return self.data[index, :split], self.data[index, split:]

    def __len__(self):
        return self.data.shape[0]

In [4]:
# Rows [0, 600) feed training and are reshuffled every epoch;
# the remaining rows are held out and iterated in file order.
train_loader = torch.utils.data.DataLoader(
    MyDataset(data[:600]),
    shuffle=True,
    batch_size=batch_size,
)
test_loader = torch.utils.data.DataLoader(
    MyDataset(data[600:]),
    shuffle=False,
    batch_size=batch_size,
)

In [5]:
class NeuralNet(nn.Module):
    """Fully connected network where every linear layer is followed by a sigmoid.

    Args:
        input_size: number of input features.
        hidden_size: non-empty sequence with the width of each hidden layer.
        num_classes: width of the output layer.

    The layers live in ``self.model`` (an ``nn.Sequential``); ``self.num_layers``
    stores the number of hidden layers.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.num_layers = len(hidden_size)

        # Consecutive (fan_in, fan_out) pairs: input -> h0 -> h1 -> ... -> h_last.
        widths = [input_size] + list(hidden_size)
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.Sigmoid()]

        # Output head; the sigmoid keeps every output inside (0, 1).
        stack += [nn.Linear(hidden_size[-1], num_classes), nn.Sigmoid()]
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the whole layer stack and return the result."""
        return self.model(x)

In [6]:
def adjust_learning_rate(optimizer, iteration_count, base_lr=None, decay=None):
    """Apply an inverse-time decay to every parameter group of ``optimizer``.

    The new rate is ``base_lr / (1 + decay * iteration_count)``.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts that each
            carry an ``'lr'`` entry (e.g. a ``torch.optim`` optimizer).
        iteration_count: number of optimisation steps taken so far.
        base_lr: undecayed learning rate. Defaults to the module-level
            ``learning_rate``. Pass it explicitly whenever that global is not
            a plain number; the training cell rebinds it to a list, which
            would make the division below raise ``TypeError``.
        decay: decay coefficient. Defaults to the module-level ``lr_decay``.

    Returns:
        The learning rate that was written into the optimizer.
    """
    if base_lr is None:
        base_lr = learning_rate
    if decay is None:
        decay = lr_decay

    lr = base_lr / (1 + decay * iteration_count)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

In [12]:
%%time
# Learning rates to sweep. Kept under its own name so the scalar
# ``learning_rate`` from the configuration cell (read by
# ``adjust_learning_rate``) is not overwritten with a list.
# NOTE(review): 5e-9 is many orders of magnitude below the configured 0.01 --
# confirm this value is intentional.
candidate_lrs = [5e-9]
for lr in candidate_lrs:
    # Fresh double-precision model per learning rate; the loaders yield
    # float64 batches because they wrap a NumPy array read by pandas.
    model = NeuralNet(input_size, hidden_size, num_classes).to(device).double()
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    total_step = len(train_loader)
    for epoch in range(total_epochs):
        for i, (datas, labels) in enumerate(train_loader):
            # Optional schedule, currently disabled:
            # lr = adjust_learning_rate(optimizer, total_step*epoch+i)
            datas = datas.to(device)
            labels = labels.to(device)
            out = model(datas)
            loss = criterion(out, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report once, on the first batch of the final epoch. Tied to
            # ``total_epochs`` instead of a literal 2000 so the report is
            # still printed when the epoch count is changed.
            if (epoch + 1) % total_epochs == 0 and i == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Lr:{}'
                      .format(epoch+1, total_epochs, i+1, total_step, loss.item(), lr))

Epoch [2000/2000], Step [1/2], Loss: 0.2361, Lr:5e-09
CPU times: user 1min 4s, sys: 809 ms, total: 1min 5s
Wall time: 16.5 s


In [13]:
# Evaluate the trained model on the held-out loader. Gradients are not needed,
# so everything runs under ``no_grad``.
with torch.no_grad():
    correct = 0
    total = 0
    for datas, labels in test_loader:
        datas = datas.to(device)
        labels = labels.to(device).long()
        out = model(datas)
        # Predicted class is the output unit with the largest activation.
        # ``out`` is used directly: under no_grad the legacy ``.data``
        # accessor adds nothing.
        predicted = out.argmax(dim=1)
        # Labels have one column per class; recover the class index the same way.
        # A separate name avoids rebinding ``labels`` mid-iteration.
        targets = labels.argmax(dim=1)
        total += labels.size(0)
        correct += (torch.eq(predicted, targets)).sum().item()
    print(correct)
    # The sample count comes from ``total`` rather than a hard-coded 168, so the
    # message stays truthful if the split changes; for the current split the
    # printed text is unchanged.
    print('Accuracy of the network on the {} test images: {} %'.format(total, 100.0 * correct / total))

109
Accuracy of the network on the 168 test images: 64.88095238095238 %


In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# Separate the loaded table into a target vector (last column) and a feature
# matrix (first ``input_size`` columns) for the scikit-learn models.
# presumably the trailing columns are the one-hot labels that MyDataset slices
# off with ``num_classes`` -- verify against the data file.
#
# ``data`` is rebound in place, so the guard makes the cell safe to re-run:
# without it a second execution would strip two more columns and silently
# promote a feature column to the label.
if data.shape[1] > input_size:
    training_labels = data[:, -1]
    data = data[:, :input_size]

In [10]:
# 50/50 split, stratified so both halves keep the class proportions.
# The seed is fixed so a Restart & Run All reproduces the same partition
# (42 is the value previously left commented out on this line).
Xtr, Xtest, ytr, ytest = train_test_split(
    data,
    training_labels,
    test_size=0.5,
    stratify=training_labels,
    random_state=42,
)

# Accuracy histories, appended to by the classifier cell.
train_acc_hist = []
test_acc_hist = []

# NOTE(review): the name is misspelled and no visible cell reads it; it is kept
# as-is in case a cell outside this view depends on it.
n_esitmators = range(200,300)

In [11]:
%%time
# Fit one random forest and record its train / test accuracy.
# ``n`` was previously swept with ``for n in range(1, 200)``; it is pinned to a
# single value here.
n = 100

# Seeded so the reported accuracy is reproducible across runs (52 is the value
# previously left commented out on this line).
clf = RandomForestClassifier(n_estimators=n, n_jobs=-1, random_state=52)
clf = clf.fit(Xtr, ytr)

# Accuracy = fraction of predictions that match the true labels.
y_test_pred = clf.predict(Xtest)
test_acc_hist.append((ytest == y_test_pred).mean())
y_train_pred = clf.predict(Xtr)
train_acc_hist.append((ytr == y_train_pred).mean())

# Left over from the sweep: only every tenth forest size is reported.
if n % 10 == 0:
    print("Accuracy of %d is %lf%%"%(n, test_acc_hist[-1]*100))

Accuracy of 100 is 78.125000%
CPU times: user 278 ms, sys: 60.8 ms, total: 339 ms
Wall time: 372 ms
