#### Using PyTorch.

In [1]:
import pandas as pd 
import numpy as np

In [2]:
# Load the training split. Only file columns 1-5 are read (four measurement
# columns plus the label column); file column 0 is skipped — presumably a saved
# row index, TODO confirm against the CSV.
train_df = pd.read_csv("data/iris_train.csv", usecols=[1, 2, 3, 4, 5])

In [3]:
# Preview the first rows of the training data (raw string labels at this point).
train_df.head()

Unnamed: 0,0,1,2,3,labels
0,6.3,2.9,5.6,1.8,Iris-virginica
1,5.6,3.0,4.5,1.5,Iris-versicolor
2,7.9,3.8,6.4,2.0,Iris-virginica
3,6.5,2.8,4.6,1.5,Iris-versicolor
4,5.0,2.0,3.5,1.0,Iris-versicolor


In [4]:
# Load the test split. Only file columns 1-4 (the four measurement columns) are
# read; no label column is requested for this file.
test_df = pd.read_csv("data/iris_test.csv", usecols=[1, 2, 3, 4])

In [5]:
# Preview the first rows of the test data (features only).
test_df.head()

Unnamed: 0,0,1,2,3
0,7.2,3.6,6.1,2.5
1,4.7,3.2,1.3,0.2
2,4.4,3.2,1.3,0.2
3,4.5,2.3,1.3,0.3
4,6.4,3.2,4.5,1.5


In this case the neural network has 4 input features. The iris classification has 3 output classes: Iris-virginica, Iris-versicolor, and Iris-setosa.


In [6]:
# train_df['iris_virginica']= (train_df["labels"] == "Iris-virginica").astype(int)
# train_df['iris_versicolor'] = (train_df["labels"] == "Iris-versicolor").astype(int)
# train_df['iris_setosa'] = (train_df["labels"] == "Iris-setosa").astype(int)
# train_df = train_df.drop(["labels"], axis=1)

In [7]:
# Replace the string class names with integer codes. `pd.factorize` numbers the
# classes in order of first appearance and returns the original names in
# `uniques`, so `uniques[code]` recovers the class name.
# Note: this overwrites the column in place, so re-running the cell factorizes
# the integer codes and `uniques` no longer holds the class names.
train_df['labels'], uniques = pd.factorize(train_df['labels'])

In [8]:
# Check that the labels column now holds integer codes instead of strings.
train_df.head()

Unnamed: 0,0,1,2,3,labels
0,6.3,2.9,5.6,1.8,0
1,5.6,3.0,4.5,1.5,1
2,7.9,3.8,6.4,2.0,0
3,6.5,2.8,4.6,1.5,1
4,5.0,2.0,3.5,1.0,1


In [9]:
# Create the dataset
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from torch.autograd import Variable
import torch.nn.functional as F

class IrisDataset(Dataset):
    """Map-style dataset over an iris DataFrame.

    The first four columns of ``dataset`` are used as features. In training
    mode the fifth column is used as the label.

    Args:
        dataset: pandas DataFrame holding the samples, one per row.
        train: when True, items are ``(features, label)`` pairs; when False,
            items are the feature tensor only.
    """

    def __init__(self, dataset, train=True):
        self.dataset = dataset
        self.train = train
        # Both modes read the same four leading columns as features.
        features = self.dataset.iloc[:, 0:4]
        if self.train:
            self.train_features = features
            self.train_label = self.dataset.iloc[:, 4]
        else:
            self.test_features = features

    def __getitem__(self, index):
        """Return the sample at positional ``index``.

        Returns:
            ``(features, label)`` in training mode, where ``features`` is a
            float32 tensor and ``label`` is the raw value stored in the label
            column; otherwise just the float32 feature tensor.
        """
        if self.train:
            x_train = self.get_tensor(self.train_features.iloc[index])
            y_train = self.train_label.iloc[index]
            return x_train, y_train
        return self.get_tensor(self.test_features.iloc[index])

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.dataset)

    def get_tensor(self, dataset):
        """Convert a pandas row of numeric values into a float32 tensor.

        Converts the underlying array directly instead of going through a
        dict and a tuple; the explicit cast keeps the dtype at float32
        whatever dtype pandas chose for the row.
        """
        return torch.tensor(dataset.values.astype("float32"))



In [10]:
# Hyperparameters for the model and the training loop.
# Number of feature columns fed to the model.
input_features = 4
# NOTE(review): not referenced by any cell shown here — confirm whether it is still needed.
hidden_size = 5
# Number of output classes.
labels = 3
# Step size passed to the SGD optimizer.
learning_rate = 0.01
# Samples per mini-batch for both data loaders.
batch_size = 3
# Number of full passes over the training loader.
num_epochs = 4

In [11]:
# Wrap each DataFrame in a dataset: the training one yields (features, label)
# pairs, the test one yields features only.
train_dataset = IrisDataset(dataset=train_df, train=True)
test_dataset = IrisDataset(dataset=test_df, train=False)

In [12]:
# Mini-batch loaders over both datasets. Training batches are reshuffled on
# every pass; test batches keep the DataFrame's row order so predictions line
# up with the input rows.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [13]:
# Check that the loader is iterable.
# The Iterable ABC lives in `collections.abc`; the `collections.Iterable` alias
# was removed in Python 3.10.
import collections.abc
isinstance(train_loader, collections.abc.Iterable)

True

In [14]:
# Multinomial logistic regression: a single linear layer that maps the input
# features to one raw score (logit) per class.
class LogisticRegression(nn.Module):
    """Linear classifier producing unnormalised class scores.

    Args:
        input_features: number of values in each input sample.
        output_labels: number of classes to score.
    """

    def __init__(self, input_features, output_labels):
        super().__init__()
        self.linear = nn.Linear(in_features=input_features,
                                out_features=output_labels)

    def forward(self, x):
        """Return the raw class scores for ``x``; no softmax is applied here."""
        return self.linear(x)
# Build the classifier from the hyperparameters: one input per feature column,
# one output score per class.
model = LogisticRegression(input_features=input_features, output_labels=labels)

In [15]:
# Loss and optimizer for training. CrossEntropyLoss takes the model's raw
# scores together with integer class indices; plain SGD updates the model's
# parameters with the configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)


In [16]:
# Train with mini-batch SGD, printing the running mean loss after every batch.
losses = []  # per-batch loss values as Python floats, accumulated across all epochs
for epoch in range(1, num_epochs + 1):
    for feature, label in train_loader:
        # CrossEntropyLoss needs int64 class indices as targets.
        label = label.long()

        optimizer.zero_grad()
        outputs = model(feature)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        # The loss is a 0-dim tensor; indexing it with `.data[0]` is an error
        # on current PyTorch, so read the scalar with `.item()`.
        losses.append(loss.item())
        # Mean over every batch seen so far, kept under its own name so the
        # loss tensor is not shadowed.
        running_loss = np.mean(losses)

        print ('Epoch: [%d/%d],  Loss: %.4f'
               % (epoch, num_epochs, running_loss))


Epoch: [1/4],  Loss: 1.5748
Epoch: [1/4],  Loss: 1.4249
Epoch: [1/4],  Loss: 1.4586
Epoch: [1/4],  Loss: 1.4376
Epoch: [1/4],  Loss: 1.4205
Epoch: [1/4],  Loss: 1.3895
Epoch: [1/4],  Loss: 1.3706
Epoch: [1/4],  Loss: 1.3090
Epoch: [1/4],  Loss: 1.2540
Epoch: [1/4],  Loss: 1.3000
Epoch: [1/4],  Loss: 1.2904
Epoch: [1/4],  Loss: 1.3127
Epoch: [1/4],  Loss: 1.2976
Epoch: [1/4],  Loss: 1.2960
Epoch: [1/4],  Loss: 1.2901
Epoch: [1/4],  Loss: 1.2904
Epoch: [1/4],  Loss: 1.3029
Epoch: [1/4],  Loss: 1.3030
Epoch: [1/4],  Loss: 1.2889
Epoch: [1/4],  Loss: 1.2887
Epoch: [1/4],  Loss: 1.2751
Epoch: [1/4],  Loss: 1.2609
Epoch: [1/4],  Loss: 1.2624
Epoch: [1/4],  Loss: 1.2680
Epoch: [1/4],  Loss: 1.2559
Epoch: [1/4],  Loss: 1.2561
Epoch: [1/4],  Loss: 1.2525
Epoch: [1/4],  Loss: 1.2327
Epoch: [1/4],  Loss: 1.2286
Epoch: [1/4],  Loss: 1.2237
Epoch: [1/4],  Loss: 1.2227
Epoch: [1/4],  Loss: 1.2135
Epoch: [1/4],  Loss: 1.2011
Epoch: [1/4],  Loss: 1.1930
Epoch: [1/4],  Loss: 1.1956
Epoch: [1/4],  Loss:

  app.launch_new_instance()


In [17]:
# Run the trained model over the test set and collect per-class probabilities.
test_pred = pd.DataFrame()

predicted = []
print(len(test_loader.dataset))
# Inference only: no_grad() stops autograd from recording a graph for every
# forward pass, so the outputs no longer need detaching through `.data`.
with torch.no_grad():
    for x in test_loader:
        outputs = model(x)
        # Softmax over dim=1 turns each row of scores into probabilities.
        prob = F.softmax(outputs, dim=1)
        print(prob)
        # Extending with a 2-D tensor appends one 1-D tensor per sample.
        predicted.extend(prob)
# NOTE(review): the column name 'is_attributed' does not describe class
# probabilities; kept unchanged in case later cells read it — confirm and rename.
test_pred['is_attributed'] = predicted
test_pred.tail()

40
tensor([[0.3701, 0.5239, 0.1060],
        [0.0399, 0.3127, 0.6474],
        [0.0424, 0.3067, 0.6508]])
tensor([[0.0890, 0.3993, 0.5117],
        [0.2554, 0.5234, 0.2212],
        [0.4361, 0.4448, 0.1191]])
tensor([[0.0522, 0.3295, 0.6183],
        [0.3182, 0.5025, 0.1792],
        [0.2644, 0.5132, 0.2224]])
tensor([[0.3522, 0.5129, 0.1350],
        [0.2589, 0.5134, 0.2277],
        [0.3444, 0.4968, 0.1589]])
tensor([[0.4757, 0.4411, 0.0832],
        [0.2736, 0.5121, 0.2143],
        [0.2132, 0.5096, 0.2772]])
tensor([[0.0437, 0.3051, 0.6512],
        [0.4710, 0.4425, 0.0866],
        [0.5572, 0.3761, 0.0666]])
tensor([[0.4189, 0.4804, 0.1007],
        [0.4001, 0.5013, 0.0986],
        [0.0401, 0.3274, 0.6325]])
tensor([[0.0405, 0.3324, 0.6271],
        [0.0503, 0.3295, 0.6203],
        [0.4200, 0.4481, 0.1319]])
tensor([[0.0259, 0.2966, 0.6775],
        [0.4540, 0.4381, 0.1079],
        [0.0370, 0.3095, 0.6535]])
tensor([[0.2750, 0.4953, 0.2297],
        [0.5586, 0.3544, 0.0869],
  

Unnamed: 0,is_attributed
35,"[tensor(0.2439), tensor(0.5167), tensor(0.2394)]"
36,"[tensor(0.3494), tensor(0.4758), tensor(0.1748)]"
37,"[tensor(0.3759), tensor(0.5047), tensor(0.1194)]"
38,"[tensor(0.3054), tensor(0.4861), tensor(0.2085)]"
39,"[tensor(0.0507), tensor(0.3477), tensor(0.6016)]"
