<a href="https://colab.research.google.com/github/athiagarajan/PytorchMLDLFiles/blob/master/IRIS_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Iris Classification Using PyTorch**

In [None]:
import torch
from torch.autograd import Variable
import torchvision.transforms as transforms
import torchvision.datasets as dsets

# Step 1. Load Dataset
# Step 2. Make Dataset Iterable
# Step 3. Create Model Class
# Step 4. Instantiate Model Class
# Step 5. Instantiate Loss Class
# Step 6. Instantiate Optimizer Class
# Step 7. Train Model

In [None]:
# Load the Iris data that ships with scikit-learn (approach taken from the sklearn tutorial).
from sklearn.datasets import load_iris

iris = load_iris()
print("Type of iris dataset:", type(iris))


Type of iris dataset: <class 'sklearn.utils.Bunch'>


In [None]:
# A Bunch, as you may remember, is a dictionary-based dataset. Dictionaries are addressed by keys.
# Let's look at the keys.
print(iris.keys())


dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])


In [None]:
# The DESCR entry looks useful: print it to read the description of the data set.
print(iris["DESCR"])

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [None]:
# Wrap the raw feature matrix in a pandas DataFrame so it is easier to inspect.
import pandas as pd

iris_df = pd.DataFrame(data=iris["data"])
iris_df.head()

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [None]:
# Label the columns with the feature names provided alongside the data.
iris_df.columns = iris["feature_names"]
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [None]:
# Store the class labels next to the measurements in a new "IRIS" column.
iris_df["IRIS"] = iris["target"]
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),IRIS
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [None]:
# The old module path below throws an error at import if scikit-learn has been upgraded;
# `sklearn.model_selection` is the import to use instead.
# from sklearn.cross_validation  import train_test_split
from sklearn.model_selection import train_test_split

# y is the dependent variable (the class label).
y = iris_df['IRIS']
# X is the matrix of independent variables. The columns are selected by feature name so
# that the 'IRIS' label column is NOT part of X: the earlier `iloc[:, 0:5]` slice took all
# five columns and therefore leaked the target into the features.
X = iris_df[iris['feature_names']]

# Split the data into a training set and a test set (30% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# With the label excluded, the feature matrices have one column per measurement.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(105, 5) (45, 5) (105,) (45,)


In [None]:
import torch
from torch.utils.data import Dataset

class iris_set(Dataset):
    """Map-style dataset that pairs each feature row with its class label.

    Args:
        X: Feature matrix with one row per sample. pandas objects, NumPy arrays,
            nested lists and tensors are accepted; it is stored as a float32 tensor.
        y: One integer class label per row of ``X``; stored as an int64 tensor.
        transform: Optional callable applied to the whole ``(features, label)``
            tuple before it is returned from ``__getitem__``.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of samples.
    """

    # Constructor with default values
    def __init__(self, X, y, transform = None):
        # Convert once, up front. Indexing a DataFrame with an integer is a column
        # lookup, and a Series is indexed by label, so keeping the pandas objects
        # would break positional access after the rows have been shuffled by a split.
        self.x = self._as_tensor(X, torch.float32)
        self.y = self._as_tensor(y, torch.int64)
        if self.x.shape[0] != self.y.shape[0]:
            raise ValueError(
                "X and y must contain the same number of samples, got {} and {}".format(
                    self.x.shape[0], self.y.shape[0]
                )
            )
        # The length comes from the data instead of a hard-coded 105, so the same
        # class works for the training split and the smaller test split.
        self.len = self.x.shape[0]
        self.transform = transform

    @staticmethod
    def _as_tensor(values, dtype):
        """Return ``values`` as a tensor of ``dtype``, unwrapping pandas objects first."""
        if hasattr(values, "to_numpy"):
            values = values.to_numpy()
        return torch.as_tensor(values, dtype=dtype)

    # Getter
    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at position ``index``."""
        sample = self.x[index], self.y[index]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

    # Get Length
    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.len



In [None]:
# Load data using Pytorch Datasets
from torchvision import transforms

# No transform is passed: transforms.ToTensor() is an image transform, while iris_set
# hands its transform the whole (features, label) tuple. The splits are converted to
# plain arrays instead, float32 features for the linear layer and int64 class labels
# for the loss, so samples are looked up by position rather than by pandas label.
train_dataset = iris_set(X_train.values.astype("float32"), y_train.values.astype("int64"))
test_dataset = iris_set(X_test.values.astype("float32"), y_test.values.astype("int64"))
batch_size = 105


In [None]:
# Wrap both datasets in loaders; only the training loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
print(train_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7f455f8ceeb8>


SyntaxError: ignored

In [None]:
# Pull a single batch from the training loader to inspect what it yields.
next(iter(train_loader))

TypeError: ignored

In [None]:

# IPython-only introspection: `??` displays the source of DataLoader. This is a debugging
# aid rather than plain Python and is not needed by the rest of the notebook.
torch.utils.data.DataLoader??

In [None]:
# Model definition
class LogisticRegression(torch.nn.Module):
    """A single linear layer mapping ``input_dim`` features to ``output_dim`` scores."""

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return the linear layer's output for ``x``; no activation is applied here."""
        return self.linear(x)

In [None]:
#Initialize params
batch_size = 105
n_iters = 3000
# Number of passes over the training set that yields n_iters optimizer steps, stored as
# a whole number so it can be handed to range() directly.
epochs = int(n_iters / (len(train_dataset) / batch_size))
# The model needs one input per feature column and one output score per class. Both are
# derived from the data: the former hard-coded 105 is the number of training samples,
# not features, and 11 does not match the three Iris classes.
input_dim = X_train.shape[1]
output_dim = len(iris['target_names'])
lr_rate = 0.001

#Instantiate model
model = LogisticRegression(input_dim, output_dim)

In [None]:
# Instantiate the loss.
# CrossEntropyLoss computes softmax and then the cross entropy itself, which is why
# LogisticRegression.forward returns the linear layer's output unchanged.
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# Instantiate the optimizer: SGD over every parameter of the model.
optimizer = torch.optim.SGD(model.parameters(), lr=lr_rate)

# Walk the training loader once and show the labels of each batch.
for batch_idx, (features, targets) in enumerate(train_loader):
    print(targets)


KeyError: ignored

In [None]:
#Train model
# The optimisation and evaluation steps used to sit inside string literals, so this cell
# only printed the labels; they are live again here.
# `iteration` counts optimizer steps across epochs (named so the builtin `iter` is not
# shadowed). The loop variables are not called `x`/`y`, which would overwrite the label
# Series `y` defined when the data was split.
iteration = 0
for epoch in range(int(epochs)):
    for features, labels in train_loader:
        # The linear layer expects float32 inputs and the loss expects int64 class indices.
        features = features.float()
        labels = labels.long()

        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        iteration += 1
        if iteration % 500 == 0:
            # Calculate accuracy on the test set; gradients are not needed for this.
            correct = 0
            total = 0
            with torch.no_grad():
                for test_features, test_labels in test_loader:
                    test_outputs = model(test_features.float())
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    correct += (predicted == test_labels).sum().item()
            accuracy = 100 * correct // total
            print("Iteration: {}. Loss: {}. Accuracy: {}.".format(iteration, loss.item(), accuracy))

KeyError: ignored