<a href="https://colab.research.google.com/github/jammy-bot/keras-to-pytorch/blob/features/wheat_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://archive.ics.uci.edu/ml/datasets/seeds

Abstract: Measurements of geometrical properties of kernels belonging to three different varieties of wheat. A soft X-ray technique and GRAINS package were used to construct all seven, real-valued attributes.

Attribute Information:

To construct the data, seven geometric parameters of wheat kernels were measured:
1. area A,
2. perimeter P,
3. compactness C = 4*pi*A/P^2,
4. length of kernel,
5. width of kernel,
6. asymmetry coefficient
7. length of kernel groove.
All of these parameters were real-valued continuous.



In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn import preprocessing

from urllib.request import urlretrieve

In [2]:
# reading the dataset
seeds = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00236/seeds_dataset.txt'
urlretrieve(seeds)

# delimiting values separated by spaces of various lengths
df = pd.read_csv(seeds, delim_whitespace=True)

In [3]:
# adding column names to the dataframe
df.columns = ["A", "P", "C", "kernel_length", "kernel_width", 
              "asymmetry", "groove_length", "variety"]

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 209 entries, 0 to 208
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   A              209 non-null    float64
 1   P              209 non-null    float64
 2   C              209 non-null    float64
 3   kernel_length  209 non-null    float64
 4   kernel_width   209 non-null    float64
 5   asymmetry      209 non-null    float64
 6   groove_length  209 non-null    float64
 7   variety        209 non-null    int64  
dtypes: float64(7), int64(1)
memory usage: 13.2 KB


In [5]:
# view basic statistics for variables
df.iloc[:, :-1].describe()

Unnamed: 0,A,P,C,kernel_length,kernel_width,asymmetry,groove_length
count,209.0,209.0,209.0,209.0,209.0,209.0,209.0
mean,14.84555,14.557943,0.870999,5.62789,3.258349,3.707278,5.408971
std,2.916545,1.308949,0.023686,0.444029,0.378603,1.503657,0.492487
min,10.59,12.41,0.8081,4.899,2.63,0.7651,4.519
25%,12.26,13.45,0.8567,5.262,2.941,2.587,5.045
50%,14.34,14.29,0.8735,5.52,3.232,3.6,5.224
75%,17.32,15.73,0.8879,5.98,3.562,4.773,5.877
max,21.18,17.25,0.9183,6.675,4.033,8.456,6.55


In [6]:
# viewing a random sample of rows from the dataframe
df.sample(5)

Unnamed: 0,A,P,C,kernel_length,kernel_width,asymmetry,groove_length,variety
197,12.62,13.67,0.8481,5.41,2.911,3.306,5.231,3
14,14.59,14.28,0.8993,5.351,3.333,4.185,4.781,1
163,11.14,12.79,0.8558,5.011,2.794,6.388,5.049,3
157,11.75,13.52,0.8082,5.444,2.678,4.378,5.31,3
95,19.31,16.59,0.8815,6.341,3.81,3.477,6.238,2


In [7]:
df["variety"].value_counts()

3    70
2    70
1    69
Name: variety, dtype: int64

In [8]:
# converting to numpy
x = df[["A", "P", "C", "kernel_length", "kernel_width", 
              "asymmetry", "groove_length"]].values

le = preprocessing.LabelEncoder()
y = le.fit_transform(df["variety"])
variety = le.classes_

In [9]:
print(f"Variable shape: {x.shape}\n")
print(f"Target shape: {y.shape}")

Variable shape: (209, 7)

Target shape: (209,)


In [10]:
# splitting into validation and training sets
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state = 11)

# NN

In [11]:
# import pytorch library and modules
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

In [12]:
class Net(nn.Module):
    def __init__(self, in_count, output_count):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(in_count, 70) # first hidden layer
        self.fc2 = nn.Linear(70, 35) # 50 form hidden1 to 25 in hidden2
        self.fc3 = nn.Linear(35, output_count)
        self.LogSoftmax = nn.LogSoftmax(dim=1)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.fc3(x)
        return self.LogSoftmax(x)

In [13]:
# converting data to pytorch tensors
x_train= Variable(torch.Tensor(x_train).float())
x_test= Variable(torch.Tensor(x_test).float())

# converting class indexes
y_train= Variable(torch.LongTensor(y_train))
y_test= Variable(torch.LongTensor(y_test))

In [14]:
# instantiating the model
model = Net(x.shape[1], len(variety)) # number of columns, rows
criterion = nn.CrossEntropyLoss() # declaring the loss function
optimizer = torch.optim.Adam(model.parameters(),  # parameters are the weights
                             lr=0.01)

for epoch in range(100):
    optimizer.zero_grad()
    out = model(x_train)
    loss = criterion(out, y_train)
    loss.backward() # backward propagation
    optimizer.step() # applying the gradients
    print(f"Epoch: {epoch+1}, loss: {loss.item()}")

Epoch: 1, loss: 1.8150185346603394
Epoch: 2, loss: 3.2424304485321045
Epoch: 3, loss: 3.8024401664733887
Epoch: 4, loss: 2.967388153076172
Epoch: 5, loss: 1.7953919172286987
Epoch: 6, loss: 1.624898076057434
Epoch: 7, loss: 1.2729932069778442
Epoch: 8, loss: 1.2551294565200806
Epoch: 9, loss: 1.5043703317642212
Epoch: 10, loss: 1.4484364986419678
Epoch: 11, loss: 1.2124103307724
Epoch: 12, loss: 1.0290194749832153
Epoch: 13, loss: 0.9852668642997742
Epoch: 14, loss: 1.0320230722427368
Epoch: 15, loss: 1.0927050113677979
Epoch: 16, loss: 1.115511178970337
Epoch: 17, loss: 1.0906785726547241
Epoch: 18, loss: 1.0394376516342163
Epoch: 19, loss: 0.9864171147346497
Epoch: 20, loss: 0.9428744912147522
Epoch: 21, loss: 0.9093397259712219
Epoch: 22, loss: 0.8850933909416199
Epoch: 23, loss: 0.8706239461898804
Epoch: 24, loss: 0.8643702268600464
Epoch: 25, loss: 0.8611800074577332
Epoch: 26, loss: 0.8543314933776855
Epoch: 27, loss: 0.8387202620506287
Epoch: 28, loss: 0.8131958842277527
Epoch: 

# Evaluate Accuracy

In [15]:
from sklearn.metrics import accuracy_score

pred = model(x_test)
# dunder to ignore the first parameter
_ , predict_classes = torch.max(pred,1)

correct = accuracy_score(y_test, predict_classes) # (expected_classes,predict_classes)
print(f"Accuracy: {correct}")

Accuracy: 0.9056603773584906


# =====================

In [16]:
# viewing the dataset's first and last tensors
first_tensor = x_test[[0]]
print(f"First Tensor: {first_tensor}\n")
last_tensor = x_test[[-1]]
print(f"Last Tensor: {last_tensor}")

First Tensor: tensor([[18.7200, 16.3400,  0.8810,  6.2190,  3.6840,  2.1880,  6.0970]])

Last Tensor: tensor([[12.3000, 13.3400,  0.8684,  5.2430,  2.9740,  5.6370,  5.0630]])


In [17]:
# viewing the dataset's first and last item class
first_class = y[0] + 1 # add 1 to the index of the first value
print(f"First class: {first_class}\n")
last_class = y[-1] + 1 # add 1 to the index of the last value
print(f"Last class: {last_class}")

First class: 1

Last class: 3


In [18]:
pred_check = model(first_tensor)
_ , predict_classes = torch.max(pred_check,1)

# view the prediction loss for each class
print(f"\nClass loss: {pred_check}\n")
# print predicted class by adding 1 to the predicted index
print(f"First class prediction: {predict_classes + 1}")
# print actual
print(f"First class actual: {first_class}")


Class loss: tensor([[ -3.7463,  -0.0239, -16.8398]], grad_fn=<LogSoftmaxBackward>)

First class prediction: tensor([2])
First class actual: 1


In [19]:
pred_check = model(last_tensor)
_ , predict_classes = torch.max(pred_check,1)

# view the prediction loss for each class
print(f"\nClass loss: {pred_check}\n")
# print predicted class by adding 1 to the predicted index
print(f"Last class prediction: {predict_classes + 1}")
# print actual
print(f"Last class actual: {last_class}")


Class loss: tensor([[-3.6273, -8.3868, -0.0272]], grad_fn=<LogSoftmaxBackward>)

Last class prediction: tensor([3])
Last class actual: 3


# ...