## Importing Libraries

In [6]:
import os

import matplotlib.axes as ax
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## About Data

This dataset comprises kernels belonging to two different varieties of wheat: Kama and Canadian.

To construct the data, seven geometric parameters of wheat kernels were measured:
1. area $A$,
2. perimeter $P$,
3. compactness $C = 4\pi A / P^2$,
4. length of kernel,
5. width of kernel,
6. asymmetry coefficient,
7. length of kernel groove.

## Loading Data

In [7]:
# Location of the seeds CSV. Defaults to the original author's local path;
# set the SEED_DATA_CSV environment variable to point at your own copy so the
# notebook can run on other machines without editing this cell.
SEED_DATA_CSV = os.environ.get('SEED_DATA_CSV', 'C:\\Users\\dell\\Desktop\\seed_data.csv')
data = pd.read_csv(SEED_DATA_CSV)

In [8]:
# Keep at most the first 1000 rows (positional slice)
data = data.iloc[:1000]

**A little bit of data exploration**

In [9]:
# Preview the first ten rows
data.head(n=10)

Unnamed: 0,area,perimeter,compactness,length,width,asymmetry,length_kernel_groove,kernel
0,15.26,14.84,0.871,5.763,3.312,2.221,5.22,0
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,0
2,14.29,14.09,0.905,5.291,3.337,2.699,4.825,0
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,0
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,0
5,14.38,14.21,0.8951,5.386,3.312,2.462,4.956,0
6,14.69,14.49,0.8799,5.563,3.259,3.586,5.219,0
7,16.63,15.46,0.8747,6.053,3.465,2.04,5.877,0
8,16.44,15.25,0.888,5.884,3.505,1.969,5.533,0
9,15.26,14.85,0.8696,5.714,3.242,4.543,5.314,0


In [10]:
# Summarise the frame: row count, column names, non-null counts and dtypes
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 133 entries, 0 to 132
Data columns (total 8 columns):
area                    133 non-null float64
perimeter               133 non-null float64
compactness             133 non-null float64
length                  133 non-null float64
width                   133 non-null float64
asymmetry               133 non-null float64
length_kernel_groove    133 non-null float64
kernel                  133 non-null int64
dtypes: float64(7), int64(1)
memory usage: 8.4 KB


#### Counting unique values

In [18]:
# Distinct class labels present in the 'kernel' column
pd.unique(data['kernel'])

array([0, 1], dtype=int64)

In [16]:
# Number of samples in each kernel class
kernel_labels = data['kernel']
kernel_labels.value_counts()

1    67
0    66
Name: kernel, dtype: int64

#### Splitting data

In [19]:
# Split the frame into the feature matrix (x) and the class labels (y).
# The label column is dropped by name rather than by position, so x and y
# always refer to the same column even if the CSV's column order changes.
x = data.drop('kernel', axis=1)
y = data['kernel']

In [20]:
# Preview the first five feature rows
x.head(n=5)

Unnamed: 0,area,perimeter,compactness,length,width,asymmetry,length_kernel_groove
0,15.26,14.84,0.871,5.763,3.312,2.221,5.22
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956
2,14.29,14.09,0.905,5.291,3.337,2.699,4.825
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175


In [21]:
# Preview the first five labels
y.head(n=5)

0    0
1    0
2    0
3    0
4    0
Name: kernel, dtype: int64

In [22]:
# Split into training and testing data: 25% of the rows are held out for
# testing, and random_state fixes the shuffle so the split is reproducible.
# train_test_split is imported from sklearn.model_selection; the former
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)



## Feature Scaling

In [24]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits so the test data never influences the fit.
sc_x = StandardScaler()
sc_x.fit(xtrain)
xtrain = sc_x.transform(xtrain)
xtest = sc_x.transform(xtest)

# <font color = "green">Logistic Regression</font>

### Training model

In [25]:
# Fit a scikit-learn logistic regression classifier on the scaled training split
from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(random_state=0)
classifier.fit(X=xtrain, y=ytrain)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=0, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

### Predicting on the test input

In [26]:
# Predicted class labels for the held-out test features
y_pred = classifier.predict(X=xtest)

### Results

In [28]:
# Build the confusion matrix from the true and predicted test labels
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=ytest, y_pred=y_pred)

In [29]:
# Display the confusion matrix computed from ytest and y_pred
# (per scikit-learn's convention, rows are true classes and columns are predicted classes)
cm

array([[15,  1],
       [ 0, 18]], dtype=int64)

In [31]:
# Fraction of test samples whose predicted label matches the true label
from sklearn.metrics import accuracy_score

accuracy_score(y_true=ytest, y_pred=y_pred)

0.9705882352941176

**Keep Following and Practice more :)**

**GeeksforGeeks**