In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix



In [2]:
df = pd.read_csv('/kaggle/input/iriscsv/Iris.csv')

In [3]:
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
df['Species'].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: Species, dtype: int64

In [5]:
input_cols = list(df.columns)[1:-1]
target_col = 'Species'
print("Input Columns: ",input_cols)
print("Target_column:",target_col)

Input Columns:  ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
Target_column: Species


# **Data Preprocessing**

In [6]:
inputs = df[input_cols]
target = df[target_col]

In [7]:
scaler = StandardScaler()
inputs = scaler.fit_transform(inputs)

#  **Training And Validation**

In [8]:
train_inputs, test_inputs, train_targets, test_targets = train_test_split(inputs, target, test_size=0.2, random_state=42)

In [9]:
train_inputs, val_inputs, train_targets, val_targets = train_test_split(train_inputs, train_targets, test_size=0.25, random_state=42)

In [10]:
train_inputs.shape

(90, 4)

In [11]:
test_inputs.shape

(30, 4)

In [12]:
val_inputs.shape

(30, 4)

# **Logistic Regression Model**

In [13]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(solver='liblinear')

In [14]:
model.fit(train_inputs, train_targets)

In [15]:
model.coef_

array([[-0.72170159,  1.40013229, -1.38708483, -1.2656646 ],
       [ 0.36920517, -1.2958433 ,  0.12445422, -0.54657811],
       [ 0.02714493, -0.29567209,  1.64547156,  2.12828459]])

In [16]:
pd.DataFrame({
    'feature': input_cols,
    'weights': model.coef_.tolist()[0]
})

Unnamed: 0,feature,weights
0,SepalLengthCm,-0.721702
1,SepalWidthCm,1.400132
2,PetalLengthCm,-1.387085
3,PetalWidthCm,-1.265665


In [17]:
model.intercept_

array([-1.3815705 , -0.85223539, -2.12828973])

# **Making Predictions and Validating Model**

In [18]:
train_preds = model.predict(train_inputs)

In [19]:
train_targets

122     Iris-virginica
2          Iris-setosa
66     Iris-versicolor
107     Iris-virginica
114     Iris-virginica
            ...       
130     Iris-virginica
10         Iris-setosa
58     Iris-versicolor
49         Iris-setosa
37         Iris-setosa
Name: Species, Length: 90, dtype: object

In [20]:
accuracy_score(train_targets, train_preds)

0.9

In [21]:
test_preds = model.predict(test_inputs)

In [22]:
list(test_targets)

['Iris-versicolor',
 'Iris-setosa',
 'Iris-virginica',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-setosa',
 'Iris-versicolor',
 'Iris-virginica',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-virginica',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-versicolor',
 'Iris-virginica',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-virginica',
 'Iris-setosa',
 'Iris-virginica',
 'Iris-setosa',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-setosa',
 'Iris-setosa']

In [23]:
accuracy_score(test_targets, test_preds)

0.9333333333333333