# Ligistic Regression

In [0]:
from google.colab import drive
drive.mount('/content/drive')

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score

In [0]:
iris = datasets.load_iris()
x = iris.data[:, :2]
y = iris.target

In [0]:
dataframe = pd.DataFrame(x, columns = iris.feature_names[:2])
dataframe.head()

Unnamed: 0,sepal length (cm),sepal width (cm)
0,5.1,3.5
1,4.9,3.0
2,4.7,3.2
3,4.6,3.1
4,5.0,3.6


In [0]:
lg = LogisticRegression(C=1.0, solver="sag", multi_class="ovr", max_iter=1000, tol=0.001)

In [0]:
lg.fit(x, y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='ovr', n_jobs=None, penalty='l2',
                   random_state=None, solver='sag', tol=0.001, verbose=0,
                   warm_start=False)

In [0]:
lg.coef_

array([[-2.69466689,  3.76631943],
       [ 0.2053588 , -2.20816907],
       [ 1.97862024, -0.75575361]])

In [0]:
lg.intercept_

array([  2.66216812,   4.69302207, -10.30521213])

In [0]:
lg.predict([[4.95, 3.2]])

array([0])

In [0]:
lg.score(x, y)

0.8

Appyling multi_class mode as **multinomial**

In [0]:
lg = LogisticRegression(C=1.0, solver="sag", multi_class="multinomial", max_iter=1000, tol=0.001)
lg.fit(x, y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='multinomial', n_jobs=None, penalty='l2',
                   random_state=None, solver='sag', tol=0.001, verbose=0,
                   warm_start=False)

In [0]:
(lg.coef_, lg.intercept_)

(array([[-2.15939471,  2.76175662],
        [ 0.49117944, -1.68532597],
        [ 1.66821527, -1.07643065]]),
 array([ 3.523797  ,  2.76431185, -6.28810885]))

In [0]:
lg.score(x, y)

0.8133333333333334

Applying the above model to whole data...

In [0]:
iris = datasets.load_iris()
x = iris.data[:, :]
y = iris.target

In [0]:
df_x = pd.DataFrame(x, columns=iris.feature_names)
df_x.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [0]:
x_train, x_test, y_train, y_test = train_test_split(df_x, y, test_size=0.3)

In [0]:
LM = LogisticRegression(C=1.0, solver="sag", multi_class="multinomial", max_iter=1000, tol=0.001)
LM.fit(x_train, y_train)
LM.score(x_test, y_test)

1.0

# K - Nearest Neighbour

In [0]:
from sklearn.neighbors import KNeighborsClassifier

In [0]:
df = pd.read_table("fruit_dataset.txt")
df.head()

Unnamed: 0,fruit_label,fruit_name,fruit_subtype,mass,width,height,color_score
0,1,apple,granny_smith,192,8.4,7.3,0.55
1,1,apple,granny_smith,180,8.0,6.8,0.59
2,1,apple,granny_smith,176,7.4,7.2,0.6
3,2,mandarin,mandarin,86,6.2,4.7,0.8
4,2,mandarin,mandarin,84,6.0,4.6,0.79


In [0]:
x = df[['mass', 'width', 'height', 'color_score']]
x.head()

Unnamed: 0,mass,width,height,color_score
0,192,8.4,7.3,0.55
1,180,8.0,6.8,0.59
2,176,7.4,7.2,0.6
3,86,6.2,4.7,0.8
4,84,6.0,4.6,0.79


In [0]:
Y_name = df['fruit_name']
y = df['fruit_label']

In [0]:
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(x, Y_name)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=7, p=2,
                     weights='uniform')

In [0]:
# Passing parameters 
knn.predict([[175, 7.0, 6.6, 0.69]])

array(['apple'], dtype=object)