In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler # mean - 0 & standard deviation - 1
import matplotlib.pyplot as plt


In [2]:
iris=load_iris()


In [3]:
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [4]:
print(iris.DESCR)


.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [5]:
iris.feature_names # feature means  columns in dataset

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [6]:
iris.data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [7]:
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [8]:
len(iris.data)


150

In [9]:
iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [10]:
iris.target[0:50] # first 50 data - iris setosa

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0])

In [11]:
iris.target[50:100] # 51 - 100 - versicolor


array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1])

In [12]:
iris.target[100:150]# 101-150 = virginica

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2])

In [13]:
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [14]:
# feature scaling for better understanding

x = iris.data[0:100,:] # picks first 100 data with 4 columns ( 100 * 4)
y = iris.target[0:100] # (100,)


In [15]:
x.shape

(100, 4)

In [16]:
y.shape

(100,)

In [17]:
len(x)

100

In [18]:
scr = StandardScaler() # normalize -( mean = 0 , standard deviation = 1)
sx = scr.fit_transform(x)
sx


array([[-5.81065904e-01,  8.41837140e-01, -1.01297765e+00,
        -1.04211089e+00],
       [-8.94308978e-01, -2.07835104e-01, -1.01297765e+00,
        -1.04211089e+00],
       [-1.20755205e+00,  2.12033793e-01, -1.08231219e+00,
        -1.04211089e+00],
       [-1.36417359e+00,  2.09934449e-03, -9.43643106e-01,
        -1.04211089e+00],
       [-7.37687441e-01,  1.05177159e+00, -1.01297765e+00,
        -1.04211089e+00],
       [-1.11201292e-01,  1.68157493e+00, -8.04974023e-01,
        -6.86441647e-01],
       [-1.36417359e+00,  6.31902691e-01, -1.01297765e+00,
        -8.64276271e-01],
       [-7.37687441e-01,  6.31902691e-01, -9.43643106e-01,
        -1.04211089e+00],
       [-1.67741667e+00, -4.17769553e-01, -1.01297765e+00,
        -1.04211089e+00],
       [-8.94308978e-01,  2.09934449e-03, -9.43643106e-01,
        -1.21994552e+00],
       [-1.11201292e-01,  1.26170604e+00, -9.43643106e-01,
        -1.04211089e+00],
       [-1.05093052e+00,  6.31902691e-01, -8.74308565e-01,
      

In [19]:
log_reg = LogisticRegression(random_state=1)# randomstate should be an integer number
model = log_reg.fit(sx,y) # sx - preporcessed x values, y - target value


In [20]:
model.score(sx,y)

1.0

In [21]:
newdata = [[1.4,3.9,1.5,2.2]]
model.predict(newdata)


array([1])

In [22]:
model.predict_proba(newdata)*100
# 0 - setosa
# 1 - versicolor

array([[ 7.671067, 92.328933]])

In [23]:
# sigmoid - (0-0.5 = 0)
# (0.51 - 1.0 = 1)