In [9]:
import pandas as pd
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt
import seaborn as sns

In [10]:
# load the data set form sklearn lib

In [14]:
from sklearn.datasets import load_iris
flower = load_iris()

In [15]:
flower.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [16]:
print(flower.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [17]:
print(flower.target)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [18]:
print(flower.feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [None]:
# now creating a data set for ml model to train on it

In [19]:
dataset=pd.DataFrame(flower.data)

In [20]:
dataset.head()

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [21]:
dataset=pd.DataFrame(flower.data,columns=flower.feature_names)

In [22]:
dataset.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [23]:
dataset.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [24]:
dataset.isnull().sum()

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
dtype: int64

In [25]:
dataset.corr()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
sepal length (cm),1.0,-0.11757,0.871754,0.817941
sepal width (cm),-0.11757,1.0,-0.42844,-0.366126
petal length (cm),0.871754,-0.42844,1.0,0.962865
petal width (cm),0.817941,-0.366126,0.962865,1.0


In [None]:
# sns.pairplot(dataset)

In [28]:
X=flower.data
X.shape

(150, 4)

In [29]:
y=flower.target

In [30]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=51)


In [31]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(120, 4)
(30, 4)
(120,)
(30,)


In [32]:
# since the price and features are of the different scale so we need to scale it to same level
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)

X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
 

In [33]:
reg=linear_model.LinearRegression()
reg.fit(X_train,y_train)


In [34]:
# to fing the accuracy of the model
reg.score(X_test,y_test)*100

91.28427196338097

In [35]:
from sklearn.linear_model import Ridge,Lasso
ridge=Ridge()
lasso=Lasso()

In [36]:
lasso.fit(X_train,y_train)
lasso.score(X_test,y_test)*100

-8.223684210526283

In [37]:
ridge.fit(X_train,y_train)
ridge.score(X_test,y_test)*100

91.54887865668154

In [38]:
flower.data[0].reshape(1,-1)

array([[5.1, 3.5, 1.4, 0.2]])

In [42]:
first_dataset = (flower.data[0].reshape(1,-1))
ridge.predict(first_dataset)

array([1.04663943])

In [43]:
reg.predict(first_dataset)

array([0.98830771])


# pickling the model file for deployment¶

In [44]:
import pickle
pickle.dump(sc,open('scaling.pkl','wb'))

In [45]:
# here the modle is kept safe 
# and this is regression modle
pickle.dump(reg,open('regmodle.pkl','wb'))

In [46]:
pickled_modle2=pickle.load(open('regmodle.pkl','rb'))

In [47]:
pickled_modle2.predict(first_dataset)

array([0.98830771])