# Predictive Analytics
# Module 4 - Generalized Linear Models
## Demo 4 - Create Logistic, Exponential & Polynomial Regression Models

In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_boston
boston = load_boston()

In [2]:
boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)
boston_df['MEDV'] = boston.target

## Generalized Linear Models

### Logistic Regression

In [3]:
# Load dataset
from sklearn.datasets import load_wine
wine = load_wine()
wine_df = pd.DataFrame(wine.data, columns=wine.feature_names)
wine_df['class'] = wine.target
wine_df.head(4)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,class
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0


In [4]:
from sklearn.linear_model import LogisticRegression
X = wine.data
y = wine.target
logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')

# Create an instance of Logistic Regression Classifier and fit the data.
logreg.fit(X, y)



LogisticRegression(C=100000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='multinomial', n_jobs=None, penalty='l2',
          random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
          warm_start=False)

In [5]:
# Perform prediction
wine_pred = logreg.predict(X)
print(wine_df[:4])
wine_pred[:4],

   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   

   od280/od315_of_diluted_wines  proline  class  
0                          3.92   1065.0      0  
1                          3.40   1050.0      0  
2                          3.17   1185.0      0  
3                 

(array([0, 0, 0, 0]),)

### Errors in model

In [6]:
from sklearn.metrics import accuracy_score

print("Accuracy:",accuracy_score(y, wine_pred))

Accuracy: 0.9662921348314607


### Exponential Regression

In [7]:
"""
For this example let's consider the boston housing data.
Exponential regression is of the form y = Ar^X.
log(y) = log(Ar^X)
log(y) = log(A) + X*log(r)
"""
from sklearn.linear_model import LinearRegression
log_y = np.log(boston_df.MEDV)
X4 = boston_df[['RM']]

slm_exp = LinearRegression()
slm_exp.fit(X4, log_y)
print("Coefficient:", slm_exp.coef_,"\nIntercept:", slm_exp.intercept_)

Coefficient: [0.36768673] 
Intercept: 0.7237361931657587


In [8]:
"""
retreiving A and r. if log(A) = m, then A = exp(m)
"""
r = np.exp(slm_exp.coef_[0])
A = np.exp(slm_exp.intercept_)
print('A:', A,"r:",r)

A: 2.062123326899287 r: 1.4443894874426626


In [9]:
# Predicting values
predict_exp = [A*(r**v[0]) for v in X4.values]
print("Predicted values: ", predict_exp[:4])
print("Actual values:\n", boston_df.MEDV[:4])

Predicted values:  [23.13337679723768, 21.859873609598075, 28.949859892104655, 27.026231878949403]
Actual values:
 0    24.0
1    21.6
2    34.7
3    33.4
Name: MEDV, dtype: float64


In [10]:
# Measure the accuracy
from sklearn.metrics import mean_squared_error, r2_score

print("Exponential Regression Mean squared error: %.2f" % mean_squared_error(boston_df.MEDV, predict_exp))

# Explained variance score: 1 is perfect prediction
print('Exponential Regression Variance score: %.2f' % r2_score(boston_df.MEDV, predict_exp))

Exponential Regression Mean squared error: 40.83
Exponential Regression Variance score: 0.52


### Polynomial regression

In [11]:
# using same boston housing data
X2 = boston_df.drop(["MEDV"], axis=1)
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(2)
X_poly = poly.fit_transform(X2)

In [12]:
# Involving multiple parameters
y = boston_df.MEDV
slm_poly = LinearRegression()
slm_poly.fit(X_poly, y)
print("Coefficient:", slm_poly.coef_,"\nIntercept:", slm_poly.intercept_)

Coefficient: [-7.97162270e+08 -4.47560359e+00  1.86323597e-01 -4.64950095e+00
  2.99096228e+01  1.34635791e+02  2.17266153e+01  9.03246738e-01
 -7.41923580e+00  1.81653477e+00  6.19430932e-02  6.44945686e+00
  1.29947837e-01  7.34742201e-01  1.99793216e-03  3.34741125e-01
  4.02550006e-01  2.48205703e+00 -7.14321228e-01  1.12808887e-01
 -4.73501123e-03 -7.47335013e-02  1.92804227e-01 -2.34199263e-02
  3.90169990e-01 -1.98657113e-04  1.92372210e-02 -2.22418640e-05
 -3.24688054e-03 -2.03088751e-02 -1.09333125e+00  5.95574382e-03
  7.60547028e-04 -1.01172276e-02 -1.24850944e-02  6.80604587e-04
 -2.25603444e-03  5.52624070e-04 -6.38574403e-03  5.25691014e-02
  3.12674057e-02  1.07836182e+00  2.31660106e-01  4.99638578e-03
  1.74692246e-01 -3.28419255e-02  7.66482944e-04 -3.69484778e-02
  3.30999945e-03 -1.52964974e-02  2.99096228e+01 -3.41842947e+01
 -5.52330679e+00 -3.25491474e-02  1.09586600e+00 -7.76266128e-02
  1.10399779e-02 -9.46919792e-01  2.11780440e-02 -2.50401439e-01
 -3.50025060

In [13]:
predict_poly = slm_poly.predict(X_poly)
print("Predicted values: ", predict_poly[:4])
print("Actual values:\n", y[:4])

Predicted values:  [24.3257612  21.70742309 31.45342147 31.95919538]
Actual values:
 0    24.0
1    21.6
2    34.7
3    33.4
Name: MEDV, dtype: float64


In [14]:
from sklearn.metrics import mean_squared_error, r2_score

print("Mean squared error: %.2f" % mean_squared_error(y, predict_poly))

# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(y, predict_poly))

Mean squared error: 6.42
Variance score: 0.92
