## Polynomial Regression using Ridge Regression

In [1]:
from sklearn import linear_model
import sklearn
import pandas as pd

In [2]:
# Load the mammals data set (columns include Name, body and brain).
mammals_path = 'mammals.csv'
mammals = pd.read_csv(mammals_path)

In [3]:
# Degree-2 polynomial term: the square of the body column.
mammals['body_squared'] = mammals['body'].pow(2)

# Regression target.
brain = mammals['brain']

mammals.head()

Unnamed: 0,Name,body,brain,body_squared
0,Arctic fox,3.385,44.5,11.458225
1,Owl monkey,0.48,15.5,0.2304
2,Mountain beaver,1.35,8.1,1.8225
3,Cow,465.0,423.0,216225.0
4,Grey wolf,36.33,119.5,1319.8689


In [4]:
# Ridge regression estimator; alpha=1.0 is the library default, spelled out
# here so the regularisation strength is visible to the reader.
ridge = linear_model.Ridge(alpha=1.0)

In [5]:
# Design matrix: the linear and the squared body terms.
feature_columns = ['body', 'body_squared']
body_squared = mammals[feature_columns]
body_squared.head()

Unnamed: 0,body,body_squared
0,3.385,11.458225
1,0.48,0.2304
2,1.35,1.8225
3,465.0,216225.0
4,36.33,1319.8689


In [6]:
# Fit brain ~ body + body^2 with the ridge estimator.
ridge.fit(X=body_squared, y=brain)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, solver='auto', tol=0.001)

In [7]:
# Fitted intercept, then the coefficients for ['body', 'body_squared'].
# The single-argument print(...) form produces the same output under
# Python 2 and Python 3, and matches the print(...) call used later on.
print(ridge.intercept_)
print(ridge.coef_)

19.1153486389
[  2.12392861e+00  -1.89465860e-04]


In [8]:
# R^2 of the fitted model, evaluated on the same data it was fitted on.
ridge.score(X=body_squared, y=brain)

0.95353172658821017

## Logistic Regression

Using Logistic Regression to classify beer

In [9]:
# The beer file is tab-separated.
beer = pd.read_csv('beer.txt', sep='\t')
beer.head()

Unnamed: 0,Rank,Name,Brewery,Type,ABV,WR,Reviews
0,1,Heady Topper,The Alchemist,Imperial IPA,8.0,4.69,3146
1,2,Pliny The Younger,Russian River Brewing Company,Imperial IPA,11.0,4.65,1572
2,3,Pliny The Elder,Russian River Brewing Company,Imperial IPA,8.0,4.64,6129
3,4,Founders CBS Imperial Stout,Founders Brewing Company,Imperial Stout,10.6,4.63,2026
4,5,Founders KBS (Kentucky Breakfast Stout),Founders Brewing Company,Imperial Stout,11.2,4.61,4714


In [10]:
# Clean-up: discard every row that has at least one missing value.
# NOTE: this rebinds `beer`, so the raw frame is no longer available afterwards.
beer = beer.dropna(axis=0, how='any')
beer.shape

(234, 7)

In [11]:
#For classification we need labels

def good(x, threshold=4.3):
    """Return the binary class label for a single rating.

    Parameters
    ----------
    x : number
        The rating to classify.
    threshold : number, optional
        Cut-off for the positive class.  Defaults to 4.3, the value that
        was previously hard-coded, so existing one-argument calls behave
        exactly as before.

    Returns
    -------
    int
        1 when ``x`` is strictly greater than ``threshold``, otherwise 0
        (a rating exactly equal to the threshold is labelled 0).
    """
    # The comparison is strict on purpose: only ratings above the cut-off
    # count as "good".
    return 1 if x > threshold else 0
    
# Label each beer by passing its WR value through good(), element by element.
beer['good'] = beer['WR'].map(good)

In [12]:
# Logistic regression classifier; C=1.0 is the library default, written out
# so the regularisation setting is explicit.
logm = linear_model.LogisticRegression(C=1.0)

In [13]:
# Predictors as a plain array: number of reviews and ABV.
predictor_columns = ['Reviews', 'ABV']
my_input = beer[predictor_columns].values

# Binary labels produced by good().
good_output = beer['good'].values

good_output

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0])

In [14]:
# Train the classifier on the (Reviews, ABV) predictors and the good/not-good labels.
logm.fit(X=my_input, y=good_output)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, penalty='l2', random_state=None, tol=0.0001)

In [15]:
# Predicted labels for the same rows the model was trained on.
logm.predict(X=my_input)

array([0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0])

In [16]:
# Mean accuracy on the training data (no held-out set is used here).
logm.score(X=my_input, y=good_output)

0.62393162393162394

In [17]:
# Second classifier, this time with an L1 penalty instead of the default.
# NOTE(review): newer scikit-learn releases may also need a solver that
# supports L1 (e.g. solver='liblinear') before fitting - verify on upgrade.
logm2 = linear_model.LogisticRegression(penalty='l1')
logm2

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, penalty='l1', random_state=None, tol=0.0001)

In [18]:
# Record the library version the results above were produced with.
version_message = 'The scikit-learn version is {}.'.format(sklearn.__version__)
print(version_message)

The scikit-learn version is 0.15.2.


In [24]:
# Distinct beer styles, in order of first appearance.
beer['Type'].unique()

array(['Imperial IPA', 'Imperial Stout', 'American Pale Ale', 'Fruit',
       'Old Ale', 'Porter', 'American IPA', 'English Barleywine',
       'Lambic Gueuze', 'Wild Ale', 'Red Ale', 'American Strong Ale',
       'Doppelbock', 'American Wild Ale', 'Gueuze', 'Farmhouse Ale',
       'Vegetable Beer', 'Belgian Strong Dark Ale', 'Weizenbock',
       'Eisbock', 'American Porter', 'Russian Imperial Stout', 'Scoth Ale',
       'Belgian Strong Pale Ale', 'Tripel', 'Hefeweizen',
       'Flanders Red Ale', 'American Pale Lager', 'American Double',
       'Imperial Pilsner', 'American Black Ale', 'American Barleywine',
       'Flanders Oud Bruin', 'Belgian IPA', 'Oatmeal Stout',
       'American Pale Wheat Ale', 'Dunkelweizen', 'American Amber',
       'Lambic', 'Rye Beer', 'Blatic Porter', 'Belgian Pale Ale',
       'American Stout'], dtype=object)

In [46]:
# Number of beers per style, most common first.
# Series.order() was deprecated and later removed from pandas, so the
# groupby/count/order chain fails on current releases.  value_counts()
# returns the same per-style counts already sorted in descending order and
# is available in both old and new pandas.  Styles with equal counts may be
# listed in a different relative order than before.
beer['Type'].value_counts()

Type
Imperial Stout             46
Imperial IPA               39
American IPA               25
Russian Imperial Stout     13
American Strong Ale        10
Fruit                       8
American Wild Ale           8
Gueuze                      6
Old Ale                     6
Belgian Strong Dark Ale     6
English Barleywine          5
American Porter             5
Flanders Red Ale            4
American Pale Ale           4
American Barleywine         4
Tripel                      4
American Double             3
Vegetable Beer              3
Farmhouse Ale               3
American Pale Wheat Ale     2
American Black Ale          2
Doppelbock                  2
Wild Ale                    2
Weizenbock                  2
Porter                      2
Hefeweizen                  2
Imperial Pilsner            2
Eisbock                     1
Lambic Gueuze               1
Scoth Ale                   1
Rye Beer                    1
Red Ale                     1
Oatmeal Stout               1
Ameri