In [None]:
import numpy as np

In [None]:
import pandas as pd

def load_crime_dataset(path='crimeDataset.txt'):
  """Load the Communities and Crime (unnormalized) dataset for regression.

  Source: https://archive.ics.uci.edu/ml/datasets/Communities+and+Crime+Unnormalized

  Parameters
  ----------
  path : str or path-like, default 'crimeDataset.txt'
      Comma-separated file to read. '?' entries are parsed as missing values.

  Returns
  -------
  (X_crime, y_crime) : tuple
      X_crime is a DataFrame with the 88 retained feature columns and
      y_crime is the 'ViolentCrimesPerPop' Series. Rows that have a missing
      value in any retained column are dropped from both.
  """
  crime = pd.read_csv(path, sep=',', na_values='?')

  # Remove features with poor coverage or lower relevance; the last index
  # (145) is the ViolentCrimesPerPop target column.
  columns_to_keep = [5, 6] + list(range(11, 26)) + list(range(32, 103)) + [145]

  # dropna(): drop the rows where at least one retained element is missing.
  crime = crime.iloc[:, columns_to_keep].dropna()

  # The target was appended last above, so everything before it is a feature.
  X_crime = crime.iloc[:, :-1]
  # ViolentCrimesPerPop is the total number of violent crimes per 100K population
  y_crime = crime['ViolentCrimesPerPop']

  return (X_crime, y_crime)

# Feature matrix and target shared by every model fitted below.
X_crime, y_crime = load_crime_dataset()

In [None]:
from sklearn.model_selection import train_test_split

# Split features and target into train/test parts; random_state is pinned to 0.
X_train, X_test, y_train, y_test = train_test_split(
    X_crime,
    y_crime,
    random_state=0,
)

In [None]:
from sklearn.linear_model import Ridge

In [None]:
# Re-create the train/test split, then fit ridge regression (alpha=20) on the
# raw, unscaled features.
X_train, X_test, y_train, y_test = train_test_split(
    X_crime, y_crime, random_state=0
)
linridge = Ridge(alpha=20.0)
linridge.fit(X_train, y_train)

# Score both splits up front so the report below is a plain list of prints.
ridge_r2_train = linridge.score(X_train, y_train)
ridge_r2_test = linridge.score(X_test, y_test)

print('Crime dataset fitted by Ridge Regression')
print('ridge regression linear model intercept: {}'.format(linridge.intercept_))
print('ridge regression linear model coeff:\n{}'.format(linridge.coef_))
print('R-squared score (training): {:.3f}'.format(ridge_r2_train))
print('R-squared score (test): {:.3f}'.format(ridge_r2_test))

Crime dataset fitted by Ridge Regression
ridge regression linear model intercept: -3352.423035846185
ridge regression linear model coeff:
[ 1.95091438e-03  2.19322667e+01  9.56286607e+00 -3.59178973e+01
  6.36465325e+00 -1.96885471e+01 -2.80715856e-03  1.66254486e+00
 -6.61426604e-03 -6.95450680e+00  1.71944731e+01 -5.62819154e+00
  8.83525114e+00  6.79085746e-01 -7.33614221e+00  6.70389803e-03
  9.78505502e-04  5.01202169e-03 -4.89870524e+00 -1.79270062e+01
  9.17572382e+00 -1.24454193e+00  1.21845360e+00  1.03233089e+01
 -3.78037278e+00 -3.73428973e+00  4.74595305e+00  8.42696855e+00
  3.09250005e+01  1.18644167e+01 -2.05183675e+00 -3.82210450e+01
  1.85081589e+01  1.52510829e+00 -2.20086608e+01  2.46283912e+00
  3.29328703e-01  4.02228467e+00 -1.12903533e+01 -4.69567413e-03
  4.27046505e+01 -1.22507167e-03  1.40795790e+00  9.35041855e-01
 -3.00464253e+00  1.12390514e+00 -1.82487653e+01 -1.54653407e+01
  2.41917002e+01 -1.32497562e+01 -4.20113118e-01 -3.59710660e+01
  1.29786751e+01 

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test data does not influence the scaling.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Count the coefficients of the current `linridge` model that are not exactly zero.
ridge_nonzero_mask = linridge.coef_ != 0
print('Number of non-zero features: {}'.format(np.sum(ridge_nonzero_mask)))

Number of non-zero features: 88


In [None]:
print('Ridge regression: effect of lambda regularization parameter\n')

# Refit ridge regression on the scaled features for a range of regularization
# strengths and report how many coefficients stay large and how well each
# model scores on the training and test splits.
for this_alpha in [0, 1, 10, 20, 50, 100, 1000]:

  # The regularization parameter lambda is called alpha in sklearn
  linridge = Ridge(alpha=this_alpha).fit(X_train_scaled, y_train)
  r2_train = linridge.score(X_train_scaled, y_train)
  r2_test = linridge.score(X_test_scaled, y_test)
  num_coeff_bigger = np.sum(abs(linridge.coef_) > 1.0)

  # The original string carried a stray "\ " (a broken line continuation)
  # that was printed literally; it is removed here.
  print('Alpha = {:.2f}\nnum abs(coeff) > 1.0: {}, r-squared training: {:.2f}, r-squared test: {:.2f}\n'.format(this_alpha, num_coeff_bigger, r2_train, r2_test))

Ridge regression: effect of lambda regularization parameter

Alpha = 0.00
num abs(coeff) > 1.0: 87, \ r-squared training: 0.67, r-squared test: 0.49

Alpha = 1.00
num abs(coeff) > 1.0: 87, \ r-squared training: 0.66, r-squared test: 0.56

Alpha = 10.00
num abs(coeff) > 1.0: 87, \ r-squared training: 0.63, r-squared test: 0.59

Alpha = 20.00
num abs(coeff) > 1.0: 88, \ r-squared training: 0.61, r-squared test: 0.60

Alpha = 50.00
num abs(coeff) > 1.0: 86, \ r-squared training: 0.58, r-squared test: 0.58

Alpha = 100.00
num abs(coeff) > 1.0: 87, \ r-squared training: 0.55, r-squared test: 0.55

Alpha = 1000.00
num abs(coeff) > 1.0: 84, \ r-squared training: 0.31, r-squared test: 0.30



In [None]:
from sklearn. linear_model import Lasso

In [None]:
# Same split and ridge fit (alpha=20, unscaled features) as the earlier ridge
# cell; running it again rebinds `linridge`, which the alpha sweep above left
# pointing at its last model.
X_train, X_test, y_train, y_test = train_test_split(
    X_crime, y_crime, random_state=0
)
linridge = Ridge(alpha=20.0)
linridge.fit(X_train, y_train)

# Report lines as (template, value) pairs, printed in order.
ridge_report = [
    ('ridge regression linear model intercept: {}', linridge.intercept_),
    ('ridge regression linear model coeff:\n{}', linridge.coef_),
    ('R-squared score (training): {:.3f}', linridge.score(X_train, y_train)),
    ('R-squared score (test): {:.3f}', linridge.score(X_test, y_test)),
]

print('Crime dataset fitted by Ridge Regression')
for template, value in ridge_report:
  print(template.format(value))

Crime dataset fitted by Ridge Regression
ridge regression linear model intercept: -3352.423035846185
ridge regression linear model coeff:
[ 1.95091438e-03  2.19322667e+01  9.56286607e+00 -3.59178973e+01
  6.36465325e+00 -1.96885471e+01 -2.80715856e-03  1.66254486e+00
 -6.61426604e-03 -6.95450680e+00  1.71944731e+01 -5.62819154e+00
  8.83525114e+00  6.79085746e-01 -7.33614221e+00  6.70389803e-03
  9.78505502e-04  5.01202169e-03 -4.89870524e+00 -1.79270062e+01
  9.17572382e+00 -1.24454193e+00  1.21845360e+00  1.03233089e+01
 -3.78037278e+00 -3.73428973e+00  4.74595305e+00  8.42696855e+00
  3.09250005e+01  1.18644167e+01 -2.05183675e+00 -3.82210450e+01
  1.85081589e+01  1.52510829e+00 -2.20086608e+01  2.46283912e+00
  3.29328703e-01  4.02228467e+00 -1.12903533e+01 -4.69567413e-03
  4.27046505e+01 -1.22507167e-03  1.40795790e+00  9.35041855e-01
 -3.00464253e+00  1.12390514e+00 -1.82487653e+01 -1.54653407e+01
  2.41917002e+01 -1.32497562e+01 -4.20113118e-01 -3.59710660e+01
  1.29786751e+01 

In [None]:
# Fit lasso regression (alpha=2) on the min-max scaled training features.
linlasso = Lasso(alpha=2.0, max_iter=10000)
linlasso.fit(X_train_scaled, y_train)

# Gather the summary numbers first, then print the report.
lasso_nonzero = np.sum(linlasso.coef_ != 0)
lasso_r2_train = linlasso.score(X_train_scaled, y_train)
lasso_r2_test = linlasso.score(X_test_scaled, y_test)

print('Crime dataset')
print('lasso regression linear model intercept: {}'.format(linlasso.intercept_))
print('lasso regression linear model coeff:\n{}'.format(linlasso.coef_))
print('Non-zero features: {}'.format(lasso_nonzero))
print('R-squared score (training): {:.3f}'.format(lasso_r2_train))
print('R-squared score (test): {:.3f}\n'.format(lasso_r2_test))

Crime dataset
lasso regression linear model intercept: 1186.612061998579
lasso regression linear model coeff:
[    0.             0.            -0.          -168.18346054
    -0.            -0.             0.           119.6938194
     0.            -0.             0.          -169.67564456
    -0.             0.            -0.             0.
     0.             0.            -0.            -0.
     0.            -0.             0.             0.
   -57.52991966    -0.            -0.             0.
   259.32889226    -0.             0.             0.
     0.            -0.         -1188.7396867     -0.
    -0.            -0.          -231.42347299     0.
  1488.36512229     0.            -0.            -0.
    -0.             0.             0.             0.
     0.             0.            -0.             0.
    20.14419415     0.             0.             0.
     0.             0.           339.04468804     0.
     0.           459.53799903    -0.             0.
   122.69221826    

In [None]:
print('Features with non-zero weight (sorted by absolute magnitude):')

# Pair every feature name with its lasso weight and keep only the non-zero ones.
nonzero_weights = [
    (name, weight)
    for name, weight in zip(X_crime.columns, linlasso.coef_)
    if weight != 0
]
# The sort is stable, so filtering before sorting leaves ties in the same
# order as sorting first and filtering while printing.
nonzero_weights.sort(key=lambda pair: abs(pair[1]), reverse=True)

for name, weight in nonzero_weights:
  print('\t{}, {:.3f}'.format(name, weight))

Features with non-zero weight (sorted by absolute magnitude):
	PctKidsBornNeverMar, 1488.365
	PctKids2Par, -1188.740
	HousVacant, 459.538
	PctPersDenseHous, 339.045
	NumInShelters, 264.932
	MalePctDivorce, 259.329
	PctWorkMom, -231.423
	pctWInvInc, -169.676
	agePct12t29, -168.183
	PctVacantBoarded, 122.692
	pctUrban, 119.694
	MedOwnCostPctIncNoMtg, -104.571
	MedYrHousBuilt, 91.412
	RentQrange, 86.356
	OwnOccHiQuart, 73.144
	PctEmplManu, -57.530
	PctBornSameState, -49.394
	PctForeignBorn, 23.449
	PctLargHouseFam, 20.144
	PctSameCity85, 5.198


In [None]:
# The heading originally contained a stray "\ " (broken line continuation)
# that was printed literally; it is removed here.
print('Lasso regression: effect of lambda regularization\nparameter on number of features kept in final model\n')

# Refit lasso on the scaled features for increasing regularization strengths
# and report how many coefficients remain non-zero plus train/test R-squared.
# NOTE(review): the first value was 10.5, which breaks the otherwise ascending
# sweep and looks like a typo for 0.5 -- confirm. Captured output from before
# this fix is stale until the cell is re-run.
for alpha in [0.5, 1, 2, 3, 5, 10, 20, 50]:

  # The regularization parameter lambda is called alpha in sklearn
  linlasso = Lasso(alpha=alpha, max_iter=10000).fit(X_train_scaled, y_train)
  r2_train = linlasso.score(X_train_scaled, y_train)
  r2_test = linlasso.score(X_test_scaled, y_test)
  features_kept = np.sum(linlasso.coef_ != 0)

  # Four placeholders for four values: the original string had literal text
  # ("13", "1:.2f3") where two placeholders belonged, so the feature count was
  # printed as the test score and both R-squared values were dropped.
  print('Alpha = {:.2f}\nFeatures kept: {}, r-squared training: {:.2f}, r-squared test: {:.2f}\n'.format(alpha, features_kept, r2_train, r2_test))

Lasso regression: effect of lambda regularization
\ parameter on number of features kept in final model

Alpha = 10.50
Features kept: 13, r-squared training: 1:.2f3, r-squared test: 5.00

Alpha = 1.00
Features kept: 13, r-squared training: 1:.2f3, r-squared test: 25.00

Alpha = 2.00
Features kept: 13, r-squared training: 1:.2f3, r-squared test: 20.00

Alpha = 3.00
Features kept: 13, r-squared training: 1:.2f3, r-squared test: 17.00

Alpha = 5.00
Features kept: 13, r-squared training: 1:.2f3, r-squared test: 12.00

Alpha = 10.00
Features kept: 13, r-squared training: 1:.2f3, r-squared test: 6.00

Alpha = 20.00
Features kept: 13, r-squared training: 1:.2f3, r-squared test: 2.00

Alpha = 50.00
Features kept: 13, r-squared training: 1:.2f3, r-squared test: 1.00

