In [219]:
import pandas as pd
import math
import sklearn
import statsmodels.api as sm
from sklearn import linear_model
from sklearn import preprocessing
import numpy as np


In [220]:
df = pd.read_csv('crime.csv')
df.count()

City                                      351
Population                                348
Violent\ncrime                            348
Murder and\nnonnegligent\nmanslaughter    348
Rape\n(revised\ndefinition)1                0
Rape\n(legacy\ndefinition)2               348
Robbery                                   348
Aggravated\nassault                       348
Property\ncrime                           348
Burglary                                  348
Larceny-\ntheft                           348
Motor\nvehicle\ntheft                     348
Arson3                                    187
dtype: int64

In [221]:
df = df.drop('Rape\n(revised\ndefinition)1',1)
df = df.drop('City',1)
df=(df.replace(',','', regex=True ))


In [222]:
df = df.dropna()

df['Population'] = pd.to_numeric(df['Population'], errors='coerce')
df['Violent\ncrime'] = pd.to_numeric(df['Violent\ncrime'], errors='coerce')
df['Rape\n(legacy\ndefinition)2'] = pd.to_numeric(df['Rape\n(legacy\ndefinition)2'], errors='coerce')
df['Robbery'] = pd.to_numeric(df['Robbery'], errors='coerce')
df['Aggravated\nassault'] = pd.to_numeric(df['Aggravated\nassault'], errors='coerce')
df['Property\ncrime'] = pd.to_numeric(df['Property\ncrime'], errors='coerce')
df['Burglary'] = pd.to_numeric(df['Burglary'], errors='coerce')
df['Larceny-\ntheft'] = pd.to_numeric(df['Larceny-\ntheft'], errors='coerce')
df['Motor\nvehicle\ntheft'] = pd.to_numeric(df['Motor\nvehicle\ntheft'], errors='coerce')
df.dtypes

Population                                  int64
Violent\ncrime                              int64
Murder and\nnonnegligent\nmanslaughter    float64
Rape\n(legacy\ndefinition)2                 int64
Robbery                                     int64
Aggravated\nassault                         int64
Property\ncrime                             int64
Burglary                                    int64
Larceny-\ntheft                             int64
Motor\nvehicle\ntheft                       int64
Arson3                                    float64
dtype: object

In [223]:
df['popsqr'] = df['Population'] ** 2
df['BugRob'] = df['Burglary'] * df['Robbery']
df['Larcenysqr'] = df['Larceny-\ntheft'] ** 2

In [224]:
size = int(df.shape[0]/2)
df_train = df.iloc[size:,:]
df_test = df.iloc[:size,:]

In [225]:
df_test.dtypes

Population                                  int64
Violent\ncrime                              int64
Murder and\nnonnegligent\nmanslaughter    float64
Rape\n(legacy\ndefinition)2                 int64
Robbery                                     int64
Aggravated\nassault                         int64
Property\ncrime                             int64
Burglary                                    int64
Larceny-\ntheft                             int64
Motor\nvehicle\ntheft                       int64
Arson3                                    float64
popsqr                                      int64
BugRob                                      int64
Larcenysqr                                  int64
dtype: object

In [230]:
X_train = df_train.drop('Property\ncrime',1)
Y_train = df_train['Property\ncrime']


X_test = df_test.drop('Property\ncrime',1)
Y_test = df_test['Property\ncrime']


lr = linear_model.LogisticRegression()
lrfit = lr.fit(X_train, Y_train)

ridge = linear_model.Ridge(alpha=.3, normalize=True)
ridfit = ridge.fit(X_train,Y_train)

lasso = linear_model.Lasso(alpha=.3, normalize=True)
lassfit = lasso.fit(X_train,Y_train)



print('Logistic Score:')
print(lrfit.score(X_test,Y_test))
print()


print('Ridge Score:')
print(ridfit.score(X_test,Y_test))
print('Coef: ')
print(ridge.coef_)
print()

print('Lasso Score:')
print(lassfit.score(X_test,Y_test))
print('Coef:')
print(lasso.coef_)

Logistic Score:
0.0

Ridge Score:
0.7896413722557923
Coef: 
[ 3.36390784e-03  2.78466955e-01  1.53804600e+01  9.89491560e+00
  4.88679799e-01  5.90571020e-01  5.52627741e-01  2.49500991e-01
  1.41907008e+00  6.54465796e+00 -4.05234531e-09  2.15900964e-04
  1.78675679e-05]

Lasso Score:
0.9999964884640001
Coef:
[0.         0.         0.         0.         0.         0.
 0.99981757 1.00025361 0.96082031 0.         0.         0.
 0.        ]
