In [1]:
import math
import warnings

from IPython.display import display
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model
import statsmodels.formula.api as smf

# Display preferences.
%matplotlib inline
pd.options.display.float_format = '{:.3f}'.format

# Suppress annoying harmless error.
warnings.filterwarnings(
    action="ignore",
    module="scipy",
    message="^internal gelsd"
)

In [2]:
# Acquire, load, and preview the data.
data = pd.read_csv('table_8.csv',thousands=',')
display(data.head())

Unnamed: 0,City,Population,Violent_crime,Murder,Rape1,Rape2,Robbery,Assault,Property_crime,Burglary,Larceny- theft,Motor_vehicle_theft,Arson3
0,Adams Village,1861.0,0.0,0.0,,0.0,0.0,0.0,12.0,2.0,10.0,0.0,0.0
1,Addison Town and Village,2577.0,3.0,0.0,,0.0,0.0,3.0,24.0,3.0,20.0,1.0,0.0
2,Akron Village,2846.0,3.0,0.0,,0.0,0.0,3.0,16.0,1.0,15.0,0.0,0.0
3,Albany,97956.0,791.0,8.0,,30.0,227.0,526.0,4090.0,705.0,3243.0,142.0,
4,Albion Village,6388.0,23.0,0.0,,3.0,4.0,16.0,223.0,53.0,165.0,5.0,


In [3]:
data['Murder_dum'] = np.where(data['Murder']>0, 1, 0)
data['Robbery_dum'] = np.where(data['Robbery']>0, 1, 0)

In [4]:
data.head()

Unnamed: 0,City,Population,Violent_crime,Murder,Rape1,Rape2,Robbery,Assault,Property_crime,Burglary,Larceny- theft,Motor_vehicle_theft,Arson3,Murder_dum,Robbery_dum
0,Adams Village,1861.0,0.0,0.0,,0.0,0.0,0.0,12.0,2.0,10.0,0.0,0.0,0,0
1,Addison Town and Village,2577.0,3.0,0.0,,0.0,0.0,3.0,24.0,3.0,20.0,1.0,0.0,0,0
2,Akron Village,2846.0,3.0,0.0,,0.0,0.0,3.0,16.0,1.0,15.0,0.0,0.0,0,0
3,Albany,97956.0,791.0,8.0,,30.0,227.0,526.0,4090.0,705.0,3243.0,142.0,,1,1
4,Albion Village,6388.0,23.0,0.0,,3.0,4.0,16.0,223.0,53.0,165.0,5.0,,0,1


In [5]:
data.insert(2,'Pop2',data['Population']*data['Population'])

In [6]:
data

Unnamed: 0,City,Population,Pop2,Violent_crime,Murder,Rape1,Rape2,Robbery,Assault,Property_crime,Burglary,Larceny- theft,Motor_vehicle_theft,Arson3,Murder_dum,Robbery_dum
0,Adams Village,1861.000,3463321.000,0.000,0.000,,0.000,0.000,0.000,12.000,2.000,10.000,0.000,0.000,0,0
1,Addison Town and Village,2577.000,6640929.000,3.000,0.000,,0.000,0.000,3.000,24.000,3.000,20.000,1.000,0.000,0,0
2,Akron Village,2846.000,8099716.000,3.000,0.000,,0.000,0.000,3.000,16.000,1.000,15.000,0.000,0.000,0,0
3,Albany,97956.000,9595377936.000,791.000,8.000,,30.000,227.000,526.000,4090.000,705.000,3243.000,142.000,,1,1
4,Albion Village,6388.000,40806544.000,23.000,0.000,,3.000,4.000,16.000,223.000,53.000,165.000,5.000,,0,1
5,Alfred Village,4089.000,16719921.000,5.000,0.000,,0.000,3.000,2.000,46.000,10.000,36.000,0.000,,0,1
6,Allegany Village,1781.000,3171961.000,3.000,0.000,,0.000,0.000,3.000,10.000,0.000,10.000,0.000,0.000,0,0
7,Amherst Town,118296.000,13993943616.000,107.000,1.000,,7.000,31.000,68.000,2118.000,204.000,1882.000,32.000,3.000,1,1
8,Amityville Village,9519.000,90611361.000,9.000,0.000,,2.000,4.000,3.000,210.000,16.000,188.000,6.000,1.000,0,1
9,Amsterdam,18182.000,330585124.000,30.000,0.000,,0.000,12.000,18.000,405.000,99.000,291.000,15.000,0.000,0,1


In [7]:
data.describe()

Unnamed: 0,Population,Pop2,Violent_crime,Murder,Rape1,Rape2,Robbery,Assault,Property_crime,Burglary,Larceny- theft,Motor_vehicle_theft,Arson3,Murder_dum,Robbery_dum
count,348.0,348.0,348.0,348.0,0.0,348.0,348.0,348.0,348.0,348.0,348.0,348.0,187.0,351.0,351.0
mean,40037.632,203554650960.236,201.595,1.566,,5.865,72.902,121.261,792.606,119.684,637.017,35.905,1.872,0.14,0.595
std,450037.368,3778875542708.13,2815.269,18.304,,60.425,1031.033,1706.132,7659.725,924.949,6346.054,403.424,10.693,0.347,0.492
min,526.0,276676.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3003.0,9018117.0,2.0,0.0,,0.0,0.0,1.0,40.5,6.0,31.0,0.0,0.0,0.0,0.0
50%,7233.5,52325684.5,6.0,0.0,,0.0,1.0,4.0,112.5,17.5,94.0,2.0,0.0,0.0,1.0
75%,18427.5,339753567.0,22.0,0.0,,2.0,5.0,14.0,341.0,51.25,287.25,7.0,1.0,0.0,1.0
max,8396126.0,70494931807876.0,52384.0,335.0,,1112.0,19170.0,31767.0,141971.0,16606.0,117931.0,7434.0,132.0,1.0,1.0


In [8]:
data.shape

(351, 16)

In [10]:
data.dropna(axis=0, inplace=True, thresh=10)

In [15]:
data.drop(['Rape1'], axis=1)

Unnamed: 0,City,Population,Pop2,Violent_crime,Murder,Rape2,Robbery,Assault,Property_crime,Burglary,Larceny- theft,Motor_vehicle_theft,Arson3,Murder_dum,Robbery_dum
0,Adams Village,1861.000,3463321.000,0.000,0.000,0.000,0.000,0.000,12.000,2.000,10.000,0.000,0.000,0,0
1,Addison Town and Village,2577.000,6640929.000,3.000,0.000,0.000,0.000,3.000,24.000,3.000,20.000,1.000,0.000,0,0
2,Akron Village,2846.000,8099716.000,3.000,0.000,0.000,0.000,3.000,16.000,1.000,15.000,0.000,0.000,0,0
3,Albany,97956.000,9595377936.000,791.000,8.000,30.000,227.000,526.000,4090.000,705.000,3243.000,142.000,,1,1
4,Albion Village,6388.000,40806544.000,23.000,0.000,3.000,4.000,16.000,223.000,53.000,165.000,5.000,,0,1
5,Alfred Village,4089.000,16719921.000,5.000,0.000,0.000,3.000,2.000,46.000,10.000,36.000,0.000,,0,1
6,Allegany Village,1781.000,3171961.000,3.000,0.000,0.000,0.000,3.000,10.000,0.000,10.000,0.000,0.000,0,0
7,Amherst Town,118296.000,13993943616.000,107.000,1.000,7.000,31.000,68.000,2118.000,204.000,1882.000,32.000,3.000,1,1
8,Amityville Village,9519.000,90611361.000,9.000,0.000,2.000,4.000,3.000,210.000,16.000,188.000,6.000,1.000,0,1
9,Amsterdam,18182.000,330585124.000,30.000,0.000,0.000,12.000,18.000,405.000,99.000,291.000,15.000,0.000,0,1


In [12]:
data.shape

(348, 16)