In [56]:
import _pickle as pickle
import pandas as pd
import numpy as np
from statsmodels.discrete.discrete_model import Poisson

In [2]:
with open('pickles/stations_df.pkl', 'rb') as file:
    stations_df = pickle.load(file)

In [12]:
berkeley_livermore_stations_df = stations_df[(stations_df['STATE'] == 'CA') & (stations_df['NAME'].isin(['BERKELEY', 'LIVERMORE']))]
berkeley_livermore_stations = berkeley_livermore_stations_df['ID'].tolist()

In [13]:
berkeley_livermore_stations_df.head()

Unnamed: 0,ID,LATITUDE,LONGITUDE,ELEVATION,STATE,NAME,GSN FLAG,HCN/CRN FLAG,WMO ID
53,USC00040693,37.8744,-122.2606,94.5,CA,BERKELEY,,HCN,
73,USC00044997,37.6922,-121.7692,146.3,CA,LIVERMORE,,HCN,


In [14]:
# generate coefficients for both stations and compare them

In [15]:
with open('pickles/weather_df_dict.pkl', 'rb') as file:
    weather_df_dict = pickle.load(file)

In [16]:
# compile dataframes with only temperature
temp_dfs = list()
for year in range(1980, 2010):
    temp_df = weather_df_dict[year]
    temp_df = temp_df[temp_df['ID'].isin(berkeley_livermore_stations)]
    temp_dfs.append(temp_df[temp_df['ELEMENT'] == 'TMAX'].drop(columns=['ELEMENT']))

In [17]:
# df with all stations' tempurature time series
all_temp_df = pd.concat(temp_dfs)
all_temp_df.reset_index(drop=True, inplace=True)
all_temp_df = all_temp_df.pivot(index='date', columns='ID', values='DATA VALUE')
all_temp_df = all_temp_df / 10 * 9.0 / 5.0 + 32 # convert tenths of celsius to fahrenheit
all_temp_df.rename(columns=dict(zip(berkeley_livermore_stations, ['berkeley', 'livermore'])), inplace=True)

In [18]:
all_temp_df.head()

ID,berkeley,livermore
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1980-01-01,59.0,60.98
1980-01-02,53.96,59.0
1980-01-03,55.94,55.04
1980-01-04,55.04,48.92
1980-01-05,55.94,53.96


In [19]:
# compile dataframes with only precipitation
prcp_dfs = list()
for year in range(1980, 2010):
    prcp_df = weather_df_dict[year]
    prcp_df = prcp_df[prcp_df['ID'].isin(berkeley_livermore_stations)]
    prcp_dfs.append(prcp_df[prcp_df['ELEMENT'] == 'PRCP'].drop(columns=['ELEMENT']))

In [20]:
# df with all stations' precipitation time series
all_prcp_df = pd.concat(prcp_dfs)
all_prcp_df.reset_index(drop=True, inplace=True)
all_prcp_df = all_prcp_df.pivot(index='date', columns='ID', values='DATA VALUE')
all_prcp_df = all_prcp_df / 10  # convert tenths of mm to mm
all_prcp_df.rename(columns=dict(zip(berkeley_livermore_stations, ['berkeley', 'livermore'])), inplace=True)

In [21]:
all_prcp_df.head()

ID,berkeley,livermore
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1980-01-01,0.0,1.3
1980-01-02,0.3,0.0
1980-01-03,0.3,0.0
1980-01-04,0.0,0.0
1980-01-05,0.0,0.0


In [22]:
temp_bin_filters = [
    lambda df: (df < 10),
    lambda df: (10 <= df) & (df < 20),
    lambda df: (20 <= df) & (df < 30),
    lambda df: (30 <= df) & (df < 40),
    lambda df: (40 <= df) & (df < 50),
    lambda df: (50 <= df) & (df < 60),
    lambda df: (60 <= df) & (df < 70),
    lambda df: (70 <= df) & (df < 80),
    lambda df: (80 <= df) & (df < 90),
    lambda df: (90 <= df) & (df < 100),
    lambda df: (100 <= df)
]

In [23]:
prcp_bin_filters = [
    lambda df: (df.apply(lambda x: np.isclose(x, 0))),
    lambda df: (0 < df) & (df < 5),
    lambda df: (5 <= df) & (df < 15),
    lambda df: (15 <= df) & (df < 30),
    lambda df: (30 <= df)
]

In [24]:
dfs = list()

In [25]:
for filt in temp_bin_filters:
    # creates boolean dataframe saying whether or not value is in bin
    df = filt(all_temp_df).astype(int)
    # groups by year and month and counts number of trues (boolean sum is equivalent to count)
    df = df.groupby([df.index.year, df.index.month]).sum()
    df.index.rename(['year', 'month'], inplace=True)
    dfs.append(df)

In [26]:
for filt in prcp_bin_filters:
    df = filt(all_prcp_df).astype(int)
    df = df.groupby([df.index.year, df.index.month]).sum()
    df.index.rename(['year', 'month'], inplace=True)
    dfs.append(df)

In [27]:
# let tbi represent the ith temperature bin, pbi represent the ith precipitation bin
bin_col_names = [f'tb{str(i).zfill(2)}' for i in range(1, 12)] + [f'pb{i}' for i in range(1, 6)]
bin_col_names

['tb01',
 'tb02',
 'tb03',
 'tb04',
 'tb05',
 'tb06',
 'tb07',
 'tb08',
 'tb09',
 'tb10',
 'tb11',
 'pb1',
 'pb2',
 'pb3',
 'pb4',
 'pb5']

In [28]:
temp_prcp_df = pd.concat(dfs, axis=1, keys=bin_col_names)
# temp_prcp_df.columns = temp_prcp_df.columns.reorder_levels([1, 2, 0])
# temp_prcp_df.sort_index(axis=1, inplace=True)
temp_prcp_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,tb01,tb01,tb02,tb02,tb03,tb03,tb04,tb04,tb05,tb05,...,pb1,pb1,pb2,pb2,pb3,pb3,pb4,pb4,pb5,pb5
Unnamed: 0_level_1,ID,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore,...,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
1980,1,0,0,0,0,0,0,0,0,1,1,...,19,21,7,5,2,1,2,4,1,0
1980,2,0,0,0,0,0,0,0,0,0,0,...,18,18,3,2,1,8,6,0,1,1
1980,3,0,0,0,0,0,0,0,0,0,0,...,24,24,2,5,2,2,1,0,0,0
1980,4,0,0,0,0,0,0,0,0,0,0,...,25,23,3,4,0,2,2,1,0,0
1980,5,0,0,0,0,0,0,0,0,0,0,...,30,26,0,4,1,1,0,0,0,0


In [29]:
# create lag dataframe
temp_prcp_lag_df = temp_prcp_df.copy()
# simply shift all months up by 1
temp_prcp_lag_df = temp_prcp_lag_df.shift(periods=1, axis=0)
# zero out NaNs generated by shift
temp_prcp_lag_df.loc[temp_prcp_lag_df.index[0]][:] = 0
temp_prcp_lag_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,tb01,tb01,tb02,tb02,tb03,tb03,tb04,tb04,tb05,tb05,...,pb1,pb1,pb2,pb2,pb3,pb3,pb4,pb4,pb5,pb5
Unnamed: 0_level_1,ID,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore,...,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
1980,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1980,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,19.0,21.0,7.0,5.0,2.0,1.0,2.0,4.0,1.0,0.0
1980,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,18.0,18.0,3.0,2.0,1.0,8.0,6.0,0.0,1.0,1.0
1980,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,24.0,24.0,2.0,5.0,2.0,2.0,1.0,0.0,0.0,0.0
1980,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,25.0,23.0,3.0,4.0,0.0,2.0,2.0,1.0,0.0,0.0


In [30]:
# add and "l" before each bin column name to indicate lag column
lag_col_rename_dict = {col_name: f'l{col_name}' for col_name in bin_col_names}

In [31]:
lag_col_rename_dict

{'tb01': 'ltb01',
 'tb02': 'ltb02',
 'tb03': 'ltb03',
 'tb04': 'ltb04',
 'tb05': 'ltb05',
 'tb06': 'ltb06',
 'tb07': 'ltb07',
 'tb08': 'ltb08',
 'tb09': 'ltb09',
 'tb10': 'ltb10',
 'tb11': 'ltb11',
 'pb1': 'lpb1',
 'pb2': 'lpb2',
 'pb3': 'lpb3',
 'pb4': 'lpb4',
 'pb5': 'lpb5'}

In [32]:
temp_prcp_lag_df.rename(columns=lag_col_rename_dict, inplace=True)

In [33]:
temp_prcp_df = pd.concat([temp_prcp_df, temp_prcp_lag_df], axis=1).sort_index(axis=1)

In [34]:
temp_prcp_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,lpb1,lpb1,lpb2,lpb2,lpb3,lpb3,lpb4,lpb4,lpb5,lpb5,...,tb07,tb07,tb08,tb08,tb09,tb09,tb10,tb10,tb11,tb11
Unnamed: 0_level_1,ID,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore,...,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore,berkeley,livermore
year,month,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
1980,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9,10,0,0,0,0,0,0,0,0
1980,2,19.0,21.0,7.0,5.0,2.0,1.0,2.0,4.0,1.0,0.0,...,22,27,0,0,0,0,0,0,0,0
1980,3,18.0,18.0,3.0,2.0,1.0,8.0,6.0,0.0,1.0,1.0,...,21,22,1,5,0,0,0,0,0,0
1980,4,24.0,24.0,2.0,5.0,2.0,2.0,1.0,0.0,0.0,0.0,...,19,13,3,10,0,4,0,0,0,0
1980,5,25.0,23.0,3.0,4.0,0.0,2.0,2.0,1.0,0.0,0.0,...,23,9,2,14,0,5,0,2,0,0


In [35]:
temp_prcp_df.shape

(360, 64)

In [36]:
weather_df = temp_prcp_df.stack()

In [37]:
weather_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lpb1,lpb2,lpb3,lpb4,lpb5,ltb01,ltb02,ltb03,ltb04,ltb05,...,tb02,tb03,tb04,tb05,tb06,tb07,tb08,tb09,tb10,tb11
year,month,ID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1980,1,berkeley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,21,9,0,0,0,0
1980,1,livermore,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,20,10,0,0,0,0
1980,2,berkeley,19.0,7.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0,0,0,0,7,22,0,0,0,0
1980,2,livermore,21.0,5.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0,0,0,0,2,27,0,0,0,0
1980,3,berkeley,18.0,3.0,1.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,9,21,1,0,0,0


In [38]:
with open('pickles/all_crime_df.pkl', 'rb') as file:
    all_crime_df = pickle.load(file)

In [39]:
all_crime_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,population,murder,manslaugther,rape,aggravated assault,simple assault,robbery,burglary,larceny,vehicle theft
year,month,state,fips,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1980,1,1,1001,31972.0,0.0,0.0,0.0,7.0,3.0,1.0,29.0,76.0,0.0
1980,1,1,1003,78135.0,0.0,0.0,1.0,17.0,3.0,5.0,73.0,86.0,10.0
1980,1,1,1005,24132.0,0.0,0.0,1.0,5.0,12.0,1.0,14.0,44.0,6.0
1980,1,1,1007,6431.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1980,1,1,1009,36175.0,0.0,0.0,1.0,0.0,0.0,1.0,18.0,9.0,6.0


In [40]:
crime_df = all_crime_df.reset_index()

In [41]:
crime_df.head()

Unnamed: 0,year,month,state,fips,population,murder,manslaugther,rape,aggravated assault,simple assault,robbery,burglary,larceny,vehicle theft
0,1980,1,1,1001,31972.0,0.0,0.0,0.0,7.0,3.0,1.0,29.0,76.0,0.0
1,1980,1,1,1003,78135.0,0.0,0.0,1.0,17.0,3.0,5.0,73.0,86.0,10.0
2,1980,1,1,1005,24132.0,0.0,0.0,1.0,5.0,12.0,1.0,14.0,44.0,6.0
3,1980,1,1,1007,6431.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1980,1,1,1009,36175.0,0.0,0.0,1.0,0.0,0.0,1.0,18.0,9.0,6.0


In [42]:
crime_df = crime_df[crime_df['fips'] == '06001'].drop(columns=['state', 'fips', 'population']).set_index(['year', 'month'])

In [43]:
crime_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,murder,manslaugther,rape,aggravated assault,simple assault,robbery,burglary,larceny,vehicle theft
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1980,1,10.0,0.0,64.0,418.0,805.0,557.0,2647.0,4772.0,707.0
1980,2,10.0,0.0,63.0,382.0,773.0,483.0,2332.0,4350.0,569.0
1980,3,14.0,0.0,70.0,432.0,845.0,538.0,2454.0,4945.0,658.0
1980,4,13.0,0.0,79.0,384.0,915.0,541.0,2294.0,4519.0,586.0
1980,5,12.0,0.0,67.0,421.0,871.0,530.0,2411.0,4876.0,651.0


In [55]:
crime_cols = list(crime_df.columns)

In [45]:
df = pd.merge(
    weather_df.reset_index(),
    crime_df.reset_index(),
    on=['year', 'month'],
    how='inner'
).set_index(['year', 'month'])

In [46]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ID,lpb1,lpb2,lpb3,lpb4,lpb5,ltb01,ltb02,ltb03,ltb04,...,tb11,murder,manslaugther,rape,aggravated assault,simple assault,robbery,burglary,larceny,vehicle theft
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1980,1,berkeley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,10.0,0.0,64.0,418.0,805.0,557.0,2647.0,4772.0,707.0
1980,1,livermore,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,10.0,0.0,64.0,418.0,805.0,557.0,2647.0,4772.0,707.0
1980,2,berkeley,19.0,7.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,...,0,10.0,0.0,63.0,382.0,773.0,483.0,2332.0,4350.0,569.0
1980,2,livermore,21.0,5.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,...,0,10.0,0.0,63.0,382.0,773.0,483.0,2332.0,4350.0,569.0
1980,3,berkeley,18.0,3.0,1.0,6.0,1.0,0.0,0.0,0.0,0.0,...,0,14.0,0.0,70.0,432.0,845.0,538.0,2454.0,4945.0,658.0


In [52]:
berkeley_df = df[df['ID'] == 'berkeley'].drop(columns='ID')
berkeley_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,lpb1,lpb2,lpb3,lpb4,lpb5,ltb01,ltb02,ltb03,ltb04,ltb05,...,tb11,murder,manslaugther,rape,aggravated assault,simple assault,robbery,burglary,larceny,vehicle theft
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1980,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,10.0,0.0,64.0,418.0,805.0,557.0,2647.0,4772.0,707.0
1980,2,19.0,7.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0,10.0,0.0,63.0,382.0,773.0,483.0,2332.0,4350.0,569.0
1980,3,18.0,3.0,1.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,14.0,0.0,70.0,432.0,845.0,538.0,2454.0,4945.0,658.0
1980,4,24.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,13.0,0.0,79.0,384.0,915.0,541.0,2294.0,4519.0,586.0
1980,5,25.0,3.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,12.0,0.0,67.0,421.0,871.0,530.0,2411.0,4876.0,651.0


In [53]:
livermore_df = df[df['ID'] == 'livermore'].drop(columns='ID')
livermore_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,lpb1,lpb2,lpb3,lpb4,lpb5,ltb01,ltb02,ltb03,ltb04,ltb05,...,tb11,murder,manslaugther,rape,aggravated assault,simple assault,robbery,burglary,larceny,vehicle theft
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1980,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,10.0,0.0,64.0,418.0,805.0,557.0,2647.0,4772.0,707.0
1980,2,21.0,5.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0,10.0,0.0,63.0,382.0,773.0,483.0,2332.0,4350.0,569.0
1980,3,18.0,2.0,8.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,14.0,0.0,70.0,432.0,845.0,538.0,2454.0,4945.0,658.0
1980,4,24.0,5.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,13.0,0.0,79.0,384.0,915.0,541.0,2294.0,4519.0,586.0
1980,5,23.0,4.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,12.0,0.0,67.0,421.0,871.0,530.0,2411.0,4876.0,651.0


In [69]:
mod = Poisson(np.array(berkeley_df['murder']).astype(int), berkeley_df.drop(columns=crime_cols).astype(int))

In [71]:
res = mod.fit()

Optimization terminated successfully.
         Current function value: 6.591651
         Iterations 23


LinAlgError: Singular matrix