In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.discrete.discrete_model import Logit, Probit
from __future__ import division
import seaborn as sns

%matplotlib inline
import matplotlib.pylab as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 18.5, 10.5

def print_resids(preds, resids):
    ax = sns.regplot(preds, resids);
    ax.set(xlabel = 'Predicted values', ylabel = 'errors', title = 'Predicted values vs. Errors')
    plt.show();

## Problem 2

data on districts from on the 105th Congress (1997-8)

In [2]:
congress = pd.read_stata("http://rlhick.people.wm.edu/econ407/data/congressional_105.dta" )

In [3]:
congress.head()

Unnamed: 0,state,fipstate,sc,cd,repub,age65,black,blucllr,city,coast,...,miltpop,nucplant,popsqmi,populatn,rurlfarm,transprt,unemplyd,union,urban,whlretl
0,AK,2,81,1,1.0,25898,22566,31560,0,1,...,24991,0,0.964357,629099,1292,20903,26234,30.4,222119,50986
1,AL,1,41,1,1.0,72534,164556,43449,0,1,...,1524,0,85.095802,577630,5475,10925,19799,18.200001,301197,52983
2,AL,1,41,2,1.0,75396,139311,49886,0,0,...,11250,1,56.973942,577203,12491,11820,14127,18.200001,202022,52623
3,AL,1,41,3,1.0,74506,150175,65849,0,0,...,5804,0,66.19062,577116,8863,7891,17303,18.200001,157275,45284
4,AL,1,41,4,1.0,84691,38087,74068,0,0,...,463,0,63.142357,577058,16664,9711,16724,18.200001,71451,48266


### (a)

For the logit, probit and OLS (linear probability) models, I will estimate

$$ repub = \beta_0 + \beta_1 per\_age65 + \beta_2 per\_black + \beta_3 per\_bluecllr + \beta_4 city + \beta_5 mdnincm + \beta_6 per\_unemployed + \beta_7 union $$


where per_unemployed is the percentage of the district that is unemployed.

In [4]:
#making percentages
variables = ['unemplyd', 'age65', 'black', 'blucllr']

for v in variables:
    congress['per_' + v] = congress[[v, 'populatn']].apply(lambda row: (row[0] / row[1])*100, axis = 1)

In [5]:
congress.head()

Unnamed: 0,state,fipstate,sc,cd,repub,age65,black,blucllr,city,coast,...,rurlfarm,transprt,unemplyd,union,urban,whlretl,per_unemplyd,per_age65,per_black,per_blucllr
0,AK,2,81,1,1.0,25898,22566,31560,0,1,...,1292,20903,26234,30.4,222119,50986,4.170091,4.116681,3.587035,5.016698
1,AL,1,41,1,1.0,72534,164556,43449,0,1,...,5475,10925,19799,18.200001,301197,52983,3.427627,12.557173,28.488133,7.521943
2,AL,1,41,2,1.0,75396,139311,49886,0,0,...,12491,11820,14127,18.200001,202022,52623,2.447492,13.062302,24.135529,8.642713
3,AL,1,41,3,1.0,74506,150175,65849,0,0,...,8863,7891,17303,18.200001,157275,45284,2.998184,12.910056,26.021632,11.410011
4,AL,1,41,4,1.0,84691,38087,74068,0,0,...,16664,9711,16724,18.200001,71451,48266,2.898149,14.676341,6.600203,12.835452


In [6]:
congress.columns.values

array(['state', 'fipstate', 'sc', 'cd', 'repub', 'age65', 'black',
       'blucllr', 'city', 'coast', 'construc', 'cvllbrfr', 'enroll',
       'farmer', 'finance', 'forborn', 'gvtwrkr', 'intrland', 'landsqmi',
       'mdnincm', 'miltinst', 'miltmajr', 'miltpop', 'nucplant', 'popsqmi',
       'populatn', 'rurlfarm', 'transprt', 'unemplyd', 'union', 'urban',
       'whlretl', 'per_unemplyd', 'per_age65', 'per_black', 'per_blucllr'], dtype=object)

In [7]:
#check for null values
congress[congress.repub.isnull()]

Unnamed: 0,state,fipstate,sc,cd,repub,age65,black,blucllr,city,coast,...,rurlfarm,transprt,unemplyd,union,urban,whlretl,per_unemplyd,per_age65,per_black,per_blucllr
318,OK,40,53,1,,119200,99968,57876,1,0,...,1750,32948,27258,12.9,924952,117114,2.600284,11.371116,9.536474,5.521097


After doing some research on Oklahoma's first congressional district, they had republican congressman Steve Largent from 1994 to 2002. I will code that row as republican.

In [8]:
congress.repub.fillna(value = 1, inplace = True)

In [9]:
ind_vars = [ 'per_age65', 'per_black', 'per_blucllr', 'city','mdnincm', 'per_unemplyd', 'union']
dep_var = 'repub'

x_const = sm.add_constant(congress[ind_vars])
y = congress[dep_var]

#Logit and probit models

logit_results = Logit(y, x_const).fit()

probit_results = Probit(y, x_const).fit()

Optimization terminated successfully.
         Current function value: 0.542084
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.541259
         Iterations 7


Lets take a look at the summaries.

In [10]:
logit_results.summary()

0,1,2,3
Dep. Variable:,repub,No. Observations:,435.0
Model:,Logit,Df Residuals:,427.0
Method:,MLE,Df Model:,7.0
Date:,"Mon, 21 Nov 2016",Pseudo R-squ.:,0.2166
Time:,13:50:41,Log-Likelihood:,-235.81
converged:,True,LL-Null:,-301.01
,,LLR p-value:,5.158e-25

0,1,2,3,4,5
,coef,std err,z,P>|z|,[95.0% Conf. Int.]
const,9.5500,1.618,5.902,0.000,6.378 12.722
per_age65,-0.1199,0.037,-3.217,0.001,-0.193 -0.047
per_black,-0.0504,0.013,-3.956,0.000,-0.075 -0.025
per_blucllr,-0.0712,0.063,-1.135,0.256,-0.194 0.052
city,-0.6513,0.259,-2.519,0.012,-1.158 -0.145
mdnincm,-5.843e-05,1.92e-05,-3.037,0.002,-9.61e-05 -2.07e-05
per_unemplyd,-1.4488,0.233,-6.210,0.000,-1.906 -0.992
union,-0.0280,0.016,-1.705,0.088,-0.060 0.004


In [11]:
logit_marg = logit_results.get_margeff(dummy = True)
logit_marg.summary()

0,1
Dep. Variable:,repub
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[95.0% Conf. Int.]
per_age65,-0.0221,0.007,-3.363,0.001,-0.035 -0.009
per_black,-0.0093,0.002,-4.215,0.0,-0.014 -0.005
per_blucllr,-0.0122,0.01,-1.269,0.205,-0.031 0.007
city,-0.1201,0.046,-2.586,0.01,-0.211 -0.029
mdnincm,-1.077e-05,3.4e-06,-3.163,0.002,-1.74e-05 -4.1e-06
per_unemplyd,-0.267,0.035,-7.538,0.0,-0.336 -0.198
union,-0.0052,0.003,-1.725,0.085,-0.011 0.001


Most notably, every 1 percent increase in unemployment decreases the probability of a republican congressman by 26.7%. Second to this, is the proportion of senior citizens: a 1 percent increase in the population of senior citizens correlates to a 2.2 % decrease in the probability of having a republican congressman.

Interestingly, the percentage of black population tracks at almost a -1:1 ratio with probability.

A 10,000 \$ increase in median income results in a (-1.0769172934225099e-05 * 10000) = -0.10769 % decrease in probability that that district elects a republican congressman

Now we look at probit summary and marginal effects

In [12]:
probit_results.summary()

0,1,2,3
Dep. Variable:,repub,No. Observations:,435.0
Model:,Probit,Df Residuals:,427.0
Method:,MLE,Df Model:,7.0
Date:,"Mon, 21 Nov 2016",Pseudo R-squ.:,0.2178
Time:,13:50:41,Log-Likelihood:,-235.45
converged:,True,LL-Null:,-301.01
,,LLR p-value:,3.652e-25

0,1,2,3,4,5
,coef,std err,z,P>|z|,[95.0% Conf. Int.]
const,5.7115,0.939,6.085,0.000,3.872 7.551
per_age65,-0.0711,0.022,-3.253,0.001,-0.114 -0.028
per_black,-0.0301,0.007,-4.038,0.000,-0.045 -0.015
per_blucllr,-0.0425,0.037,-1.135,0.256,-0.116 0.031
city,-0.3945,0.155,-2.541,0.011,-0.699 -0.090
mdnincm,-3.476e-05,1.15e-05,-3.024,0.002,-5.73e-05 -1.22e-05
per_unemplyd,-0.8684,0.135,-6.421,0.000,-1.133 -0.603
union,-0.0171,0.010,-1.745,0.081,-0.036 0.002


In [13]:
probit_marg = probit_results.get_margeff()
probit_marg.summary()

0,1
Dep. Variable:,repub
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[95.0% Conf. Int.]
per_age65,-0.0218,0.006,-3.373,0.001,-0.034 -0.009
per_black,-0.0092,0.002,-4.273,0.0,-0.013 -0.005
per_blucllr,-0.013,0.011,-1.14,0.254,-0.035 0.009
city,-0.121,0.047,-2.599,0.009,-0.212 -0.030
mdnincm,-1.066e-05,3.42e-06,-3.121,0.002,-1.74e-05 -3.96e-06
per_unemplyd,-0.2663,0.035,-7.563,0.0,-0.335 -0.197
union,-0.0052,0.003,-1.763,0.078,-0.011 0.001


In [14]:
np.abs(probit_marg.margeff) - np.abs(logit_marg.margeff)

array([ -3.08426013e-04,  -6.42455778e-05,   8.71612231e-04,
         9.23354580e-04,  -1.10433743e-07,  -7.71184393e-04,
         8.50164399e-05])

As you can see, the marginal effects of both probit and logit are almost identical, with small decimal differences between them.


Now, I show the OLS model:

In [15]:
ols_results = sm.OLS(y, x_const).fit()
ols_results.summary()

0,1,2,3
Dep. Variable:,repub,R-squared:,0.224
Model:,OLS,Adj. R-squared:,0.211
Method:,Least Squares,F-statistic:,17.59
Date:,"Mon, 21 Nov 2016",Prob (F-statistic):,1.63e-20
Time:,13:50:41,Log-Likelihood:,-260.11
No. Observations:,435,AIC:,536.2
Df Residuals:,427,BIC:,568.8
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
const,2.0296,0.272,7.450,0.000,1.494 2.565
per_age65,-0.0196,0.007,-2.845,0.005,-0.033 -0.006
per_black,-0.0047,0.002,-2.741,0.006,-0.008 -0.001
per_blucllr,-0.0165,0.012,-1.418,0.157,-0.039 0.006
city,-0.1149,0.050,-2.307,0.022,-0.213 -0.017
mdnincm,-9.758e-06,3.58e-06,-2.728,0.007,-1.68e-05 -2.73e-06
per_unemplyd,-0.2259,0.038,-5.929,0.000,-0.301 -0.151
union,-0.0027,0.003,-0.902,0.368,-0.009 0.003

0,1,2,3
Omnibus:,5.824,Durbin-Watson:,1.82
Prob(Omnibus):,0.054,Jarque-Bera (JB):,43.811
Skew:,-0.284,Prob(JB):,3.07e-10
Kurtosis:,1.553,Cond. No.,478000.0


Just like the logit and probit models, the percentage of unemployed people has the biggest effect on the chances of a republican congressman. For every percentage point of unemployment, the chances of a republican go down by 22.6 %. The feature with the second biggest effect is the percentage of the population that is over age 65. For every percent increase in senior citizens, the chances of a republican decrease by 2 %.


### (b)

For the first 15 observations calculate $p(x_i,\beta)$ and whether the model predicts whether a district is republican or not 

In [16]:
from scipy.stats import norm

In [17]:
beta_matrix = np.matrix(probit_results.params.values )
beta_matrix = np.reshape(beta_matrix, (-1, 1))
x_matrix = np.matrix(x_const[:15])

In [18]:
xi_b = x_matrix * beta_matrix
p_xi_b = norm.cdf(xi_b)

In [19]:
by_hand = x_const.ix[:14, :]
by_hand['P(x_i, B)'] = pd.DataFrame(p_xi_b)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [20]:
by_hand.to_csv("calc_by_hand.csv")

In [21]:
by_hand['predict_repub'] = by_hand['P(x_i, B)'].apply(lambda x: 1 if x >= 0.5 else 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [22]:
by_hand['repub'] = y[:15]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Contrast predicted vote with the actual vote given by each of the first 15 individuals. The column 'predict_repub' are the predictions, and the 'repub' column are the actual results. The predictions are done at a 0.5 threshold: If the model predicts more than a 50% chance of republican, then the prediction is 1, else 0.

In [23]:
by_hand[['predict_repub', 'repub']]

Unnamed: 0,predict_repub,repub
0,0,1.0
1,0,1.0
2,1,1.0
3,0,1.0
4,1,1.0
5,1,0.0
6,1,1.0
7,0,0.0
8,0,0.0
9,1,0.0


The confusion matrix has the the form

|      | Actual        |                | Total |
|-------------- | ------------- | -------------- | ----- |
|   Predicted   | 0             | 1              |       |
| 0             | true negative | false negative |row sum |
| 1             | false positive| true positive  |row sum |
| Total         | column sum    | column sum     |total sum |

In [24]:
from sklearn.metrics import confusion_matrix, classification_report

In [25]:
confusion_matrix(by_hand['repub'], by_hand['predict_repub'])

array([[3, 2],
       [4, 6]])

In [26]:
print classification_report(by_hand['repub'], by_hand['predict_repub'])

             precision    recall  f1-score   support

        0.0       0.43      0.60      0.50         5
        1.0       0.75      0.60      0.67        10

avg / total       0.64      0.60      0.61        15



So our model only classified democrats correctly in 3 / 7 times. It classified republicans correctly 6 / 8 times. What this tells me is that the model is pretty heavy handed in classifying republicans. I suspect that some factors have a greater influence on the probability than others, namely unemployment and the black population. 

### (c)

Now, I use the model to find where the Democrats should focus their resources. My criteria for this are districts where the probability of going republican is lower than 0.55 - 'swing districts' if you will. 

In [27]:
congress['probability'] = probit_results.predict()

congress[congress.probability.apply(lambda prob: True if (prob >= 0.5 and prob <= 0.55) else False)]

Unnamed: 0,state,fipstate,sc,cd,repub,age65,black,blucllr,city,coast,...,transprt,unemplyd,union,urban,whlretl,per_unemplyd,per_age65,per_black,per_blucllr,probability
19,CA,6,71,2,1.0,89325,8410,29098,0,0,...,8040,18987,25.4,185449,48240,3.304719,15.547166,1.463775,5.064556,0.504886
34,CA,6,71,17,0.0,59816,24991,30290,0,1,...,7374,18759,25.4,424528,54779,3.284846,10.474244,4.376117,5.304013,0.509042
42,CA,6,71,25,1.0,43152,25864,26610,1,0,...,9213,14237,25.4,506718,57774,2.483823,7.528407,4.512299,4.642448,0.530764
51,CA,6,71,34,0.0,50565,11212,55415,0,0,...,13773,18726,25.4,573456,58985,3.265464,8.81759,1.955163,9.663339,0.50402
66,CA,6,71,49,1.0,71244,30024,19415,1,0,...,9186,14314,25.4,573437,62206,2.496177,12.424033,5.235797,3.385725,0.549205
68,CA,6,71,51,1.0,65052,9333,24025,1,0,...,8320,13278,25.4,553964,61953,2.317884,11.355852,1.629222,4.193943,0.533052
103,FL,12,43,21,1.0,55885,23431,40625,1,0,...,24786,18107,9.6,557737,72140,3.219583,9.936842,4.166237,7.223481,0.507156
147,IN,18,22,2,1.0,75839,22634,56329,0,0,...,8074,16693,25.1,161951,55377,3.011432,13.681423,4.083194,10.161802,0.517377
164,KY,21,51,5,1.0,74683,5728,45791,0,0,...,10253,25431,20.4,0,41204,4.070021,11.952397,0.916719,7.328471,0.508003
168,LA,22,45,3,1.0,89478,203766,73010,0,1,...,24195,33656,13.8,350552,83487,3.41636,9.082752,20.683922,7.411114,0.505411


### (d) 
For both probit and logit, discuss the predictive accuracy of the model by comparing the predicted outcomes with actual chosen outcomes.

In [28]:
# countR2 = # correctly predicted / total #

preds_probit = [1 if x >= 0.5 else 0 for x in probit_results.predict()]
preds_logit = [1 if x >= 0.5 else 0 for x in logit_results.predict()]

In [29]:
confusion_matrix(congress['repub'], preds_probit)

array([[139,  68],
       [ 49, 179]])

In [30]:
confusion_matrix(congress['repub'], preds_logit)

array([[139,  68],
       [ 50, 178]])

In [31]:
countR2_probit =  (confusion_matrix(congress['repub'], preds_probit)[0,0] + 
confusion_matrix(congress['repub'], preds_probit)[1,1]) / np.sum(confusion_matrix(congress['repub'], preds_probit))

print "the count R^2 for the probit model: ", countR2_probit

the count R^2 for the probit model:  0.731034482759


In [32]:
countR2_logit =  (confusion_matrix(congress['repub'], preds_logit)[0,0] + 
confusion_matrix(congress['repub'], preds_logit)[1,1]) / np.sum(confusion_matrix(congress['repub'], preds_logit))

print "the count R^2 for the logit model: ", countR2_logit

the count R^2 for the logit model:  0.728735632184


So far, probit and logit are neck and neck. Their classification accuracy is almost exactly the same for both models, with probit doing one prediction better. Lets take a look at McFadden's R2 (aka the pseudo R2):

In [33]:
probit_results.prsquared, logit_results.prsquared

(0.21781295278040247, 0.21662053801812553)

Again, probit does slightly better than the logit. 

Lets dive in to what each measure of fit is capturing.

**Count R2** captures how well the model is classifying overall . It is not like other classic types of R2. It does not measure any kind of information gain from a null model to a fully specified model.

**McFadden's R2** = (1 - (log likelihood full model / log likelihood only intercept)). The log likelihood of the intercept model is treated as a total sum of squares, and the log likelihood of the full model is treated as the sum of squared errors (like in approach 1). The ratio of the likelihoods suggests the level of improvement over the intercept model offered by the full model. 


### (e) 
Which model do I recommend?

I recommend the probit model. In reality, both models are almost exactly the same, with the same predictive power. Probit edges out the logit with its higher pseudo R2, and slightly better count R2. Both model's marginal effects are of the same size and almost identical magnitude.


### (f)

Suppose we uniformly increase the number of african americans in every district by 5000. Describe how the model parameters and probabilities can be used to interpret this scenario. Being as specific as you can, investigate
the predicted probabilities for the first 15 observations from part (b). What is your estimate of the % of ‘Republican’ districts?


My model parameter for the black population is the percentage of that districts population that is black. So the increase in percentage will depend on how large that district is. My probit model predicts that for every percentage increase in the black population, the chances of having a republican representative goes down by 0.9 %.

In [34]:
congress_incr = congress.copy()
congress_incr['black'] = congress_incr.black + 5000
congress_incr['populatn'] = congress_incr.populatn + 5000

In [35]:
#get the increased percentages of populations
congress_incr.per_black = congress_incr[['black', 'populatn']].apply(lambda row: (row[0] / row[1])*100, axis = 1)

In [36]:
x_incr = sm.add_constant(congress_incr[ind_vars])
y_incr = congress_incr[dep_var]

Get the probability predictions for the increased black population. After that, we'll look at the change in probability from the actual population, to the increased population.

In [37]:
congress_incr['incr_probability'] = probit_results.predict(exog = x_incr)

In [38]:
congress_incr.incr_probability.mean()- congress_incr.probability.mean()

-0.007245320944302414

With the increase in 5000 black people in every district, the mean probability of a republican congressman goes down from 52.3 % to 51.6 %, a .00724 difference.

In [39]:
congress_incr['change_in_black'] = (congress_incr.per_black - congress.per_black)

look at the change in percentage of black population multiplied by its marginal effect to get the predicted drop in probability.

In [40]:
congress_incr.change_in_black.mean() * probit_marg.margeff[1]

-0.0069820518279979371

Thats actually really close to the difference in mean probabilities, only a 3% difference between the two.


Now, I investigate the predicted probabilities for the first 15 observations. Then, get the percentage of predicted republican districts.

In [41]:
x_matrix = np.matrix(x_incr[:15])

xi_b = x_matrix * beta_matrix
p_xi_b = norm.cdf(xi_b)

In [42]:
increase = x_incr.ix[:14, :].copy()
increase['P(x_i, B)'] = pd.DataFrame(p_xi_b)

In [43]:
increase[['per_black', 'P(x_i, B)']]

Unnamed: 0,per_black,"P(x_i, B)"
0,4.347271,0.246377
1,29.101831,0.269378
2,24.787059,0.582589
3,26.657058,0.36723
4,7.402527,0.574961
5,15.561929,0.640645
6,9.886288,0.852235
7,67.72522,0.011157
8,18.573657,0.435503
9,18.307529,0.685013


In [44]:
increase['predict_repub'] = increase['P(x_i, B)'].apply(lambda x: 1 if x >= 0.5 else 0)
increase['repub'] = y_incr[:15]

increase[['per_black', 'P(x_i, B)', 'predict_repub', 'repub']]

Unnamed: 0,per_black,"P(x_i, B)",predict_repub,repub
0,4.347271,0.246377,0,1.0
1,29.101831,0.269378,0,1.0
2,24.787059,0.582589,1,1.0
3,26.657058,0.36723,0,1.0
4,7.402527,0.574961,1,1.0
5,15.561929,0.640645,1,0.0
6,9.886288,0.852235,1,1.0
7,67.72522,0.011157,0,0.0
8,18.573657,0.435503,0,0.0
9,18.307529,0.685013,1,0.0


In [45]:
by_hand[['per_black', 'P(x_i, B)', 'predict_repub', 'repub']]

Unnamed: 0,per_black,"P(x_i, B)",predict_repub,repub
0,3.587035,0.253639,0,1.0
1,28.488133,0.275506,0,1.0
2,24.135529,0.590219,1,1.0
3,26.021632,0.374448,0,1.0
4,6.600203,0.58439,1,1.0
5,14.830528,0.648832,1,0.0
6,9.105636,0.857586,1,1.0
7,67.445751,0.011405,0,0.0
8,17.881948,0.443703,0,0.0
9,17.61217,0.692401,1,0.0


In [46]:
confusion_matrix(increase['repub'], increase['predict_repub'])

array([[3, 2],
       [4, 6]])

In [47]:
(increase.per_black - by_hand.per_black).mean()

0.6931856684885246

My model predicts that 75% of the districts will be republican. This is the same estimate as before the increase. Although the percentage of the black population increased by a mean of 0.69%, this was not enough to significantly tip the scales for any districts.

### (g)

Describe how the matrix of second derivatives (also known as the information matrix) of
the Likelihood Function is useful for calculating standard errors.

The information matrix is the negative (k + 1) x (k + 1) matrix of second derivatives. The inverse of this matrix is our estimated variance/covariance matrix for the parameters. The standard errors for parameter i is $\sqrt{I(\hat \theta_{ii})^{-1}}$. The reason we use the second derivative is that it tells us how fast our likelihood changes the farther we get away from the true parameter $\theta$

In [48]:
-2*(-927.514 + 2744.767)

-3634.5059999999994

In [49]:
from scipy.stats import chi2


In [50]:
chi2.cdf(3634.5059, 1)

1.0