In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats


# Notebook-wide plotting defaults: ggplot theme with a base font size of 13.
plt.style.use('ggplot')
plt.rcParams['font.size'] = 13

In [None]:
# Restore `df3` from IPython's %store persistence. It must have been saved
# earlier with `%store df3`; every cell below reads from it.
%store -r df3

In [None]:
import sys
import src.analysis as an


### Confidence Intervals and p-values

In [None]:
# df3['age_groups'] = pd.cut(df3.Age, 
#                                      [13, 18, 24, 34, 44, 54, 64, 90], 
#                                      include_lowest=True)


In [None]:
# df3['length_groups']=pd.cut(df3.length_of_segments, 
#                             [0, 1, 30, 100, 200, 400, 800, 1600], 
#                                      include_lowest=True)



### Age
* $H_0$: The successful and unsuccessful groups do not differ in age.
* $H_a$: The successful and unsuccessful groups differ in age.
* Significance level: $\alpha = 0.05$
* Statistical test: Mann–Whitney U test (two-sided)


In [None]:
# Split ages by termination outcome (TERMINATIONID 1 = successful,
# 0 = unsuccessful) and compare the two samples with a two-sided
# Mann-Whitney U test.
outcome = df3['TERMINATIONID']
successful = df3.loc[outcome == 1, 'Age']
unsuccessful = df3.loc[outcome == 0, 'Age']

print("Mann-Whitney U test:")
age_test = stats.mannwhitneyu(successful, unsuccessful, alternative='two-sided')
print(age_test)
# Recorded outcome: p-value > 0.05, fail to reject the null hypothesis.

### Gender

In [None]:
# Per-category entries for OSEX from the project helper (src.analysis, not
# shown here); the first three items of each entry are handed to the
# binomial confidence-interval helper.
gender = an.identify_n_p_variable(df3, 'OSEX')
for group in gender:
    an.find_confidence_intervals_binomial(group[0], group[1], group[2])

#### Hypothesis
* $H_0$: Pretrial services success and gender are independent.
* $H_a$: Pretrial services success and gender are not independent.
* Significance level (Bonferroni-corrected): $\alpha = 0.05 / 2 = 0.025$
* Statistical test: chi-squared test of independence

In [None]:
# Chi-squared test of independence on the OSEX x TERMINATIONID contingency
# table. Note: SciPy applies Yates' continuity correction by default when the
# table has a single degree of freedom (2x2).
crosstab = pd.crosstab(index=df3['OSEX'], columns=df3['TERMINATIONID'])
print("Chi Squared test of independence:")
gender_chi2 = stats.chi2_contingency(crosstab)
print(gender_chi2)

# Recorded outcome: p-value > 0.025, fail to reject the null hypothesis.

### Race

In [None]:
# Per-category entries for RACE from the project helper (src.analysis, not
# shown here); the first three items of each entry are handed to the
# binomial confidence-interval helper.
race = an.identify_n_p_variable(df3, 'RACE')
for group in race:
    an.find_confidence_intervals_binomial(group[0], group[1], group[2])

### Ethnicity

In [None]:
# Per-category entries for ETHNICID from the project helper (src.analysis,
# not shown here); the first three items of each entry are handed to the
# binomial confidence-interval helper.
ethnicity = an.identify_n_p_variable(df3, 'ETHNICID')
for group in ethnicity:
    an.find_confidence_intervals_binomial(group[0], group[1], group[2])

#### Hypothesis
* $H_0$: Pretrial services success and ethnicity are independent.
* $H_a$: Pretrial services success and ethnicity are not independent.
* Significance level (Bonferroni-corrected): $\alpha = 0.05 / 3 \approx 0.0166$
* Statistical test: chi-squared test of independence

In [None]:
# Chi-squared test of independence on the ETHNICID x TERMINATIONID
# contingency table. The test is run once; its result is only needed for
# printing.
crosstab = pd.crosstab(df3['ETHNICID'], df3['TERMINATIONID'])
print("Chi Squared test of independence:")
print(stats.chi2_contingency(crosstab))
# Recorded outcome: p > 0.0166, fail to reject the null hypothesis.

### Offense

In [None]:
# Per-category entries for OFFENSECATID from the project helper
# (src.analysis, not shown here); the first three items of each entry are
# handed to the binomial confidence-interval helper.
offense = an.identify_n_p_variable(df3, 'OFFENSECATID')
for group in offense:
    an.find_confidence_intervals_binomial(group[0], group[1], group[2])

### Risk Level

In [None]:
# Per-category entries for CPATCATID from the project helper (src.analysis,
# not shown here); the first three items of each entry are handed to the
# binomial confidence-interval helper.
risk = an.identify_n_p_variable(df3, 'CPATCATID')
for group in risk:
    an.find_confidence_intervals_binomial(group[0], group[1], group[2])

### Length of Supervision

#### Hypothesis
* $H_0$: The successful and unsuccessful groups do not differ in length of supervision.
* $H_a$: The successful and unsuccessful groups differ in length of supervision.
* Significance level (Bonferroni-corrected): $\alpha = 0.05 / 4 = 0.0125$
* Statistical test: Mann–Whitney U test (two-sided)

In [None]:
# Compare supervision length between outcomes (TERMINATIONID 1 = successful,
# 0 = unsuccessful) with a two-sided Mann-Whitney U test.
# NOTE(review): this notebook tightens alpha test by test (0.05, 0.025,
# 0.0166, 0.0125); a Bonferroni correction usually applies the same alpha/m
# to every test in the family — confirm the intended thresholds.
is_successful = df3['TERMINATIONID'].eq(1)
is_unsuccessful = df3['TERMINATIONID'].eq(0)
successful = df3.loc[is_successful, 'length_of_segments']
unsuccessful = df3.loc[is_unsuccessful, 'length_of_segments']

print("Mann-Whitney U test:")
length_test = stats.mannwhitneyu(successful, unsuccessful, alternative='two-sided')
print(length_test)
# Recorded outcome: p < 0.0125, reject the null hypothesis.

In [None]:
# Overlay normal curves fitted to the two supervision-length samples
# (`successful` / `unsuccessful` come from the Mann-Whitney cell above).
# NOTE(review): the title says "Sample Means", but each curve uses the sample
# standard deviation as its scale, not the standard error (std / sqrt(n)) —
# confirm which was intended.
fig, ax = plt.subplots()

# Fit under new names so `successful` / `unsuccessful` keep holding the raw
# samples and the cells that use them can still be re-run after this one.
successful_dist = stats.norm(successful.mean(), successful.std())
unsuccessful_dist = stats.norm(unsuccessful.mean(), unsuccessful.std())

# Span the tails of both distributions so neither curve is cut off,
# whichever group has the smaller mean or the larger spread.
lower = min(successful_dist.ppf(0.0001), unsuccessful_dist.ppf(0.0001))
upper = max(successful_dist.ppf(0.9999), unsuccessful_dist.ppf(0.9999))
support = np.linspace(lower, upper, 100)
successful_pdf = successful_dist.pdf(support)
unsuccessful_pdf = unsuccessful_dist.pdf(support)

ax.plot(support, successful_pdf, color='blue', label='Successful')
ax.fill(support, successful_pdf, color='blue', alpha=0.5)
ax.plot(support, unsuccessful_pdf, color='red', label='Unsuccessful')
ax.fill(support, unsuccessful_pdf, color='red', alpha=0.5)
ax.set_xlabel("Length of Supervision")
ax.set_ylabel("Probability Density")
ax.legend()
ax.set_title("Distribution of Sample Means for Length of Supervision");