In [2]:
import pandas as pd
from datascience import *
import numpy as np

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

import ipywidgets as widgets
plt.style.use('fivethirtyeight')

---
# Prisons
---

## Data

**Cleaning the dataset that will be used in the final version of the notebook, using pandas to make the cleaning easier.**

In [3]:
# Read monthly_cdcr_population.csv into a pandas DataFrame so it can be cleaned below.
monthly_cdcr_population = pd.read_csv("monthly_cdcr_population.csv")
 

In [4]:
# Discard the "source_pdf_name" column; every other column is carried over unchanged.
cdcr = monthly_cdcr_population.drop(columns=["source_pdf_name"])


In [5]:
# Persist the trimmed table; with the default settings the DataFrame index is
# written out as the first column of the file.
cdcr.to_csv("monthly_cdcr.csv")

In [6]:
# Display the table with the source_pdf_name column removed.
cdcr

Unnamed: 0,year,month,institution_name,population_felons,civil_addict,total_population,designed_capacity,percent_occupied,staffed_capacity
0,1996,1,VSP (VALLEY SP),2294,0,2294,1980,115.9,1980
1,1996,1,SCC (SIERRA CONSERVATION CENTER),322,0,322,320,100.6,320
2,1996,1,NCWF (NO CAL WOMEN'S FACIL),786,4,790,400,197.5,760
3,1996,1,CCWF (CENTRAL CA WOMEN'S FAC),2846,13,2859,2004,142.7,3224
4,1996,1,"CRC (CAL REHAB CTR, WOMEN)",91,703,794,500,158.8,842
5,1996,1,CIW (CA INSTITUTION FOR WOMEN),1690,36,1726,1026,168.2,1646
6,1996,1,WSP (WASCO SP),4475,62,4537,2484,182.6,4484
7,1996,1,SCC (SIERRA CONSERVATION CENTER),6010,0,6010,3606,166.7,5884
8,1996,1,SRTA (SANTA RITA CO. JAIL-RC),811,0,811,395,205.3,750
9,1996,1,RJD (RJ DONOVAN CORRECTIONAL FAC),4577,0,4577,2200,208.0,4566


## **Widget**

In [7]:
# Populations based off the year rather than the month: collapse the records to
# one row per (institution, year), summing the other columns.
# read_table is a classmethod, so it is called on Table itself rather than on a
# throwaway empty instance.
data = Table.read_table("data_for_widget.csv")
grouped = data.group(["institution_name", "year"], sum)

# Occupancy is recomputed from the summed population and summed designed capacity.
new_percents = grouped.column("total_population sum") / grouped.column("designed_capacity sum") * 100
grouped = grouped.with_column("Percent Occupied", new_percents)

# One entry per distinct institution name; the slider value is used as an index
# into this array.
institutions = grouped.group(0).column(0)

# Slider whose integer value is a position in `institutions`.
# The upper bound is derived from the data instead of being hard-coded, so the
# slider can neither index past the end of `institutions` nor leave trailing
# institutions unreachable if the dataset changes.
inp = widgets.IntSlider(
    value=0,
    min=0,
    max=len(institutions) - 1,
    step=1,
    description='Institution:',
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

# One entry per highlighted year:
# (year, matplotlib marker format, annotation box colour, label offset in points).
_HIGHLIGHTED_YEARS = (
    (2011, 'ro', 'red', (-15, 0)),
    (2013, 'co', 'cyan', (35, 20)),
    (2014, 'yo', 'orange', (120, 15)),
)


def _mark_year(inst, year, marker, box_color, text_offset):
    """Draw a marker and a labelled arrow for `year` on the current plot.

    Parameters
    ----------
    inst : Table
        Rows of `grouped` for a single institution; must have "year" and
        "Percent Occupied" columns.
    year : int
        Year to highlight. Nothing is drawn when `inst` has no row for it.
    marker : str
        matplotlib format string for the point (e.g. 'ro').
    box_color : str
        Face colour of the annotation box.
    text_offset : tuple
        (x, y) offset of the label from the point, in points.
    """
    if not np.any(inst.column("year") == year):
        return

    percent = inst.where("year", year).column("Percent Occupied").item(0)
    plt.plot([year], [percent], marker)
    plt.annotate("({0}, {1}%)".format(year, round(percent, 2)),
                 xy=(year, round(percent, 2)), xytext=text_offset,
                 textcoords='offset points', ha='right', va='bottom',
                 bbox=dict(boxstyle='round,pad=0.5', fc=box_color, alpha=0.3),
                 arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))


def f(inp):
    """Plot "Percent Occupied" by year for the institution at index `inp`.

    Selects the rows of `grouped` whose first column equals `institutions[inp]`,
    plots occupancy against year, marks each year listed in `_HIGHLIGHTED_YEARS`
    that is present for that institution, and titles the plot with the
    institution name.

    Parameters
    ----------
    inp : int
        Index into `institutions` (supplied by the slider widget).
    """
    inst = grouped.where(0, institutions[inp])
    inst.plot(1, "Percent Occupied")

    for year, marker, box_color, text_offset in _HIGHLIGHTED_YEARS:
        _mark_year(inst, year, marker, box_color, text_offset)

    plt.title(institutions[inp])
    
# Wire the slider to f: the slider's value is passed to f as `inp`, and whatever
# f draws is captured in the `out` widget.
out = widgets.interactive_output(f, dict(inp=inp))
#out.clear_output()

In [8]:
#widgets.HBox([widgets.VBox([inp]), out])

In [9]:
# NOTES ABOUT THE WIDGET:
#Do not worry about the code in the next section.
#You will not have to implement anything like this. 
#The code produces a widget which allows you to 
#interact with the dataset. In this example, you 
#will be able to scroll through all of the prisons 
#in California, and you can see how the designed 
#capacity versus population has changed over time.

In [10]:
# # attempt 2 not repeating plots
# from ipywidgets import interact, interactive
# from IPython.display import clear_output, display, HTML
# w = interactive(f, inp =(0,39))
# display(w)

## Gender

In [11]:
# Need to come back to the male/female institutions divide
# institutions = grouped.group(0).column(0)
# (institutions)

---
# Jails
---

## Data 

In the following two cells, we load the data into our Jupyter Notebook. Our data is currently separated into multiple CSV files based on the year the jail data is from. We can take a look at the 1995 jail data in the next code cell, to get a sense of what our dataframe of jail data will look like.

In [12]:
# header=1 tells pandas to take the column names from the second line of the
# file; the line above it is skipped.
jail_1995 = pd.read_csv("jails_data/1995.csv", header = 1)
# Display the 1995 table as a preview of the layout shared by the yearly files.
jail_1995

Unnamed: 0,Jurisdiction,Facility,Year,Month,Unsentenced males,Unsentenced females,Sentenced males,Sentenced females,Total facility ADP
0,Alameda Sheriff's Dept.,Glen Dyer Jail,1995,10,539,1,143,0,683.00
1,Alameda Sheriff's Dept.,Santa Rita Jail,1995,10,1983,197,887,166,3233.00
2,Amador Sheriff's Dept.,Amador County Jail,1995,10,15.33,1.59,21.7,4.85,43.47
3,Butte Sheriff's Dept.,Butte County Jail,1995,10,225,39,158,15,437.00
4,Calaveras Sheriff's Dept.,Calaveras County Jail,1995,10,20.8,0.4,29.6,3.9,54.70
5,Colusa Sheriff's Dept.,Colusa County Jail,1995,10,26,1,31,1,59.00
6,Contra Costa Sheriff's Dept.,Marsh Creek Detention Facility,1995,10,0,0,230,0,230.00
7,Contra Costa Sheriff's Dept.,Martinez Detention Facility,1995,10,647,0,0,0,647.00
8,Contra Costa Sheriff's Dept.,West County Detention Facility,1995,10,597,0,0,0,597.00
9,Del Norte Sheriff's Dept.,Del Norte County Jail,1995,10,65.83,7.22,0,0,73.05


The following cell reads every yearly CSV file and combines them into a single table that we can analyze. You do not need to understand this code chunk completely.

In [13]:
# Read every yearly file (1995 through 2018) and stack them into one DataFrame.
# The frames are collected first and concatenated once: calling pd.concat inside
# the loop would re-copy all previously loaded rows on every iteration.
# Each file's own row index is kept, so index values repeat across years.
yearly_frames = []
for year in range(1995, 2019):
    csv_path = "jails_data/" + str(year) + ".csv"
    yearly_frames.append(pd.read_csv(csv_path, header=1))

all_years = pd.concat(yearly_frames)
all_years

Unnamed: 0,Jurisdiction,Facility,Year,Month,Unsentenced males,Unsentenced females,Sentenced males,Sentenced females,Total facility ADP
0,Alameda Sheriff's Dept.,Glen Dyer Jail,1995,10,539,1,143,0,683.00
1,Alameda Sheriff's Dept.,Santa Rita Jail,1995,10,1983,197,887,166,3233.00
2,Amador Sheriff's Dept.,Amador County Jail,1995,10,15.33,1.59,21.7,4.85,43.47
3,Butte Sheriff's Dept.,Butte County Jail,1995,10,225,39,158,15,437.00
4,Calaveras Sheriff's Dept.,Calaveras County Jail,1995,10,20.8,0.4,29.6,3.9,54.70
5,Colusa Sheriff's Dept.,Colusa County Jail,1995,10,26,1,31,1,59.00
6,Contra Costa Sheriff's Dept.,Marsh Creek Detention Facility,1995,10,0,0,230,0,230.00
7,Contra Costa Sheriff's Dept.,Martinez Detention Facility,1995,10,647,0,0,0,647.00
8,Contra Costa Sheriff's Dept.,West County Detention Facility,1995,10,597,0,0,0,597.00
9,Del Norte Sheriff's Dept.,Del Norte County Jail,1995,10,65.83,7.22,0,0,73.05


Some of the values in our new combined dataframe are not what we would expect, and we need to account for them. For example, where we expect numbers for the counts of Unsentenced males/females and Sentenced males/females, some entries are letters instead, such as "d" and "u". Looking more closely at the source of the data, we find that these values represent 0. Therefore, we first need to do some data cleaning to account for the "d" and "u" values in the data.

In [14]:
# Columns that hold head counts and must end up numeric.
count_columns = [
    "Unsentenced males",
    "Unsentenced females",
    "Sentenced males",
    "Sentenced females",
]

# Replace the placeholder letters "d" and "u" with 0 wherever a cell equals one
# of them exactly (in any column), then cast the four count columns to float64
# in a single pass. The cast raises if any other non-numeric value remains.
cleaned = all_years.replace(["d", "u"], 0)
cleaned = cleaned.astype({column: "float64" for column in count_columns})
cleaned.head()

Unnamed: 0,Jurisdiction,Facility,Year,Month,Unsentenced males,Unsentenced females,Sentenced males,Sentenced females,Total facility ADP
0,Alameda Sheriff's Dept.,Glen Dyer Jail,1995,10,539.0,1.0,143.0,0.0,683.0
1,Alameda Sheriff's Dept.,Santa Rita Jail,1995,10,1983.0,197.0,887.0,166.0,3233.0
2,Amador Sheriff's Dept.,Amador County Jail,1995,10,15.33,1.59,21.7,4.85,43.47
3,Butte Sheriff's Dept.,Butte County Jail,1995,10,225.0,39.0,158.0,15.0,437.0
4,Calaveras Sheriff's Dept.,Calaveras County Jail,1995,10,20.8,0.4,29.6,3.9,54.7


In [15]:
# Save the cleaned jail data to disk so that later work can load it directly.
cleaned.to_csv("jails_cleaned.csv")