# COGS 108 - Capstone Project

## Project links, files, and basic information

### Websites with datasets:
- San Diego Vehicle Stops:  https://data.sandiego.gov/datasets/police-vehicle-stops/
- San Diego Population Data:  http://www.city-data.com/city/San-Diego-California.html

### Websites of needed information:
- San Diego police service areas: https://www.sandiego.gov/police/services/divisions (the vehicle stop data only records the first two digits)
- San Diego zip code map: http://www.city-data.com/zipmaps/San-Diego-California.html

### Names of datasets
#### *Vehicle Stops*
- 'vehicle_stops_2017.csv'
- 'vehicle_stops_2016.csv'
- 'vehicle_stops_2015.csv'
- 'vehicle_stops_2014.csv'

#### *Vehicle Stops Details*
- 'vehicle_stops_search_details_2017.csv'
- 'vehicle_stops_search_details_2016.csv'
- 'vehicle_stops_search_details_2015.csv'
- 'vehicle_stops_search_details_2014.csv'

#### *Files needed to read Vehicle Stops information*
- Race Codes: 'vehicle_stops_race_codes.csv'    
- Title explanations for Vehicle Stops data: 'vehicle_stops_dictionary.csv'
- Title explanations for Vehicle Stops Details data: 'vehicle_stops_search_details_dictionary.csv'
- Possible actions taken when stopped for Vehicle Stops Details data: 'vehicle_stops_search_details_description_list.csv'

In [None]:
# Imports
%matplotlib inline

# Basics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data analysis
import patsy
import statsmodels.api as sm
import scipy.stats as stats
from scipy.stats import ttest_ind, chisquare, normaltest

# Interactable 
from ipywidgets import interactive

# PDF Reading
!pip install PyPDF2
import PyPDF2 as pdf

import requests
import os
import locale

# <font color='Red'>[</font> <font color='Blue'>Data Cleaning</font> <font color='Red'>]</font>

## <font color='Blue'>-></font> Cleaning stops dataframe - Functions
### Clean unwanted columns

In [6]:
# Columns of the stops dataframe that are kept;
# clean_stops_cols drops every column that is not listed here
stops_col_titles = [
    'stop_id',
    'stop_cause',
    'service_area',
    'subject_race',
    'subject_sex',
    'subject_age',
    'arrested',
    'searched',
    'contraband_found',
    'property_seized',
]

In [7]:
# Function to get rid of unwanted columns in a vehicle stops dataset - Alberto
# Params: stops - dataset of stops to clean (columns are dropped in place)
# Return: the same dataframe, without the columns missing from stops_col_titles
def clean_stops_cols(stops):

    # Anything that is not on the wanted list goes
    unwanted = [title for title in list(stops) if title not in stops_col_titles]
    stops.drop(unwanted, axis=1, inplace=True)

    return stops

### Clean NaNs and missing values

In [8]:
# A row is dropped when any of these columns holds a NaN
clean_nans_cols = [
    'stop_cause',
    'stop_id',
    'subject_race',
    'subject_sex',
    'subject_age',
]

In [9]:
# Function to get rid of NaNs in a vehicle stops dataset - Alberto
# Params: stops - dataset of stops to clean (modified in place)
# Return: the same dataframe after filling / dropping missing values
def clean_stops_nans(stops):

    # A NaN in these yes/no columns is taken to mean 'N'
    # (most entries carried NaN instead of an explicit 'N')
    for flag_col in ('arrested', 'searched', 'contraband_found', 'property_seized'):
        stops[flag_col] = stops[flag_col].replace({np.nan: 'N'})

    # Rows missing any of the essential columns are discarded
    stops.dropna(subset=clean_nans_cols, how='any', inplace=True)

    return stops

## <font color='Blue'>-></font> Cleaning stops details dataframe - Functions
### Clean unwanted columns of stop details dataset

In [10]:
# Columns of the stops details dataframe that are kept
stops_info_col_titles = [
    'stop_id',
    'search_details_type',
    'search_details_description',
]

In [11]:
# Function to get rid of unwanted columns in a vehicle stops details dataset - Alberto
# Params: stops_info - dataset of stops information to clean (columns are dropped in place)
# Return: the same dataframe, without the columns missing from stops_info_col_titles
def clean_stops_info_cols(stops_info):

    # Anything that is not on the wanted list goes
    unwanted = [title for title in list(stops_info) if title not in stops_info_col_titles]
    stops_info.drop(unwanted, axis=1, inplace=True)

    return stops_info

### Clean NaNs and missing values

In [12]:

# Take out entries that carry no information
# Params: stops_info - dataset of stops information to clean
# Return: filtered dataframe (the argument itself is left untouched)
def clean_stops_info_meaningless(stops_info):

    kind = stops_info['search_details_type']
    detail = stops_info['search_details_description']

    # 'ActionTakenOther' with nothing written in the description
    blank_other_action = (kind == 'ActionTakenOther') & detail.isnull()
    # 'ActionTaken' / 'SearchBasis' whose description is just 'Other'
    generic_action = (kind == 'ActionTaken') & (detail == 'Other')
    generic_basis = (kind == 'SearchBasis') & (detail == 'Other')

    return stops_info[~(blank_other_action | generic_action | generic_basis)]

In [13]:
# Standardize an action type entry
# Params: action_type - value to be standardized (converted with str() first)
# Return: 'action' or 'search' when the lowercased text contains that word
#         ('action' wins when both appear), otherwise the lowercased text
def standardize_action_type(action_type):
    lowered = str(action_type).lower()

    for keyword in ('action', 'search'):
        if keyword in lowered:
            return keyword

    return lowered

In [37]:
# Standardize an action details entry
# Params: action - value to be standardized (converted with str() and lowercased)
# Return: a fresh one-element list holding the label of the first matching keyword,
#         or the plain string 'Other' when no keyword matches (this includes NaN)
def standardize_action_desc(action):

    # Checked top to bottom, first hit wins. The order matters:
    # e.g. 'no dui' must be seen before 'dui', and 'suspen' before 'susp'.
    keyword_labels = (
        ('arrest', 'arrest'),
        ('310', '310'),
        ('imp', 'impound'),
        ('tow', 'tow'),
        ('mistake', 'released'),
        ('released', 'released'),
        ('leave', 'released'),
        ('free', 'released'),
        ('no vio', 'released'),
        ('no dui', 'released'),
        ('nothing', 'released'),
        ('notice', 'suspension notice'),
        ('plate', 'check plate'),
        ('passenger', 'passenger'),
        ('license', 'license'),
        ('dui', 'dui eval'),
        ('det', 'detention'),
        ('contact', 'contact'),
        ('suspen', 'suspension'),
        ('susp', 'suspect'),
        ('cit', 'citation'),
        ('dmv', 'DMV issue'),
    )

    text = str(action).lower()

    for keyword, label in keyword_labels:
        if keyword in text:
            # A new list per call: the merging step later appends to these lists
            return [label]

    return 'Other'

In [15]:
# Clean NaNs and reduce descriptions to standard labels
# Params: stops_info - dataset of stops information to clean
# Return: new dataframe holding only rows with a recognised action description
def clean_stops_info_nans(stops_info):

    # Remove meaningless rows. The filter returns a frame derived from the input;
    # take an explicit copy so the column assignments below write to our own data
    # (avoids pandas' SettingWithCopyWarning and never touches the caller's frame).
    stops_info = clean_stops_info_meaningless(stops_info).copy()

    # Clean type column
    type_title = 'search_details_type'
    stops_info[type_title] = stops_info[type_title].apply(standardize_action_type)

    # Clean details column (each cell becomes a one-element list, or the string 'Other')
    desc_title = 'search_details_description'
    stops_info[desc_title] = stops_info[desc_title].apply(standardize_action_desc)

    # Remove 'Other' and NaN entries as they do not give us any extra information
    stops_info = stops_info[~(stops_info[desc_title] == "Other")]
    return stops_info.dropna(how='any', subset=stops_info_col_titles)

## <font color='Blue'>-></font> Final cleaning functions - Combining it all

In [16]:
# Full cleaning pipeline for a stops dataframe
# Params: stops - stops dataframe to be cleaned
# Return: dataframe after column cleanup followed by NaN cleanup
def clean_stops(stops):
    return clean_stops_nans(clean_stops_cols(stops))

In [17]:
# Full cleaning pipeline for a stops information dataframe
# Params: stops_info - stops information dataframe to be cleaned
# Return: dataframe after column cleanup followed by NaN / description cleanup
def clean_stops_info(stops_info):
    return clean_stops_info_nans(clean_stops_info_cols(stops_info))

## <font color='Blue'>-></font> Merging stops and details dataset

In [18]:
# Helper function: Merges consecutive duplicates within the information dataset
# Params: info - dataframe with stops information; needs unique index labels and
#                list-valued 'search_details_description' cells
# Return: the same dataframe (modified in place): for every run of consecutive rows
#         sharing a stop_id only the first row is kept, and the first element of each
#         following row's description list is appended to that first row's list
def merge_duplicates(info):

    duplicate_labels = []
    anchor_id = None      # stop_id of the row the current run started with
    anchor_desc = None    # description list of that row (mutated in place)

    # One linear pass. The previous implementation dropped rows one at a time
    # (quadratic) and its bounds check could swallow the final row into an
    # unrelated stop when that row followed a run of duplicates.
    for label, stop_id, desc in zip(info.index,
                                    info['stop_id'],
                                    info['search_details_description']):
        if anchor_desc is not None and stop_id == anchor_id:
            # Same stop as the run's first row: fold this entry into it
            anchor_desc.append(desc[0])
            duplicate_labels.append(label)
        else:
            # A new stop begins here
            anchor_id, anchor_desc = stop_id, desc

    # Remove all folded rows at once
    info.drop(duplicate_labels, inplace=True)

    return info

In [19]:
# Function: Merge the stops and details dataframes
# Params: stops - dataframe with stops information
#          info - dataframe with stop details (left unmodified)
# Return: left merge of stops with the de-duplicated details on 'stop_id'
def merge_dataframes(stops, info):

    # Work on a private copy without the type column and with a fresh 0..n-1 index.
    # Dropping in place on the caller's frame made a second run of the same cell
    # fail with a KeyError; errors='ignore' also tolerates a frame that already
    # lost the column.
    details = info.drop('search_details_type', axis=1, errors='ignore').reset_index(drop=True)

    # merge_duplicates appends to the description lists in place; give the copy its
    # own list objects so the caller's frame is not altered through shared cells
    desc_title = 'search_details_description'
    details[desc_title] = details[desc_title].map(
        lambda desc: list(desc) if isinstance(desc, list) else desc)

    # Merge duplicates of information dataset
    details = merge_duplicates(details)

    return stops.merge(details, on=['stop_id'], how='left')

# <font color='Red'>[</font> <font color='Blue'>Setting up for Data Analysis</font> <font color='Red'>]</font>

## <font color='Blue'>-></font> Mapping functions and variables

### Mapping of individuals' races

In [121]:
# Function that maps the police race codes into the broader categories used for the analysis
# Param: race - character corresponding to a police race code
# Return: 'A', 'B', 'H', 'W' or 'O' for a recognised code, 'X' for anything else
def assign_race(race):

    # (police codes, category) pairs, checked in order
    code_groups = (
        (('A', 'C', 'D', 'F', 'J', 'K', 'L', 'V', 'I'), 'A'),
        (('B',), 'B'),
        (('H',), 'H'),
        (('W',), 'W'),
        (('E', 'G', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'Y', 'Z'), 'O'),
    )

    for codes, category in code_groups:
        if race in codes:
            return category

    # Unrecognised code
    return 'X'

### Mapping of police area to zip code

In [21]:
# Function that maps a police area code to its zip codes
# Params: area - police area code (string) or 'Unknown'
# Return: flat list alternating zip code and the share of that zip code counted for
#         the area, e.g. ['92109', '1', '92037', '1']; areas without zip data give ['0']
# Raises: KeyError when the area code is not in the table
def get_area_zips(area):
    zips_by_area = {
        '110': ['92122', '1', '92117', '1', '92111', '0.8', '92110', '0.4'],
        '120': ['92109', '1', '92037', '1'],
        '130': ['0'],
        '230': ['92129', '1', '92128', '1', '92127', '0.3', '92025', '0.3'],
        '240': ['92145', '1', '92126', '1', '92131', '1'],
        '310': ['92123', '1', '92124', '1', '92108', '1', '92111', '0.2'],
        '320': ['92120', '1', '92119', '1'],
        '430': ['92139', '1', '92114', '1'],
        '440': ['92136', '1', '92102', '0.4', '92113', '0.5'],
        '510': ['92113', '0.5', '92102', '0.6'],
        '520': ['92101', '1'],
        '530': ['0'],
        '610': ['92107', '1', '92106', '1', '92140', '1', '92110', '0.6'],
        '620': ['92103', '1'],
        '630': ['0'],
        '710': ['92173', '1', '92154', '0.4'],
        '720': ['92154', '0.6'],
        '810': ['0'],
        '820': ['92115', '1', '92116', '0.6'],
        '830': ['92105', '0.8'],
        '840': ['0'],
        '930': ['92121', '1', '92130', '1', '92014', '1', '92091', '1', '92127', '0.7'],
        'Unknown': ['0'],
    }
    return zips_by_area[area]

### List of possible police actions taken

In [22]:
# Row labels of the per-area action count tables:
# the standardized action labels plus 'other', 'NaN' and a 'total' row
actions = [
    'arrest', '310', 'impound', 'tow', 'released',
    'suspension notice', 'check plate', 'passenger', 'license',
    'dui eval', 'detention', 'contact', 'suspension', 'suspect',
    'citation', 'DMV issue',
    'other', 'NaN', 'total',
]

### List of years

In [446]:
# Years covered by the datasets, newest first (as strings)
years_list = [str(year) for year in range(2017, 2013, -1)]

### List of police codes

In [447]:
# Police area codes offered in the interactive graphs
areas_codes_list = [
    '110', '120',
    '230', '240',
    '310', '320',
    '430', '440',
    '510', '520',
    '610', '620',
    '710', '720',
    '820', '830',
    '930',
]

### List of races

In [444]:
# Race categories that make up the majority of the San Diego area (graph order)
races = ['W', 'H', 'A', 'B']

## <font color='Blue'>-></font> PDF Reading

### Get population percentages by race

In [91]:
# Function to get population data from a specific zip code
# Params: code - zip code to be extracted (string; the file read is 'zip_pop_data/<code>.pdf')
#         percent - share of the zip code's population to count (number or numeric string)
# Return: dataframe indexed by group with 'Population' (scaled by percent) and
#         'Percent' columns; None, after printing 'error', when opening the PDF
#         is refused with a PermissionError
def get_zip_info(code, percent):
    # locale.atoi below parses the population counts using the user's locale settings
    locale.setlocale(locale.LC_ALL, '')
    currDir = 'zip_pop_data/'

    # Built-in float: the np.float alias was removed from NumPy
    share = float(percent)

    # Only the file access can raise PermissionError, so only that is guarded
    try:
        file = currDir + code + '.pdf'
        # NOTE(review): PdfFileReader / getPage / extractText are the legacy PyPDF2
        # names; newer PyPDF2 releases renamed them - confirm the installed version
        fpdf = pdf.PdfFileReader(file)
        page = fpdf.getPage(0).extractText()
    except PermissionError:
        print('error')
        return None

    # Gets the beginning and end of the data we want
    # (+19 skips the 'Population\nPercent\n' header, so the text starts at 'Total Population')
    index = page.find('Population\nPercent\nTotal Population')
    indexEnd = page.find('Source: SANDAG, Current Estimates (2010)\nPopulation by Race')
    text = page[index+19:indexEnd-1]

    # Newlines and '/' both separate tokens
    tokens = text.replace('\n', '/').split('/')

    groups = list()
    percentages = list()
    populations = list()

    for item in tokens:
        if not item:
            # Blank token (e.g. two separators in a row): nothing to classify
            continue
        if '%' in item:
            percentages.append(item)
        elif item[0].isnumeric():
            populations.append(locale.atoi(item) * share)
        else:
            groups.append(item)

    p_df = pd.DataFrame({'Group': groups, 'Population': populations, 'Percent': percentages})
    p_df.set_index('Group', inplace=True)
    p_df = p_df.reindex(["Total Population", "White", "Hispanic", "Asian", "Black", "Two or More", "American Indian",
                "Pacific Islander", "Other"])
    p_df.fillna(0.0, inplace=True)

    # Makes sure that each value in percent column has a % sign on it - fixes errors caused by null
    p_df['Percent'] = [value if '%' in str(value) else str(value) + '%'
                       for value in p_df['Percent']]
    return p_df

In [93]:
# Sanity check: population table for zip code 92122 counted at a share of '1'
print(get_zip_info('92122', '1'))

                  Population Percent
Group                               
Total Population     43382.0    100%
White                23612.0     54%
Hispanic              4083.0      9%
Asian                13321.0     31%
Black                  635.0      1%
Two or More           1464.0      3%
American Indian         56.0     <1%
Pacific Islander        72.0     <1%
Other                  139.0     <1%


## <font color='Blue'>-></font> Building data-structures for analysis

### Dataframe to hold information about actions and population for every police area

In [24]:
# Function - Sets up the police area dictionary for one year
# Return: Dictionary mapping every police area code (plus 'Unknown') to a list of
#         two empty dataframes: slot 0 is later filled with police actions per race,
#         slot 1 with the population percentages of the area
def get_year_areas():
    area_codes = (
        '110', '120', '130',
        '230', '240',
        '310', '320',
        '430', '440',
        '510', '520', '530',
        '610', '620', '630',
        '710', '720',
        '810', '820', '830', '840',
        '930',
        'Unknown',
    )

    return {code: [pd.DataFrame(), pd.DataFrame()] for code in area_codes}

### Functions to fill in information into above dataframe per year

In [187]:
# Function: Fills in the first dataframe of every police area with the
#           number of police actions per race for the given year
# Params: year - key into the global `years` dict selecting the merged stops dataframe
#         year_df - dictionary from get_year_areas(); slot 0 of every area is replaced
# Return: nothing, year_df is filled in place
# Raises: KeyError when a stop names a service area missing from year_df
#         or an action label missing from `actions`
def get_code_race_data(year, year_df):

    # Columns corresponding to races
    cols = ['W', 'B', 'A', 'H', 'O']

    # Start every police area with an all-zero action x race table
    for current_area in year_df:
        year_df[current_area][0] = pd.DataFrame(
            0, index=pd.Index(actions, name='Action'), columns=cols)

    # Counts different actions for every area
    stops = years[year]
    for race_code, desc, area in zip(stops['subject_race'],
                                     stops['search_details_description'],
                                     stops['service_area']):

        race = assign_race(race_code)
        if race == 'X':
            continue

        # Stops without details carry NaN after the left merge. Check the type rather
        # than `is not np.nan`: a NaN that is not the np.nan object itself would pass
        # an identity test and then fail when iterated.
        if not isinstance(desc, (list, tuple)):
            continue

        counts = year_df[area][0]
        for item in desc:
            counts.loc[item, race] += 1

    # Sums up the totals (the 'total' row is still 0 when its column is summed)
    for current_area in year_df:
        counts = year_df[current_area][0]
        for item in cols:
            counts.loc['total', item] = counts[item].sum()

In [172]:
# Function: Fills in the second dataframe of every police area with
#           the total population counts for that area
# Params: year - unused, kept so existing calls keep working
#         year_df - dictionary from get_year_areas(); slot 1 of every area that
#                   has zip code data is replaced
# Return: nothing, year_df is filled in place
def fill_area_pop_data(year, year_df):
    for area in year_df:
        codes = get_area_zips(area)

        # A single-entry list marks an area without zip code data: nothing to fill.
        # (Compared with ==; `is not 1` tested object identity, not the value.)
        if len(codes) == 1:
            continue

        df_total = get_zip_info(codes[0], codes[1])

        # Since each police area covers multiple zip codes, we must loop through all codes in each area
        # (codes alternates zip code / share, hence the step of 2)
        for index in range(2, len(codes)-1, 2):
            df_temp = get_zip_info(codes[index], codes[index+1])
            df_total = df_total.add(df_temp, fill_value=0)

        for index, row in df_total.iterrows():
            # Calculates the new percentages of the added zip codes, relative to the
            # first row ('Total Population'); .iloc[0] reads it by position explicitly
            area_total = float(df_total['Population'].iloc[0])
            df_total.loc[index, 'Percent'] = 100 * float(row['Population']) / area_total
            # Turning Populations into whole numbers
            df_total.loc[index, 'Population'] = int(df_total.loc[index, 'Population'])

        year_df[area][1] = df_total

In [173]:
# Function: Adds up the 'total' row of an area's action table
# Params: year - unused
#         currArea - police area whose table is summed
#         df - dictionary of one year (area code -> [action table, population table])
# Return: sum of the 'total' row over all columns (0 when the table has no columns)
def get_total(year, currArea, df):
    action_table = df[currArea][0]
    return sum(action_table.loc['total', race_col] for race_col in action_table.columns)

# <font color='Red'>[</font> <font color='Blue'>Data Reading</font> <font color='Red'>]</font>

## <font color='Blue'>-></font> Read, clean, and merge data

In [174]:
# Read each year's vehicle stops CSV and clean it
# (clean_stops keeps the wanted columns, then fills / drops missing values)
df_stops_17 = clean_stops(pd.read_csv('vehicle_stops_2017.csv'))
df_stops_16 = clean_stops(pd.read_csv('vehicle_stops_2016.csv'))
df_stops_15 = clean_stops(pd.read_csv('vehicle_stops_2015.csv'))
df_stops_14 = clean_stops(pd.read_csv('vehicle_stops_2014.csv'))

In [175]:
# Read each year's stop details CSV and clean it
# (clean_stops_info keeps the wanted columns, then standardizes types and descriptions)
df_stops_info_17 = clean_stops_info(pd.read_csv('vehicle_stops_search_details_2017.csv'))
df_stops_info_16 = clean_stops_info(pd.read_csv('vehicle_stops_search_details_2016.csv'))
df_stops_info_15 = clean_stops_info(pd.read_csv('vehicle_stops_search_details_2015.csv'))
df_stops_info_14 = clean_stops_info(pd.read_csv('vehicle_stops_search_details_2014.csv'))

In [176]:
# Merge each year's cleaned stops with its cleaned stop details (left merge on stop_id)
df_merged_17 = merge_dataframes(df_stops_17, df_stops_info_17)
df_merged_16 = merge_dataframes(df_stops_16, df_stops_info_16)
df_merged_15 = merge_dataframes(df_stops_15, df_stops_info_15)
df_merged_14 = merge_dataframes(df_stops_14, df_stops_info_14)

KeyboardInterrupt: 

In [None]:
# Store merged datasets for easier access, keyed by year string
# (get_code_race_data reads this dict through years[year])
years = {
    '2017': df_merged_17,
    '2016': df_merged_16,
    '2015': df_merged_15,
    '2014': df_merged_14
}

In [None]:
# Take a peek at the first 5 rows of the merged 2017 dataset
years['2017'].head(5)

## <font color='Blue'>-></font> Fill in datasets with data analytics

In [None]:
# Loads in all the data at once. Takes roughly 6 mins to run, but will make the rest of the program much faster 
# One area dictionary per year: area code -> [action table, population table]
df_2017 = get_year_areas()
df_2016 = get_year_areas()
df_2015 = get_year_areas()
df_2014 = get_year_areas()

# Slot 0 of every area: police action counts per race
get_code_race_data('2017', df_2017)
get_code_race_data('2016', df_2016)
get_code_race_data('2015', df_2015)
get_code_race_data('2014', df_2014)

# Slot 1 of every area: population counts and percentages read from the zip code PDFs
fill_area_pop_data('2017', df_2017)
fill_area_pop_data('2016', df_2016)
fill_area_pop_data('2015', df_2015)
fill_area_pop_data('2014', df_2014)

print('Done')

In [None]:
# Per-year area dictionaries keyed by year string;
# the getter functions below read this as df_years[year][area][slot]
df_years = {
    '2017':df_2017,
    '2016':df_2016,
    '2015':df_2015,
    '2014':df_2014
}

In [129]:
# Population table (slot 1) of police area 930 for 2017
df_years['2017']['930'][1]

Unnamed: 0_level_0,Population,Percent
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
Total Population,96512.0,100.0
White,61684.0,63.9137
Hispanic,7519.0,7.79167
Asian,22111.0,22.9105
Black,1171.0,1.21374
Two or More,3528.0,3.65581
American Indian,120.0,0.124648
Pacific Islander,117.0,0.121539
Other,259.0,0.268775


## <font color='Blue'>-></font> Test dataframe visuals

### Police action counts

In [130]:
# Police action counts per race (slot 0) for police area 930 in 2017
print(df_years['2017']['930'][0])

                     W   B   A    H    O
Action                                  
arrest               3   1   0    2    0
310                  1   0   0    0    0
impound              7   0   0    2    0
tow                  1   0   0    0    0
released             0   0   0    0    0
suspension notice    1   0   0    0    1
check plate          0   0   0    0    0
passenger            3   0   0    0    0
license              0   0   0    0    0
dui eval             1   0   0    0    0
detention            0   0   0    1    0
contact              0   1   0    0    0
suspension           0   0   0    0    0
suspect              0   0   0    0    0
citation           458  33  96  122  104
DMV issue            0   0   0    0    0
other                0   0   0    0    0
NaN                  0   0   0    0    0
total              475  35  96  127  105


### Test visual: Race percentages

In [131]:
# Population counts and percentages (slot 1) for police area 120 in 2017
print(df_years['2017']['120'][1])

                  Population   Percent
Group                                 
Total Population     84454.0       100
White                65823.0   77.9395
Hispanic              8933.0   10.5774
Asian                 5813.0   6.88304
Black                  870.0   1.03015
Two or More           2406.0   2.84889
American Indian        162.0   0.19182
Pacific Islander       140.0  0.165771
Other                  307.0  0.363511


# <font color='Red'>[</font> <font color='Blue'>Data Visualization Prep</font> <font color='Red'>]</font>

## <font color='Blue'>-></font> Getters for statistics and graphs

### Percentage of stops per race 

In [412]:
# Function: Percentage of an area's counted police actions that belong to one race
# Params: area - police area code
#         race - one of 'W', 'B', 'A', 'H'
#         year - year key into df_years
# Return: float percentage; NaN when the area has no counted actions at all
# Raises: KeyError for any other race, or for an unknown year / area
def get_stops_percentage(area, race, year):
    # Only these four categories are supported
    if race not in ('W', 'B', 'A', 'H'):
        raise KeyError(race)

    df_actions = df_years[year][area][0]
    total = get_total(year, area, df_years[year])

    # No actions recorded: the share is undefined
    if total == 0:
        return float('nan')

    # Built-in float: the np.float alias was removed from NumPy
    return float(df_actions.loc['total', race] / total) * 100

In [435]:
# Function: Bar chart of the percentage of stops per race for one area and year
# Params: area - police area code (string)
#         year - year key (string)
def stops_percentage_graph(area, year):
    heights = [get_stops_percentage(area, race, year) for race in races]

    plt.figure(figsize=(6, 4), dpi=75)
    plt.bar(races, heights, width=.50)
    plt.title('Percentage of stops corresponding to races\nYear:'+year+'  Police area:'+area)
    plt.xlabel('Race')
    plt.ylabel('Percentage')
    plt.show()

### Population percentages 

In [424]:
# Function: Percentage of an area's population that belongs to one race
# Params: area - police area code
#         race - one of 'W', 'B', 'A', 'H'
#         year - year key into df_years
# Return: float taken from the 'Percent' column of the area's population table
# Raises: KeyError for any other race, or for an unknown year / area
def get_population_percentage(area, race, year):
    # Race category -> row label of the population table
    group_names = {
        'W': 'White',
        'B': 'Black',
        'A': 'Asian',
        'H': 'Hispanic'
    }
    group = group_names[race]

    df_pop = df_years[year][area][1]

    # Only the requested row is read (built-in float: np.float was removed from NumPy)
    return float(df_pop.loc[group, 'Percent'])

In [434]:
# Function: Bar chart of the population percentage per race for one area and year
# Params: area - police area code (string)
#         year - year key (string)
def population_percentage_graph(area, year):
    heights = [get_population_percentage(area, race, year) for race in races]

    plt.figure(figsize=(6, 4), dpi=75)
    plt.bar(races, heights, width=.50)
    plt.title('Percentage of of race in population\nYear:'+year+'  Police area:'+area)
    plt.xlabel('Race')
    plt.ylabel('Percentage')
    plt.show()

### Stops per race percentages vs. population percentages

In [426]:
# Function: Ratio between a race's share of police actions and its share of the population
# Params: area - police area code
#         race - one of 'W', 'B', 'A', 'H'
#         year - year key into df_years
# Return: (actions of race / all actions) / population percent * 100, i.e. stops% / population%;
#         NaN when the area has no counted actions or the population percent is 0
# Raises: KeyError for any other race, or for an unknown year / area
def race_compare(area, race, year):
    # Race category -> row label of the population table
    group_names = {
        'W': 'White',
        'B': 'Black',
        'A': 'Asian',
        'H': 'Hispanic'
    }
    group = group_names[race]

    df_pop = df_years[year][area][1]
    df_actions = df_years[year][area][0]
    total = get_total(year, area, df_years[year])

    # Only the requested race is evaluated, so a zero share of some other
    # race can no longer break this call
    pop_percent = float(df_pop.loc[group, 'Percent'])
    if total == 0 or pop_percent == 0:
        # Ratio is undefined without any actions or without any population
        return float('nan')

    stops_fraction = float(df_actions.loc['total', race] / total)
    return stops_fraction / pop_percent * 100

In [433]:
# Function: Bar chart of the stops% / population% ratio per race for one area and year
# Params: area - police area code (string)
#         year - year key (string)
def stops_vs_population_graph(area, year):
    heights = [race_compare(area, race, year) for race in races]

    plt.figure(figsize=(6, 4), dpi=75)
    plt.bar(races, heights, width=.50)
    plt.title('Ratio of stops% vs population%\nYear:'+year+'  Police area:'+area)
    plt.xlabel('Race')
    plt.ylabel('Ratio')
    plt.show()

### Combining graphing functions

In [428]:
# Function: Draws the three charts (stops %, population %, ratio) one after another
# Params: area - police area code (string)
#         year - year key (string)
def simulteneous_graph_visual(area, year):
    for draw_chart in (stops_percentage_graph,
                       population_percentage_graph,
                       stops_vs_population_graph):
        draw_chart(area, year)

# <font color='Red'>[</font> <font color='Blue'>Data Visualization</font> <font color='Red'>]</font>

## <font color='Blue'>-></font> Interactable Graphs

In [450]:
# Interactive widget: redraws the three charts for the selected police area and year
interactive_plot = interactive(simulteneous_graph_visual, area=areas_codes_list, year=years_list)
# The last child is taken to be the output area (assumption - confirm against ipywidgets);
# a fixed height is set on it
output = interactive_plot.children[-1]
output.layout.height = '1000px'
# Last expression of the cell, so the notebook displays the widget
interactive_plot

A Jupyter Widget