In [1]:
import os
import pandas as pd

In [2]:
# import os
# import pandas as pd
# from data_builder import DataBuilder
# from api_key import api_key

# def process_data():
#     citation_file = 'data/citation.csv'
#     employee_file = 'data/employee.csv'
#     census_file = 'data/census.csv'

#     citation_df, employee_df, census_df = None, None, None

#     if os.path.exists(citation_file) and os.path.exists(employee_file) and os.path.exists(census_file):
#         print("Files already exist. Loading from CSV.")
        
#         citation_df = pd.read_csv(citation_file)
#         employee_df = pd.read_csv(employee_file)
#         census_df = pd.read_csv(census_file)
#     else:
#         print("Files do not exist. Fetching data from API.")
        
#         builder = DataBuilder(api_key)
#         builder.run_all()

#         citation_df = pd.read_csv(citation_file)
#         employee_df = pd.read_csv(employee_file)
#         census_df = pd.read_csv(census_file)

#     return citation_df, employee_df, census_df

# if __name__ == '__main__':
#     citation_df, employee_df, census_df = process_data()




In [3]:
# Relative paths of the three CSV extracts read by this cell.
citation_file, employee_file, census_file = (
    'data/citation.csv',
    'data/employee.csv',
    'data/census.csv',
)

# Load each file into its own DataFrame, in the same order as the paths above.
citation_df, employee_df, census_df = map(
    pd.read_csv, (citation_file, employee_file, census_file)
)


In [4]:
# Column labels to remove from the citation data. Labels are matched exactly
# as written here; any that are absent are skipped by errors='ignore' below.
citation_cols_to_drop = [
    'CITATION_CONTROL_NUMBER',
    'ACTIVITY_RESULTS',
    'ACTIVITY_DATE',
    'ACTIVITY_TIME',
    'ACTIVITY_LOCATION',
    'ACTIVITY_DIVISION',
    'ACTIVITY_BEAT',
    'NUMBER_OF_PASSENGERS',
    'WAS_VEHCILE_SEARCHED',
    'REASON_FOR_SEARCH',
    'ObjectId',
]

# Copy first, then drop the listed columns from the copy.
citation_df = citation_df.copy().drop(columns=citation_cols_to_drop, errors='ignore')
citation_df.head()

Unnamed: 0,TYPE_OF_STOP,OFFICER_GENDER,OFFICER_RACE,OFFICER_AGE_RANGE,DRIVER_GENDER,DRIVER_RACE,DRIVER_AGE_RANGE
0,COMPLAINT/CRIMINAL VIOLATION,M,WHITE,31 - 40,M,WHITE,31 - 40
1,COMPLAINT/CRIMINAL VIOLATION,M,WHITE,41 - 50,M,BLACK,20 - 25
2,COMPLAINT/CRIMINAL VIOLATION,F,BLACK,31 - 40,M,WHITE,20 - 25
3,COMPLAINT/CRIMINAL VIOLATION,M,WHITE,31 - 40,F,WHITE,OVER 60
4,COMPLAINT/CRIMINAL VIOLATION,M,WHITE,41 - 50,M,BLACK,20 - 25


In [5]:
# Column labels to remove from the employee data; any that are absent are
# skipped by errors='ignore' below.
employee_cols_to_drop = [
    'AOC_CODE', 'RANK_TITLE', 'OFFICER_DIVISION',
    'OFFICER_ASSIGNMENT', 'OFFICER_YEARS_SWORN',
]

# Copy first so the column assignment below never touches the loaded frame.
employee_df = employee_df.copy().drop(columns=employee_cols_to_drop, errors='ignore')

# Single-letter race codes -> upper-case labels.
race_mapping = {
    'W': 'WHITE',
    'B': 'BLACK',
    'H': 'HISPANIC',
    'A': 'ASIAN',
    'U': 'UNKNOWN'
}

# Series.map() returns NaN for every value that is not a key of race_mapping.
# Falling back to the stripped original value keeps this cell idempotent
# (re-running it no longer turns 'WHITE' etc. into NaN) and preserves any
# code that is not listed in race_mapping instead of silently discarding it.
officer_race = employee_df['OFFICER_RACE'].str.strip()
employee_df['OFFICER_RACE'] = officer_race.map(race_mapping).fillna(officer_race)
employee_df.head()

Unnamed: 0,OFFICER_SEX,OFFICER_RACE,OFFICER_AGE_RANGE,OFFICER_AGE,ObjectId
0,F,WHITE,40 - 49,48,1
1,M,WHITE,40 - 49,46,2
2,F,WHITE,40 - 49,45,3
3,F,WHITE,40 - 49,47,4
4,M,WHITE,50 - 59,52,5


In [6]:
# Rebind census_df to an independent (deep) copy, then display the full frame.
census_df = census_df.copy(deep=True)
census_df

Unnamed: 0,Name,Total_population,Total_male_population,Total_female_population,Total_population_for_race,White_alone,Black,Native_American,Asian,Hawaiian_Pacific_Islander,Other_race_alone,Two_or_more,State code,County code
0,"Jefferson County, Kentucky",768419,371306,397113,768419,539721,168526,1070,22880,499,9085,26638,21,111


In [7]:
# Census count columns to express as a percentage of Total_population.
race_percentage_cols = [
    'Total_population', 'Total_male_population', 'Total_female_population',
    'Total_population_for_race', 'White_alone', 'Black', 'Native_American',
    'Asian', 'Hawaiian_Pacific_Islander', 'Other_race_alone', 'Two_or_more'
]

# Divide every count column by Total_population, aligned row by row (axis=0),
# in a single vectorized step. The previous per-element apply() divided each
# scalar by the whole Total_population Series, which only produced a usable
# column because the census frame has exactly one row.
louisville_census = (
    census_df[race_percentage_cols]
    .div(census_df['Total_population'], axis=0)
    .mul(100)
    .add_suffix(' (%)')
)

louisville_census

Unnamed: 0,Total_population (%),Total_male_population (%),Total_female_population (%),Total_population_for_race (%),White_alone (%),Black (%),Native_American (%),Asian (%),Hawaiian_Pacific_Islander (%),Other_race_alone (%),Two_or_more (%)
0,100.0,48.320773,51.679227,100.0,70.237852,21.931524,0.139247,2.977542,0.064939,1.182298,3.466598


In [8]:
# louisville_census['Total_male_population (%)', 'Total_female_population (%)'] 
# employee_df['OFFICER_SEX']
# citation_df['DRIVER_GENDER']

In [12]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Census gender shares, taken from the first row of louisville_census.
# .iloc[0] selects by position, so it does not depend on the index labels.
male_percentage = louisville_census['Total_male_population (%)'].iloc[0]
female_percentage = louisville_census['Total_female_population (%)'].iloc[0]

# Officer counts per gender code; .get(..., 0) covers a code that never occurs.
employee_gender_counts = employee_df['OFFICER_SEX'].value_counts()
employee_male = employee_gender_counts.get('M', 0)
employee_female = employee_gender_counts.get('F', 0)

# Driver counts per gender code from the citation data.
citation_gender_counts = citation_df['DRIVER_GENDER'].value_counts()
citation_male = citation_gender_counts.get('M', 0)
citation_female = citation_gender_counts.get('F', 0)

# One panel per dataset. This list is the single source of the panel titles;
# the third title previously said "Race" although every panel shows gender.
datasets = ['Louisville Population', 'Police Force', 'Drivers From Citations']

# [male, female] pairs in the same order as `datasets`. The first pair holds
# percentages and the other two hold raw counts; each pie is drawn from its
# own pair only.
data_values = [
    [male_percentage, female_percentage],
    [employee_male, employee_female],
    [citation_male, citation_female]
]

# Custom colors for male and female
colors = ['#1f77b4', '#ff7f0e']  # Blue for Male, Orange for Female

# A single row of pie-type subplots, one per dataset.
fig = make_subplots(
    rows=1,
    cols=len(datasets),
    subplot_titles=datasets,
    specs=[[{'type': 'pie'} for _ in datasets]],
)

# Subplot columns are 1-indexed, hence start=1.
for col_index, values in enumerate(data_values, start=1):
    fig.add_trace(
        go.Pie(labels=['Male', 'Female'], values=values,
               marker=dict(colors=colors)),
        row=1, col=col_index,
    )

# Update layout
fig.update_layout(title_text='Gender Comparison', template='plotly_white')

# Show the figure
fig.show()
