In [138]:
import pandas as pd

def clean_item_no(df):
    df.loc[:, "item_number"] = df.item_number.str.lower().str.strip().str.replace(r"(\w)(\w+)(\w{2})$", r"\1-\2-\3", regex=True)
    return df

def standardize_item_no(df):
    df.loc[:, "item_number"] = df.item_number.str.lower().str.strip()
    return df

def drop_rows_missing_race(df):
    df.loc[:, "citizen_race"] = df.citizen_race.fillna("").str.lower().str.strip()\
    .str.replace("unknown", "", regex=False)\
    # .str.replace("hispanic", "", regex=False)\
    # .str.replace("asian", "", regex=False)\
    # .str.replace(r"amer\. ind\.", "", regex=True)
    return df[~((df.citizen_race == ""))]
    
def filter_gender(df):
    df.loc[:, "citizen_gender"] = (
        df.citizen_gender.str.lower().str.strip().fillna("").str.replace("unknown", "", regex=False)
    )
    return df[~((df.citizen_gender == ""))]

In [139]:
def sas():
    dfa = pd.read_csv("../data/sas_new_orleans_pd_2010_2021.csv")\
        .pipe(clean_item_no)

    dfb = pd.read_csv("../data/rtcc.csv", encoding="cp1252").rename(columns={"Item_number": "item_number"})
    dfb = dfb.pipe(standardize_item_no)
    dfb["rtcc_footage_requested"] = "yes"

    df = pd.merge(dfa, dfb, on="item_number")
    return df

sas_df = sas()

sas_df[~((sas_df.stop_and_search_year.fillna("") == ""))]
sas_df = sas_df.pipe(drop_rows_missing_race).pipe(filter_gender).drop_duplicates(subset=["stop_and_search_uid", "citizen_race", "citizen_gender"])


  dfa = pd.read_csv("../data/sas_new_orleans_pd_2010_2021.csv")\
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, "citizen_gender"] = (


In [140]:
sas_df.columns

Index(['stop_and_search_interview_id', 'item_number', 'district', 'zone',
       'stop_reason', 'vehicle_year', 'vehicle_make', 'vehicle_model',
       'vehicle_style', 'vehicle_color', 'citizen_id', 'citizen_race',
       'citizen_height', 'citizen_weight', 'citizen_hair_color', 'zip_code',
       'stop_and_search_location', 'id', 'stop_and_search_interview_id_2',
       'vehicle_number', 'citizen_gender', 'citizen_eye_color',
       'citizen_driver_license_state', 'assigned_district',
       'assigned_department', 'stop_results', 'subject_type',
       'search_occurred', 'evidence_seized', 'evidence_types',
       'strip_body_cavity_search', 'legal_basis', 'consent_to_search',
       'consent_form_completed', 'search_types', 'exit_vehicle', 'rank_desc',
       'first_name', 'middle_name', 'last_name', 'badge_number',
       'stop_and_search_year', 'stop_and_search_month', 'stop_and_search_day',
       'stop_and_search_time', 'agency', 'uid', 'stop_and_search_uid',
       'Signal Code

In [141]:
sas_df = sas_df[sas_df.rtcc_footage_requested.isin(["yes"])]

In [142]:
sas_df["rtcc_footage_request_tally"] = 1

In [143]:
sas_df.groupby("citizen_race").rtcc_footage_request_tally.value_counts()

citizen_race              rtcc_footage_request_tally
asian / pacific islander  1                                7
black                     1                             2797
hispanic                  1                               84
native american           1                                4
white                     1                              581
Name: rtcc_footage_request_tally, dtype: int64

In [144]:
sas_df.citizen_race.unique()


array(['black', 'white', 'hispanic', 'asian / pacific islander',
       'native american'], dtype=object)

In [145]:
sas_tally = sas_df[["citizen_race", "rtcc_footage_request_tally",]]
sas_tally = sas_tally.groupby(["citizen_race",]).rtcc_footage_request_tally.count()
sas_tally = pd.DataFrame(sas_tally).reset_index()
sas_tally.loc[(sas_tally.citizen_race == "black"), "population"] = "105498"
sas_tally.loc[(sas_tally.citizen_race == "white"), "population"] = "65756"
sas_tally.loc[(sas_tally.citizen_race == "hispanic"), "population"] = "11205"
sas_tally.loc[(sas_tally.citizen_race == "native american"), "population"] = "360"
sas_tally.loc[(sas_tally.citizen_race == "asian / pacific islander"), "population"] = "5499"

In [146]:
def calc_rate(dfb_total, population=""):
    population = int(population)
    rate = (( dfb_total / population) * 1000)
    return rate

In [147]:
sas_tally["rate_per_1000_persons"] = sas_tally.apply(lambda x: calc_rate(x.rtcc_footage_request_tally, x.population), axis=1)
sas_tally["year"] = "2018-2022"

In [148]:
# sas_tally.to_excel("../data/excel/rtcc_requests_sas_total.xlsx")

In [149]:
sas_stops = sas_df[["citizen_race", "search_occurred", "rtcc_footage_request_tally", "stop_results"]]
sas_stops = sas_stops[sas_stops.search_occurred.isin(["yes"])]
sas_stops = sas_stops[sas_stops.stop_results.isin(["physical arrest"])]
sas_stops = sas_stops.groupby(["citizen_race",]).rtcc_footage_request_tally.count()
sas_stops = pd.DataFrame(sas_stops).reset_index()
sas_stops.loc[(sas_stops.citizen_race == "black"), "population"] = "105498"
sas_stops.loc[(sas_stops.citizen_race == "white"), "population"] = "65756"
sas_stops.loc[(sas_stops.citizen_race == "hispanic"), "population"] = "11205"
sas_stops.loc[(sas_stops.citizen_race == "native american"), "population"] = "360"
sas_stops.loc[(sas_stops.citizen_race == "asian / pacific islander"), "population"] = "5499"
sas_stops["rate_per_1000_persons"] = sas_stops.apply(lambda x: calc_rate(x.rtcc_footage_request_tally, x.population), axis=1)
sas_stops["year"] = "2018-2022"

In [151]:
sas_stops.to_excel("../data/excel/rtcc_sas_stops_arrested.xlsx")