In [1]:
import pandas as pd

"""
rtcc.csv lists every item number for which NOPD has requested RTCC footage.
These item numbers can be joined to the `item_number` column of NOPD's electronic police reports.
"""

def clean():
    """Build the RTCC-matched police-report table from the raw CSV exports.

    Reads the five yearly electronic police report files (2018-2022) from the
    working directory, stacks them into one frame, and joins the result to
    ``rtcc.csv`` on ``item_number``. The join uses pandas' default (inner)
    mode, so only reports whose item number also appears in ``rtcc.csv`` are
    returned.

    Returns:
        pandas.DataFrame: the joined report / RTCC rows.
    """
    report_files = (
        "electronic_police_report_2018.csv",
        "electronic_police_report_2019.csv",
        "electronic_police_report_2020.csv",
        "electronic_police_report_2021.csv",
        "electronic_police_report_2022.csv",
    )
    yearly_reports = [pd.read_csv(path) for path in report_files]
    reports = pd.concat(yearly_reports, join="outer")

    # rtcc.csv is read as cp1252 and spells its key column "Item_number";
    # rename it so both sides share the join key.
    rtcc = pd.read_csv("rtcc.csv", encoding="cp1252")
    rtcc = rtcc.rename(columns={"Item_number": "item_number"})

    # Still to review: district, signal description, charge description,
    # offender demographics.
    return reports.merge(rtcc, on="item_number")

In [2]:

def filter_race(df):
    """Normalise ``offender_race`` and drop rows where it is blank.

    Missing values are treated as empty strings, then every value is
    lower-cased and stripped of surrounding whitespace. Rows whose normalised
    value is the empty string are removed.

    The input frame is left untouched: the cleaned column is attached to a new
    frame instead of being written back with ``df.loc[...] = ...``. That
    matters because this function is piped after ``filter_year``, whose result
    is a slice of another frame; writing into a slice raises
    ``SettingWithCopyWarning`` and silently changes the caller's data.

    Args:
        df: frame with an ``offender_race`` column of strings / missing values.

    Returns:
        pandas.DataFrame: a new frame holding the rows with a non-blank
        ``offender_race``, with that column normalised. The original index
        labels of the surviving rows are kept.
    """
    race = df.offender_race.fillna("").str.lower().str.strip()
    # An earlier (disabled) variant additionally recoded "unknown",
    # "hispanic", "asian", "white" and "amer. ind." to "non-black".
    cleaned = df.assign(offender_race=race)
    return cleaned[cleaned.offender_race != ""]


def filter_gender(df):
    """Normalise ``offender_gender`` without dropping any rows.

    Missing values become empty strings, values are lower-cased and stripped,
    and the substring ``"unknown"`` is removed, so an "unknown" gender ends up
    blank just like a missing one.

    Missing values are filled *before* the ``.str`` accessor is used (the same
    order ``filter_race`` uses), so a column that is entirely missing (float
    dtype) no longer fails on ``.str``. The input frame is not modified; a new
    frame carrying the cleaned column is returned.

    Args:
        df: frame with an ``offender_gender`` column.

    Returns:
        pandas.DataFrame: a new frame with the same rows and a normalised
        ``offender_gender`` column.
    """
    gender = (
        df.offender_gender.fillna("")
        .str.lower()
        .str.strip()
        .str.replace("unknown", "", regex=False)
    )
    # Disabled follow-up filters from earlier analyses, kept for reference:
    #   - drop rows whose offender_gender is blank
    #   - keep only offender_gender == "female"
    #   - normalise offenderstatus and keep only "arrested"
    return df.assign(offender_gender=gender)


def filter_year(df, years=("2018", "2019", "2020", "2021", "2022")):
    """Keep only the rows whose ``year`` is one of ``years``.

    The comparison is numeric, so the ``year`` column may hold ints, numeric
    strings, or floats (a year column containing missing values is read back
    from CSV as float, e.g. ``2018.0``). Values that cannot be read as a
    number never match.

    Args:
        df: frame with a ``year`` column.
        years: a single year or an iterable of years (ints or numeric
            strings) to keep. Defaults to 2018 through 2022. Pass e.g.
            ``years=["2022"]`` to reproduce a single-year breakdown.

    Returns:
        pandas.DataFrame: the rows of ``df`` whose year is in ``years``.

    Recorded results (share of each ``offender_race`` value, as noted by the
    original author for each year selection)::

        2018 - 2022
          black         0.768292
          unknown       0.152651
          white         0.068554
          hispanic      0.009226
          asian         0.001065
          -amer. ind.    0.000213

        2022
          black      0.660252
          unknown    0.297557
          white      0.039230
          asian      0.002961

        2021
          black       0.663802
          unknown     0.279270
          white       0.055209
          asian       0.001074
          hispanic    0.000644

        2020
          black         0.867308
          unknown       0.069408
          white         0.046369
          hispanic      0.016040
          amer. ind.    0.000875

        2019
          black       0.849009
          white       0.091532
          unknown     0.044685
          hispanic    0.013694
          asian       0.001081

        2018
          black       0.804891
          white       0.129187
          unknown     0.046252
          hispanic    0.018075
          asian       0.001595
    """
    # Accept a bare year ("2022" or 2022) as well as a collection; iterating a
    # bare string would otherwise split it into single characters.
    if pd.api.types.is_scalar(years):
        years = [years]

    wanted = pd.to_numeric(
        pd.Series(list(years), dtype="object"), errors="coerce"
    ).dropna()
    # Coerce instead of astype(str): "2018", 2018 and 2018.0 all compare equal.
    year_values = pd.to_numeric(df.year, errors="coerce")
    return df[year_values.isin(wanted)]


def extract_years(df):
    """Add a ``year`` column taken from ``occurred_date_time``.

    The year is the first run of four digits in the stringified timestamp,
    which covers both formats present in the data ("2018-04-18 04:52:00" and
    "1/17/2022 20:53"). The pattern is digit-only: the previous ``\\w{4}``
    also matched letters, so a ``None`` timestamp (stringified to "None")
    produced the year "None". Rows without a four-digit run get a missing
    year.

    The input frame is not modified; a new frame with the extra column is
    returned.

    Args:
        df: frame with an ``occurred_date_time`` column.

    Returns:
        pandas.DataFrame: copy of ``df`` with a string ``year`` column.

    Row counts per extracted year, as recorded by the original author::

        2021    5600
        2020    5370
        2019    4102
        2018    2559
        2022    1492
    """
    year = df.occurred_date_time.astype(str).str.extract(r"(\d{4})", expand=False)
    return df.assign(year=year)

In [3]:
def merged():
    """Load ``rtcc_merged.csv`` and apply the year and race filters.

    Returns:
        pandas.DataFrame: the rows kept by ``filter_year`` and then by
        ``filter_race``, in that order.
    """
    rtcc_reports = pd.read_csv("rtcc_merged.csv")
    within_years = filter_year(rtcc_reports)
    return filter_race(within_years)

In [4]:
# Load the year- and race-filtered RTCC / police-report frame once for reuse.
df = merged()

In [5]:
# Display the filtered frame; the notebook truncates it to the first and last rows.
df

Unnamed: 0,item_number,district,location,disposition,signal_type,signal_description,occurred_date_time,charge_code,charge_description,offender_race,...,victim_age,victim_number,victim_fatal_status,hate_crime,report_type,year,offenderid,offenderstatus,persontype,Signal Code
0,D-20771-18,2,36XX S Roman St,CLOSED,30S,HOMICIDE (SHOOTING),2018-04-18 04:52:00,14 (27) 30.1,ATTEMPT - SECOND DEGREEMURDER,black,...,,2.0,Non-fatal,,Supplemental Report,2018,,,,30s
1,D-20771-18,2,36XX S Roman St,CLOSED,30S,HOMICIDE (SHOOTING),2018-04-18 04:52:00,14 30.1,SECOND DEGREE MURDER,black,...,,1.0,Fatal,,Supplemental Report,2018,,,,30s
8,G-35522-18,3,Metairie Rd & City Park Av,CLOSED,34S,AGGRAVATED BATTERY (SHOOTING),2018-07-28 22:00:00,14 34.7,AGG. SECOND DEGREE BATTERY,black,...,44.0,1.0,Non-fatal,,Supplemental Report,2018,,,,34S
9,G-35522-18,3,Metairie Rd & City Park Av,CLOSED,34S,AGGRAVATED BATTERY (SHOOTING),2018-07-28 22:00:00,,,black,...,43.0,2.0,Non-fatal,,Supplemental Report,2018,,,,34S
11,E-14666-18,1,3XX N Tonti St,OPEN,64G,ARMED ROBBERY (GUN),2018-05-12 03:30:00,14 (24) 64,PRINCIPAL TO ARMED ROBBERY,black,...,26.0,1.0,Non-fatal,,Incident Report,2018,,,,64G
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19116,A-18985-22,7,Dorothea St & Deanne St,OPEN,64G,ARMED ROBBERY (GUN),1/17/2022 20:53,14 64,ARMED ROBBERY,black,...,60.0,1.0,Non-fatal,,Incident Report,2022,729047.0,,VICTIM,64 G
19117,A-19147-22,6,021XX Saint Charles Av,CLOSED,64G,ARMED ROBBERY (GUN),1/18/2022 1:30,14 64,ARMED ROBBERY,black,...,,1.0,Non-fatal,,Incident Report,2022,727656.0,ARRESTED,VICTIM,64 G
19118,A-19147-22,6,021XX Saint Charles Av,CLOSED,64G,ARMED ROBBERY (GUN),1/18/2022 1:30,,,black,...,45.0,2.0,Non-fatal,,Incident Report,2022,727656.0,ARRESTED,VICTIM,64 G
19119,A-19147-22,6,021XX Saint Charles Av,CLOSED,64G,ARMED ROBBERY (GUN),1/18/2022 1:30,,,black,...,30.0,3.0,Non-fatal,,Incident Report,2022,727656.0,ARRESTED,WITNESS,64 G
