In [1]:
# Dependencies
import pandas as pd
from pathlib import Path

In [2]:
# Save path to the data set as a variable.
victim_file = Path("../Datasets/VICTIM.csv")

In [3]:
# Use Pandas to the read data.
victim_df = pd.read_csv(victim_file)
victim_df.head()

Unnamed: 0,incidentid,race,injury,gender,schoolaffiliation,age
0,19700105DCHIW,,Fatal,Male,Student,15
1,19700105DCSOW,,Wounded,Male,Student,Teen
2,19700105DCUNW,,,Male,Unknown,16
3,19700206OHJOC,,Wounded,Male,Student,18
4,19700323CADAL,,Wounded,Male,Security Guard,44


In [4]:
def pull_strings(string):
    if string == "Teen":
        return "Teen"
    elif string == "Adult":
        return "Adult"
    elif string == "Child":
        return "Child"
    elif string == "Officer":
        return "Officer"
    else:
        return None

victim_df['age_cat'] = victim_df['age'].apply(pull_strings)
victim_df.head(10)

Unnamed: 0,incidentid,race,injury,gender,schoolaffiliation,age,age_cat
0,19700105DCHIW,,Fatal,Male,Student,15,
1,19700105DCSOW,,Wounded,Male,Student,Teen,Teen
2,19700105DCUNW,,,Male,Unknown,16,
3,19700206OHJOC,,Wounded,Male,Student,18,
4,19700323CADAL,,Wounded,Male,Security Guard,44,
5,19700323CADAL,,Wounded,Male,Student,16,
6,19700415ARPIP,Black,Fatal,Male,Student,20,
7,19700415ARPIP,Black,Wounded,Unknown,Student,Teen,Teen
8,19700415ARPIP,Black,Wounded,Unknown,Student,Teen,Teen
9,19700415ARPIP,Black,Wounded,Unknown,Student,Teen,Teen


In [5]:
def replace_strings(string):
    if string == "Teen":
        return 0
    elif string == "Adult":
        return 0
    elif string == "Child":
        return 0
    elif string == "Officer":
        return 0
    elif string == "null":
        return 0
    elif string == "":
        return 0
    else:
        return string

victim_df['age'] = victim_df['age'].apply(replace_strings)
victim_df.head(10)

Unnamed: 0,incidentid,race,injury,gender,schoolaffiliation,age,age_cat
0,19700105DCHIW,,Fatal,Male,Student,15,
1,19700105DCSOW,,Wounded,Male,Student,0,Teen
2,19700105DCUNW,,,Male,Unknown,16,
3,19700206OHJOC,,Wounded,Male,Student,18,
4,19700323CADAL,,Wounded,Male,Security Guard,44,
5,19700323CADAL,,Wounded,Male,Student,16,
6,19700415ARPIP,Black,Fatal,Male,Student,20,
7,19700415ARPIP,Black,Wounded,Unknown,Student,0,Teen
8,19700415ARPIP,Black,Wounded,Unknown,Student,0,Teen
9,19700415ARPIP,Black,Wounded,Unknown,Student,0,Teen


In [6]:
victim_df['age'] = victim_df['age'].fillna(0)
victim_df["age"] = victim_df["age"].astype(int)
victim_df.dtypes

incidentid           object
race                 object
injury               object
gender               object
schoolaffiliation    object
age                   int32
age_cat              object
dtype: object

In [7]:
def categorize(age):
    if age < 12 and age > 0:
        return "Child"
    elif age < 21 and age > 12:
        return "Teen"
    elif age > 21:
        return "Adult"
    else:
        return 0

victim_df['age_cat2'] = victim_df['age'].apply(categorize)
victim_df.head(10)

Unnamed: 0,incidentid,race,injury,gender,schoolaffiliation,age,age_cat,age_cat2
0,19700105DCHIW,,Fatal,Male,Student,15,,Teen
1,19700105DCSOW,,Wounded,Male,Student,0,Teen,0
2,19700105DCUNW,,,Male,Unknown,16,,Teen
3,19700206OHJOC,,Wounded,Male,Student,18,,Teen
4,19700323CADAL,,Wounded,Male,Security Guard,44,,Adult
5,19700323CADAL,,Wounded,Male,Student,16,,Teen
6,19700415ARPIP,Black,Fatal,Male,Student,20,,Teen
7,19700415ARPIP,Black,Wounded,Unknown,Student,0,Teen,0
8,19700415ARPIP,Black,Wounded,Unknown,Student,0,Teen,0
9,19700415ARPIP,Black,Wounded,Unknown,Student,0,Teen,0


In [8]:
victim_df['age_cat'] = victim_df['age_cat'].fillna(victim_df['age_cat2'])
victim_df.head(10)

Unnamed: 0,incidentid,race,injury,gender,schoolaffiliation,age,age_cat,age_cat2
0,19700105DCHIW,,Fatal,Male,Student,15,Teen,Teen
1,19700105DCSOW,,Wounded,Male,Student,0,Teen,0
2,19700105DCUNW,,,Male,Unknown,16,Teen,Teen
3,19700206OHJOC,,Wounded,Male,Student,18,Teen,Teen
4,19700323CADAL,,Wounded,Male,Security Guard,44,Adult,Adult
5,19700323CADAL,,Wounded,Male,Student,16,Teen,Teen
6,19700415ARPIP,Black,Fatal,Male,Student,20,Teen,Teen
7,19700415ARPIP,Black,Wounded,Unknown,Student,0,Teen,0
8,19700415ARPIP,Black,Wounded,Unknown,Student,0,Teen,0
9,19700415ARPIP,Black,Wounded,Unknown,Student,0,Teen,0


In [9]:
clean_victim_df = victim_df[["incidentid", "injury", "age_cat"]]
clean_victim_df.head(10)

Unnamed: 0,incidentid,injury,age_cat
0,19700105DCHIW,Fatal,Teen
1,19700105DCSOW,Wounded,Teen
2,19700105DCUNW,,Teen
3,19700206OHJOC,Wounded,Teen
4,19700323CADAL,Wounded,Adult
5,19700323CADAL,Wounded,Teen
6,19700415ARPIP,Fatal,Teen
7,19700415ARPIP,Wounded,Teen
8,19700415ARPIP,Wounded,Teen
9,19700415ARPIP,Wounded,Teen


In [10]:
clean_victim_df.rename(columns={'incidentid': "Incident_ID",
                                'injury': "Injury",
                                'age_cat': "Age"}, inplace=True)
clean_victim_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_victim_df.rename(columns={'incidentid': "Incident_ID",


Unnamed: 0,Incident_ID,Injury,Age
0,19700105DCHIW,Fatal,Teen
1,19700105DCSOW,Wounded,Teen
2,19700105DCUNW,,Teen
3,19700206OHJOC,Wounded,Teen
4,19700323CADAL,Wounded,Adult


In [11]:
clean_victim_df.to_csv("clean_victim.csv", index=False, header=True)

In [12]:
#Group by victim age
clean_victim_df.groupby('Age')['Incident_ID'].count().reset_index().sort_values('Incident_ID', ascending=False)

Unnamed: 0,Age,Incident_ID
4,Teen,1781
1,Adult,633
0,0,347
2,Child,331
3,Officer,1


In [13]:
#Group by victim injury
clean_victim_df.groupby('Injury')['Incident_ID'].count().reset_index().sort_values('Incident_ID', ascending=False)

Unnamed: 0,Injury,Incident_ID
3,Wounded,1835
0,Fatal,683
2,,481
1,Minor Injuries,94
