# Victims

In [1]:
import pandas as pd
import numpy as np

In [66]:
dtype_victims={
    'case_id': np.uint64, # cannot use int since biggest case id is 9.870011e+18, np.uint64: 64-bit unsigned integer (from 0 to 2**64-1)
    'id': int,
    'party_number': int,
    'victim_age': pd.Int16Dtype(), # 999?
    'victim_degree_of_injury': str,# map to ints
    'victim_ejected': pd.Int16Dtype(),
    'victim_role': int,# map to ints
    'victim_safety_equipment_1': str,# map char to ints
    'victim_safety_equipment_2': str,# map char to ints
    'victim_seating_position': pd.Int16Dtype(),# no letters in data so we can use ints
    'victim_sex': str, #char
}

In [67]:
victims_data = pd.read_csv("CSV-2018/victims2018.csv",dtype=dtype_victims,usecols=dtype_victims.keys()) 

In [68]:
victims_data

Unnamed: 0,case_id,id,party_number,victim_age,victim_degree_of_injury,victim_ejected,victim_role,victim_safety_equipment_1,victim_safety_equipment_2,victim_seating_position,victim_sex
0,3,3,1,21,no injury,0,2,G,,3,male
1,5,4,1,44,severe injury,0,1,G,,1,male
2,8,5,1,59,no injury,0,2,G,,3,female
3,8,6,2,31,no injury,0,2,G,,3,male
4,9,7,2,14,complaint of pain,0,2,C,,6,female
...,...,...,...,...,...,...,...,...,...,...,...
4082680,9870011226102009803,8817531,2,20,other visible injury,0,1,G,,1,female
4082681,9870011231152508671,8817532,1,27,complaint of pain,0,2,H,,3,female
4082682,9870011231152508671,8817533,2,74,complaint of pain,0,2,G,,3,female
4082683,9870011231152508671,8817534,2,64,complaint of pain,0,1,G,,1,male


In [69]:
clean_data = victims_data.copy()

In [70]:
translation_victim_age = {
    "not stated": 998,
    "pregnancy": 999,
}

translation_victim_degree_of_injury = {
    "killed": 1,
    "severe injury": 2,
    "other visible injury": 3,
    "complaint of pain": 4,
    "suspected serious injury": 5,
    "suspected minor injury": 6,
    "possible injury": 7,
    "no injury": 0,
    "7": 7,
}

translation_victim_ejected = {
    "not ejected": 0,
    "fully ejected": 1,
    "partially ejected": 2,
    "unknown": 3,
}

translation_victim_role = {
    "driver": 1,
    "passenger (includes non-operator on bicycle or any victim on/in parked vehicle or multiple victims on/in non-motor vehicle)": 2,
    "pedestrian": 3,
    "bicyclist": 4,
    "other (single victim on/in non-motor vehicle; e.g. ridden animal, horse-drawn carriage, train, or building)": 5,
    "non-injured party": 6,
}

translation_victim_sex = {
    "male": 'M',
    "female": 'F',
}

In [111]:
clean_data["victim_age"] = victims_data["victim_age"].map(lambda s: translation_victim_age.get(s, s)).astype(pd.Int16Dtype())
clean_data["victim_degree_of_injury"] = victims_data["victim_degree_of_injury"].map(lambda s: translation_victim_degree_of_injury.get(s, None)).astype(pd.Int16Dtype())
clean_data["victim_sex"] = victims_data["victim_sex"].map(lambda s: translation_victim_sex.get(s, None))
clean_data["victim_ejected"] = victims_data["victim_ejected"].map(lambda s: None if pd.isna(s) or s>3 or s <0 else s).astype(pd.Int16Dtype())


In victim_ejected column, there were 4 values that were 4. Since 4 is not one of the preset options for victim_ejected we decided to replace the 4's by null

---------------------------------------

# Create CSV files

In [13]:
csv = clean_data[['id','victim_degree_of_injury']]
a = clean_data['victim_degree_of_injury']
dict_victim_degree_of_injury = pd.read_csv('tables/Victims/victim_degree_of_injury.csv').to_dict()['description']
descriptions_victim_degree_of_injury = a.map(lambda s: dict_victim_degree_of_injury.get(s,None))
# csv = csv.insert(0, 'descriptions_victim_degree_of_injury',descriptions_victim_degree_of_injury)
# print(len(descriptions_victim_degree_of_injury))
# csv.shape
csv['descriptions_victim_degree_of_injury']=descriptions_victim_degree_of_injury
csv.to_csv('out/victim_degree_of_injury.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  csv['descriptions_victim_degree_of_injury']=descriptions_victim_degree_of_injury
