# Victims

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Column -> dtype table handed to pd.read_csv; its keys double as the list of columns to load.
dtype_victims = dict(
    # The largest case id is about 9.870011e+18, so an unsigned 64-bit integer
    # (0 .. 2**64-1) is used instead of plain int.
    case_id=np.uint64,
    id=int,
    party_number=int,
    # Nullable Int16. NOTE(review): the original author left an open question about the value 999 here.
    victim_age=pd.Int16Dtype(),
    # Loaded as text; to be mapped to integer codes.
    victim_degree_of_injury=str,
    victim_ejected=pd.Int16Dtype(),
    # Original author's note: map to ints.
    victim_role=int,
    # Single-character codes kept as text; the author intended to map them to ints.
    victim_safety_equipment_1=str,
    victim_safety_equipment_2=str,
    # No letters occur in the data, so a nullable integer is sufficient.
    victim_seating_position=pd.Int16Dtype(),
    # Kept as text (a single character per the original author's note).
    victim_sex=str,
)

In [None]:
# Read only the columns declared in dtype_victims, parsing each with its declared dtype.
victims_data = pd.read_csv(
    "CSV-2018/victims2018.csv",
    usecols=dtype_victims.keys(),
    dtype=dtype_victims,
)

In [None]:
# Display the freshly loaded frame to eyeball the parsed columns.
victims_data

In [None]:
# Work on an independent deep copy so victims_data keeps the values exactly as loaded.
clean_data = victims_data.copy(deep=True)

In [None]:
# Numeric codes assigned to the textual victim_age labels.
translation_victim_age = dict(
    [
        ("not stated", 998),
        ("pregnancy", 999),
    ]
)

# Injury label -> integer code. The first seven labels are numbered 1..7 in the
# order listed; "no injury" is 0 and the raw string "7" is also mapped to 7.
translation_victim_degree_of_injury = {
    **{
        label: code
        for code, label in enumerate(
            (
                "killed",
                "severe injury",
                "other visible injury",
                "complaint of pain",
                "suspected serious injury",
                "suspected minor injury",
                "possible injury",
            ),
            start=1,
        )
    },
    "no injury": 0,
    "7": 7,
}

# Ejection label -> integer code, numbered from 0 in the order listed.
translation_victim_ejected = {
    label: code
    for code, label in enumerate(
        ("not ejected", "fully ejected", "partially ejected", "unknown")
    )
}

# Victim role description -> integer code, numbered from 1 in the order listed.
translation_victim_role = {
    description: code
    for code, description in enumerate(
        (
            "driver",
            "passenger (includes non-operator on bicycle or any victim on/in parked vehicle or multiple victims on/in non-motor vehicle)",
            "pedestrian",
            "bicyclist",
            "other (single victim on/in non-motor vehicle; e.g. ridden animal, horse-drawn carriage, train, or building)",
            "non-injured party",
        ),
        start=1,
    )
}

# Spelled-out sex label -> single-character code.
translation_victim_sex = dict(male='M', female='F')

In [None]:
# Injury labels -> integer codes; labels absent from the table come back as None
# and end up as nulls in the nullable Int16 column.
clean_data["victim_degree_of_injury"] = (
    victims_data["victim_degree_of_injury"]
    .map(translation_victim_degree_of_injury.get)
    .astype(pd.Int16Dtype())
)

# Sex labels -> single-character codes; anything not in the table becomes None.
clean_data["victim_sex"] = victims_data["victim_sex"].map(translation_victim_sex.get)

# Keep ejection codes only when they fall in 0..3; missing or out-of-range values become null.
clean_data["victim_ejected"] = (
    victims_data["victim_ejected"]
    .map(lambda code: code if pd.notna(code) and 0 <= code <= 3 else None)
    .astype(pd.Int16Dtype())
)


In the `victim_ejected` column, four rows had the value 4. Since 4 is not one of the preset options for `victim_ejected`, we decided to replace those 4s with null.

---------------------------------------

# Create CSV files

In [None]:
# Build and export a table with one row per victim: id, numeric injury code,
# and the matching textual description.

# .copy() makes `csv` an independent frame, so adding a column below does not
# assign into a slice of clean_data (which triggers SettingWithCopyWarning).
csv = clean_data[['id','victim_degree_of_injury']].copy()
a = clean_data['victim_degree_of_injury']

# to_dict() returns {column: {row_index: value}}, so this lookup is keyed by the
# row position in the lookup file.
# NOTE(review): this only matches injury codes if row i of the file describes
# code i - confirm against tables/Victims/victim_degree_of_injury.csv.
dict_victim_degree_of_injury = pd.read_csv('tables/Victims/victim_degree_of_injury.csv').to_dict()['description']

# Codes with no entry in the lookup (including nulls) get None as description.
descriptions_victim_degree_of_injury = a.map(lambda s: dict_victim_degree_of_injury.get(s,None))

csv['descriptions_victim_degree_of_injury']=descriptions_victim_degree_of_injury
csv.to_csv('out/victim_degree_of_injury.csv', index=False)

In [None]:
# Display the exported table (id, injury code, description) for a visual check.
csv

In [None]:
# pandas has no `read_data` function, so the original call raised AttributeError.
# NOTE(review): assumed intent is to read the exported file back as a sanity check - confirm.
pd.read_csv('out/victim_degree_of_injury.csv').head()