# Traitement des données

Dans cette partie, l'objectif est d'importer nos données, les convertir à un format convenable pour la production de statistiques descriptives, et exporter le résultat pour que ces bases puissent être utilisées par les autres programmes

Les bases en question sont :
- la base listant tous les incidents de violence par armes à feu aux USA entre 2013 et 2018
- la base listant les caractéristiques générales des comtés et de ses habitants.

In [2]:
#Pour le traitement classique
import numpy as np
import pandas as pd

#Pour l'interaction avec l'API 
import requests
from statistics import mean
import time

## Traitement de la base d'incidents armes à feu

In [5]:
#Base incidents armes à feu
url="https://drive.google.com/file/d/1GGOLMc_Ow9yZC9sICegPegDggQuHOD3t/view?usp=drive_link"
url="https://drive.google.com/uc?export=download&confirm=1&id=" + url.split("/")[-2]
gun_violence_db = pd.read_csv(url)
gun_violence_db.sample(5)

Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,...,participant_age,participant_age_group,participant_gender,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district
79345,373650,2015-07-12,Texas,Corpus Christi,,0,1,http://www.gunviolencearchive.org/incident/373650,http://ccpdblotter.com/,False,...,0::32||1::30,0::Adult 18+||1::Adult 18+,0::Female||1::Male,,1::Family,0::Injured||1::Unharmed,0::Victim||1::Subject-Suspect,http://ccpdblotter.com/,34.0,20.0
24325,154356,2014-07-03,California,San Francisco,Connecticut and 25th,0,2,http://www.gunviolencearchive.org/incident/154356,http://www.ktvu.com/news/news/crime-law/two-in...,False,...,,0::Teen 12-17||1::Adult 18+,0::Male,,,0::Injured||1::Injured,0::Victim||1::Victim,http://www.ktvu.com/news/news/crime-law/two-in...,17.0,11.0
186195,840009,2017-05-08,Louisiana,Eunice,E Dean St,1,0,http://www.gunviolencearchive.org/incident/840009,http://www.theadvertiser.com/story/news/2017/0...,False,...,0::12,0::Teen 12-17,0::Male,,,0::Killed,0::Victim,http://www.theadvertiser.com/story/news/2017/0...,41.0,28.0
184600,831589,2017-04-29,Virginia,Newport News,13200 block of Sojourner Court,1,0,http://www.gunviolencearchive.org/incident/831589,http://wavy.com/2017/05/04/newport-news-police...,False,...,0::27,0::Adult 18+,0::Male,0::Joshua Aaron Kaplan,,0::Killed,0::Victim,http://wavy.com/2017/04/29/deadly-shooting-on-...,95.0,1.0
204596,916982,2017-08-20,South Carolina,Hartsville,New Market Road,0,0,http://www.gunviolencearchive.org/incident/916982,http://www.live5news.com/story/36176046/hartsv...,False,...,0::26,0::Adult 18+,0::Male,0::Bradley Dale Wright,,"0::Unharmed, Arrested",0::Subject-Suspect,http://www.live5news.com/story/36176046/hartsv...,65.0,29.0


Conformément à la documentation de la base, certaines colonnes sont codées de façon à pouvoir les reconvertir en dictionnaire :

In [6]:
def convert_to_dict(value):
    if pd.isna(value):
        return value

    pairs = value.split('||')
    result_dict = {}
    for pair in pairs:
        #Some are corrupted : 1: instead of ::
        if '::' in pair:
            key, val = pair.split('::', 1)
            result_dict[int(key)] = val
        else:
            key, val = pair.split(':', 1)
            result_dict[int(key)] = val
    return result_dict

list_of_dict_columns = ['gun_stolen', 'gun_type', 'participant_age', 'participant_age_group', 'participant_gender', 'participant_name', 'participant_relationship', 'participant_status', 'participant_type']
gun_violence_db[list_of_dict_columns] = gun_violence_db[list_of_dict_columns].applymap(convert_to_dict)
gun_violence_db.head()



  gun_violence_db[list_of_dict_columns] = gun_violence_db[list_of_dict_columns].applymap(convert_to_dict)


Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,...,participant_age,participant_age_group,participant_gender,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district
0,461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,...,{0: '20'},"{0: 'Adult 18+', 1: 'Adult 18+', 2: 'Adult 18+...","{0: 'Male', 1: 'Male', 3: 'Male', 4: 'Female'}",{0: 'Julian Sims'},,"{0: 'Arrested', 1: 'Injured', 2: 'Injured', 3:...","{0: 'Victim', 1: 'Victim', 2: 'Victim', 3: 'Vi...",http://pittsburgh.cbslocal.com/2013/01/01/4-pe...,,
1,460726,2013-01-01,California,Hawthorne,13500 block of Cerise Avenue,1,3,http://www.gunviolencearchive.org/incident/460726,http://www.dailybulletin.com/article/zz/201301...,False,...,{0: '20'},"{0: 'Adult 18+', 1: 'Adult 18+', 2: 'Adult 18+...",{0: 'Male'},{0: 'Bernard Gillis'},,"{0: 'Killed', 1: 'Injured', 2: 'Injured', 3: '...","{0: 'Victim', 1: 'Victim', 2: 'Victim', 3: 'Vi...",http://losangeles.cbslocal.com/2013/01/01/man-...,62.0,35.0
2,478855,2013-01-01,Ohio,Lorain,1776 East 28th Street,1,3,http://www.gunviolencearchive.org/incident/478855,http://chronicle.northcoastnow.com/2013/02/14/...,False,...,"{0: '25', 1: '31', 2: '33', 3: '34', 4: '33'}","{0: 'Adult 18+', 1: 'Adult 18+', 2: 'Adult 18+...","{0: 'Male', 1: 'Male', 2: 'Male', 3: 'Male', 4...","{0: 'Damien Bell', 1: 'Desmen Noble', 2: 'Herm...",,"{0: 'Injured, Unharmed, Arrested', 1: 'Unharme...","{0: 'Subject-Suspect', 1: 'Subject-Suspect', 2...",http://www.morningjournal.com/general-news/201...,56.0,13.0
3,478925,2013-01-05,Colorado,Aurora,16000 block of East Ithaca Place,4,0,http://www.gunviolencearchive.org/incident/478925,http://www.dailydemocrat.com/20130106/aurora-s...,False,...,"{0: '29', 1: '33', 2: '56', 3: '33'}","{0: 'Adult 18+', 1: 'Adult 18+', 2: 'Adult 18+...","{0: 'Female', 1: 'Male', 2: 'Male', 3: 'Male'}","{0: 'Stacie Philbrook', 1: 'Christopher Ratlif...",,"{0: 'Killed', 1: 'Killed', 2: 'Killed', 3: 'Ki...","{0: 'Victim', 1: 'Victim', 2: 'Victim', 3: 'Su...",http://denver.cbslocal.com/2013/01/06/officer-...,40.0,28.0
4,478959,2013-01-07,North Carolina,Greensboro,307 Mourning Dove Terrace,2,2,http://www.gunviolencearchive.org/incident/478959,http://www.journalnow.com/news/local/article_d...,False,...,"{0: '18', 1: '46', 2: '14', 3: '47'}","{0: 'Adult 18+', 1: 'Adult 18+', 2: 'Teen 12-1...","{0: 'Female', 1: 'Male', 2: 'Male', 3: 'Female'}","{0: 'Danielle Imani Jameison', 1: 'Maurice Eug...",{3: 'Family'},"{0: 'Injured', 1: 'Injured', 2: 'Killed', 3: '...","{0: 'Victim', 1: 'Victim', 2: 'Victim', 3: 'Su...",http://myfox8.com/2013/01/08/update-mother-sho...,62.0,27.0


In [8]:
gun_violence_db.to_csv("data/gun_violence_db.csv", index=False)

## Traitements des bases comtés de l'API


La documentation précisant la manière d'interagir avec l'API de StLouisFed se trouve à la page https://fred.stlouisfed.org/docs/api/fred/#API.

In [87]:
api_key = "180de2e6a1d1e953d270ebf38341cd44"
param = {"api_key" : api_key, "file_type" : "json", "category_id" : "27281"}
url = "https://api.stlouisfed.org/fred/category/children?"
response = requests.get(url, params=param)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    data = response.json()
    # Process the data as needed
else:
    print(f"Error: {response.status_code}, {response.text}")
data

{'categories': [{'id': 27282, 'name': 'Alabama', 'parent_id': 27281},
  {'id': 27283, 'name': 'Alaska', 'parent_id': 27281},
  {'id': 27284, 'name': 'Arizona', 'parent_id': 27281},
  {'id': 149, 'name': 'Arkansas', 'parent_id': 27281},
  {'id': 27286, 'name': 'California', 'parent_id': 27281},
  {'id': 27287, 'name': 'Colorado', 'parent_id': 27281},
  {'id': 27288, 'name': 'Connecticut', 'parent_id': 27281},
  {'id': 27289, 'name': 'Delaware', 'parent_id': 27281},
  {'id': 27290, 'name': 'District of Columbia', 'parent_id': 27281},
  {'id': 27291, 'name': 'Florida', 'parent_id': 27281},
  {'id': 27292, 'name': 'Georgia', 'parent_id': 27281},
  {'id': 27293, 'name': 'Hawaii', 'parent_id': 27281},
  {'id': 27294, 'name': 'Idaho', 'parent_id': 27281},
  {'id': 150, 'name': 'Illinois', 'parent_id': 27281},
  {'id': 151, 'name': 'Indiana', 'parent_id': 27281},
  {'id': 27297, 'name': 'Iowa', 'parent_id': 27281},
  {'id': 27298, 'name': 'Kansas', 'parent_id': 27281},
  {'id': 152, 'name': 'K

In [98]:
def request_db(index):
    #this function requests to the API the database associated with the category id index
    param["category_id"] = index #on ajuste les paramètres de la request pour demander la bonne catégorie
    response = requests.get(url, params = param)
    data = response.json()
    return data["categories"]

In [76]:
param["category_id"] = 27282 #on ajuste les paramètres de la request pour demander la bonne catégorie
response = requests.get(url, params = param)
data_2 = response.json()
data_2["categories"]


[{'id': 27335, 'name': 'Counties', 'parent_id': 27282},
 {'id': 30529, 'name': 'MSAs', 'parent_id': 27282}]

In [109]:
#We create our dframe by creating a list of dicts, each element is a new row
for state in data['categories']:
    database = list()
    id_state = state['id']
    state_name = state['name']
    
    #Request to recover id in order to extract counties
    state_info = request_db(id_state)
    if state_info != []: #One exception : which one ?
        id_list_of_state_counties = state_info[0]['id']
        list_of_state_counties = request_db(id_list_of_state_counties)
        for county in list_of_state_counties:
            dict_county = dict()
            id_county = county['id']
            if ',' in county['name']:
                county_name,  state_code = county['name'].split(',')
            else:
                county_name = county['name']
                state_code = np.nan

            #Update
            dict_county['Nom'] = county_name
            dict_county['Etat'] = state_name
            dict_county['Code Etat'] = state_code
            dict_county['id Etat'] = id_state
            dict_county['id county'] = id_county

            print(dict_county)
        database.append(dict_county)

counties_db = 

    

{'Nom': 'Autauga County', 'Etat': 'Alabama', 'Code Etat': ' AL', 'id Etat': 27282, 'id county': 27336}
{'Nom': 'Baldwin County', 'Etat': 'Alabama', 'Code Etat': ' AL', 'id Etat': 27282, 'id county': 27337}
{'Nom': 'Barbour County', 'Etat': 'Alabama', 'Code Etat': ' AL', 'id Etat': 27282, 'id county': 27338}
{'Nom': 'Bibb County', 'Etat': 'Alabama', 'Code Etat': ' AL', 'id Etat': 27282, 'id county': 27339}
{'Nom': 'Blount County', 'Etat': 'Alabama', 'Code Etat': ' AL', 'id Etat': 27282, 'id county': 27340}
{'Nom': 'Bullock County', 'Etat': 'Alabama', 'Code Etat': ' AL', 'id Etat': 27282, 'id county': 27341}
{'Nom': 'Butler County', 'Etat': 'Alabama', 'Code Etat': ' AL', 'id Etat': 27282, 'id county': 27342}
{'Nom': 'Calhoun County', 'Etat': 'Alabama', 'Code Etat': ' AL', 'id Etat': 27282, 'id county': 27343}
{'Nom': 'Chambers County', 'Etat': 'Alabama', 'Code Etat': ' AL', 'id Etat': 27282, 'id county': 27344}
{'Nom': 'Cherokee County', 'Etat': 'Alabama', 'Code Etat': ' AL', 'id Etat': 

ValueError: too many values to unpack (expected 2)

In [111]:
print(county['name'])

Augusta, Staunton + Waynesboro County, VA
