# PROJET - ESCOBADDICTIONS
## 8PRO408 – Outils de programmation pour la science des données
### Auteurs : Andonin COUSSEAU / Etienne CHEVROLLIER / Khassan TASSOUEV / Damien BALLET

--------

## Étude 3 - Lien entre le taux de mortalité lié à la cocaïne et le taux de criminalité

[Lien WorldBank](https://data.worldbank.org/indicator/VC.IHR.PSRC.MA.P5)

------------

In [21]:
import pandas as pd
import numpy as np

# Load the World Bank homicide-rate export. The first three lines of the file
# are skipped so that the following line is used as the column header.
df = pd.read_csv(
    'data/murder/API_VC.IHR.PSRC.MA.P5_DS2_en_csv_v2_5564733.csv',
    header=0,
    skiprows=3,
)

In [22]:
# Keep the two identifier columns plus every column from '1990' to the end of
# the frame (a label slice, so anything located after the last year is kept too).
kept_columns = ['Country Name', 'Country Code', *df.loc[:, '1990':].columns]
df = df[kept_columns].rename(columns={'Country Name': 'Entity', 'Country Code': 'Code'})

# Values are intentional homicides per 100,000 people.
# NOTE(review): the source file name carries indicator code VC.IHR.PSRC.MA.P5,
# which looks like the male-only homicide series - confirm on the World Bank page.
# Display only: the re-indexed frame is not assigned, so `df` keeps `Code` as a column.
df.set_index("Code")

Unnamed: 0_level_0,Entity,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,Unnamed: 67
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ABW,Aruba,0.000000,,,,,16.074801,,,,...,4.065784,,,,,,,,,
AFE,Africa Eastern and Southern,,,,,,,,,,...,,,,,,,,,,
AFG,Afghanistan,,,,,,,,,,...,,18.511644,12.190030,12.282875,12.156260,,,,,
AFW,Africa Western and Central,,,,,,,,,,...,,,,,,,,,,
AGO,Angola,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XKX,Kosovo,,,,,,,,,,...,4.846974,3.646005,3.434958,4.052007,4.103817,2.496205,3.481223,,,
YEM,"Yemen, Rep.",,,,,,,,,,...,,,,,,,,,,
ZAF,South Africa,,,,,,,,,,...,,,59.730474,63.443761,65.598344,66.045214,,,,
ZMB,Zambia,,,,,,,,,,...,,,,,,,,,,


In [23]:
# Keep only the countries that have at least a minimum number of non-missing
# yearly values. The minimum is the 1990-2022 span divided by 1.5,
# i.e. round(32 / 1.5) = 21.
limite_nan = round((2022-1990) / 1.5)

# Count non-missing values over the year columns only. Without `subset`,
# dropna(thresh=...) also counts the non-year columns (Entity, Code) toward the
# threshold, which lowers the number of yearly values actually required.
colonnes_annees = [col for col in df.columns if str(col).isdigit()]
df_correct = df.dropna(thresh=limite_nan, subset=colonnes_annees)

In [29]:
# Notebook display of the filtered table (last expression of the cell).
df_correct

Unnamed: 0,Entity,Code,1990,1991,1992,1993,1994,1995,1996,1997,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,Unnamed: 67
5,Albania,ALB,,,6.943821,10.51989,6.043041,14.164978,14.934879,71.384722,...,8.432802,3.320003,4.291655,3.118126,3.538696,3.614411,3.62681,3.786936,,
10,Armenia,ARM,9.027737,8.926263,,12.909431,8.223708,6.127272,5.80478,5.047377,...,3.935969,4.112312,5.28992,4.555902,3.035032,4.942078,2.844698,3.740554,,
13,Australia,AUS,2.77598,2.288495,2.115175,2.443378,2.314011,2.41397,2.662371,2.64818,...,1.277262,1.276442,1.22422,1.180448,1.089576,1.24017,1.177559,1.095763,,
14,Austria,AUT,1.336609,1.509696,1.439215,1.18955,0.929898,1.041523,1.262166,0.746155,...,0.478724,0.637332,0.535822,0.76292,0.966005,0.84708,0.638853,0.569332,,
17,Belgium,BEL,,,,,,,,,...,2.34779,2.277758,1.921075,1.890449,2.073362,1.60271,,1.987848,,
21,Bulgaria,BGR,4.071797,6.637273,8.566987,9.525435,9.480332,10.126852,8.735996,7.554768,...,2.407643,2.738215,1.622816,1.952927,1.564632,1.753917,1.270206,1.796755,,
23,"Bahamas, The",BHS,,,,20.490935,32.173137,22.683607,29.123215,25.763506,...,59.260208,68.836244,54.667644,60.57726,45.677269,45.938685,31.875623,57.940547,,
29,Brazil,BRA,35.942776,33.655206,30.689501,32.543255,34.093166,38.137971,42.694879,44.119581,...,54.684732,52.93307,55.469658,57.279625,49.667915,38.674681,41.783067,,,
30,Barbados,BRB,18.474563,9.616887,14.385212,12.754085,11.135282,9.527855,,,...,16.582623,21.051128,14.999869,20.951105,17.918939,32.036358,28.246908,19.283042,,
35,Canada,CAN,3.070559,3.438982,3.522586,3.000904,2.727561,2.732694,2.914175,2.619825,...,2.112051,2.436701,2.572649,2.711813,2.680698,2.837809,3.054199,3.090716,,


In [35]:
# Year-over-year change of the homicide rate for each retained country.
df_evol = df_correct.copy()  # work on a copy so df_correct keeps the raw rates

# Columns 0 and 1 are Entity and Code; everything from position 2 onward is a
# yearly value. Differencing along axis=1 gives value(year) - value(previous
# year). The first year has no predecessor, so diff() already leaves it NaN
# (no separate reset is needed), and a missing year yields NaN for that year
# and for the following one.
df_evol.iloc[:, 2:] = df_correct.iloc[:, 2:].diff(axis=1)

# Column labels are deliberately left as the year strings: column YYYY holds the
# change between YYYY-1 and YYYY. A rename through
# Index.str.replace('\d{4}', 'Diff') is not reliable here: on pandas >= 2.0 the
# pattern is matched literally and nothing is renamed, while on older pandas
# every year label would collapse into the same duplicate name 'Diff'.



In [36]:
# Notebook display of the year-over-year differences (last expression of the cell).
df_evol

Unnamed: 0,Entity,Code,1990,1991,1992,1993,1994,1995,1996,1997,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,Unnamed: 67
5,Albania,ALB,,,,3.576069,-4.476849,8.121937,0.769901,56.449843,...,1.251024,-5.112799,0.971653,-1.173529,0.42057,0.075715,0.012399,0.160126,,
10,Armenia,ARM,,-0.101474,,,-4.685723,-2.096436,-0.322492,-0.757403,...,0.625688,0.176342,1.177608,-0.734018,-1.520869,1.907045,-2.097379,0.895855,,
13,Australia,AUS,,-0.487485,-0.17332,0.328203,-0.129367,0.099958,0.248401,-0.01419,...,-0.036193,-0.00082,-0.052222,-0.043772,-0.090873,0.150595,-0.062611,-0.081797,,
14,Austria,AUT,,0.173087,-0.070481,-0.249665,-0.259652,0.111625,0.220642,-0.51601,...,-0.294339,0.158608,-0.10151,0.227098,0.203085,-0.118926,-0.208226,-0.069521,,
17,Belgium,BEL,,,,,,,,,...,0.399565,-0.070032,-0.356683,-0.030626,0.182913,-0.470652,,,,
21,Bulgaria,BGR,,2.565476,1.929714,0.958448,-0.045103,0.646519,-1.390856,-1.181227,...,0.324074,0.330572,-1.115399,0.330112,-0.388296,0.189285,-0.483711,0.526549,,
23,"Bahamas, The",BHS,,,,,11.682202,-9.48953,6.439608,-3.359709,...,4.364537,9.576035,-14.168599,5.909615,-14.89999,0.261415,-14.063061,26.064923,,
29,Brazil,BRA,,-2.28757,-2.965706,1.853754,1.549911,4.044806,4.556908,1.424703,...,2.374534,-1.751662,2.536588,1.809967,-7.61171,-10.993234,3.108385,,,
30,Barbados,BRB,,-8.857676,4.768325,-1.631127,-1.618804,-1.607427,,,...,3.733379,4.468505,-6.051259,5.951237,-3.032166,14.117418,-3.78945,-8.963866,,
35,Canada,CAN,,0.368423,0.083604,-0.521682,-0.273343,0.005132,0.181482,-0.29435,...,0.053794,0.32465,0.135949,0.139164,-0.031115,0.157111,0.21639,0.036518,,


In [39]:
# Load the cocaine death-rate dataset; the first line of the file is the header.
df_2 = pd.read_csv(
    'data/drogues/death-rates-cocaine.csv',
    header=0,
)

In [40]:
# Give the long source column a short name. Per its original label, it holds
# the age-standardized death rate from cocaine use disorders, both sexes.
noms_courts = {
    'Deaths - Cocaine use disorders - Sex: Both - Age: Age-standardized (Rate)': 'Death_Cocaine',
}
df_2 = df_2.rename(columns=noms_courts)

# Age standardization of death rates makes comparisons between different
# populations, or between different periods, more meaningful by adjusting the
# data for differences in age structure. It helps remove or reduce the
# potential effect of variations in the age distribution on the observed
# death rates.

In [41]:
# Reshape the long table into one row per (Code, Entity) with one column per
# Year holding Death_Cocaine, then turn Code and Entity back into columns.
df_pivot = (
    df_2
    .pivot_table(values='Death_Cocaine', index=['Code', 'Entity'], columns='Year')
    .reset_index()
)

# Display only: the re-indexed frame is not assigned, so `df_pivot` keeps `Code` as a column.
df_pivot.set_index('Code')

Year,Entity,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AFG,Afghanistan,0.039427,0.039720,0.040313,0.041282,0.042203,0.042757,0.043227,0.043820,0.044234,...,0.052037,0.052579,0.053343,0.053865,0.054737,0.055858,0.056823,0.057970,0.058994,0.059789
AGO,Angola,0.015872,0.015988,0.016228,0.016562,0.016777,0.016468,0.015717,0.015577,0.016284,...,0.021239,0.022222,0.023306,0.024039,0.024339,0.025246,0.025873,0.027119,0.028209,0.028951
ALB,Albania,0.001465,0.002111,0.002464,0.003790,0.005654,0.007035,0.008206,0.008859,0.010921,...,0.015320,0.015727,0.016141,0.016713,0.017728,0.018758,0.019295,0.019808,0.020268,0.020728
AND,Andorra,0.009574,0.009798,0.009949,0.010001,0.009970,0.009865,0.009812,0.009767,0.009772,...,0.012056,0.011889,0.012121,0.012416,0.012826,0.013235,0.013464,0.013842,0.014121,0.014389
ARE,United Arab Emirates,0.199454,0.216925,0.236998,0.258539,0.280758,0.305722,0.330694,0.359268,0.384823,...,0.590011,0.599523,0.606297,0.620454,0.628419,0.636252,0.645846,0.653888,0.663690,0.672341
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WSM,Samoa,0.020597,0.020981,0.021249,0.021144,0.021110,0.021145,0.021045,0.020981,0.020972,...,0.022048,0.022383,0.022772,0.022833,0.023251,0.023985,0.024716,0.025442,0.026143,0.026739
YEM,Yemen,0.022960,0.023134,0.023574,0.024046,0.024670,0.025532,0.026402,0.027332,0.028192,...,0.039946,0.041259,0.042043,0.043374,0.044008,0.045210,0.046383,0.047686,0.049215,0.049768
ZAF,South Africa,0.083247,0.085878,0.093194,0.085748,0.090242,0.092474,0.091607,0.104437,0.098355,...,0.045206,0.040890,0.038072,0.035562,0.033326,0.030968,0.027870,0.025959,0.023435,0.021839
ZMB,Zambia,0.013639,0.014431,0.015078,0.015738,0.016432,0.017229,0.017850,0.018352,0.018779,...,0.025329,0.026050,0.026260,0.026653,0.027110,0.027607,0.028272,0.029181,0.029760,0.030130
