## Importar librerías


In [1]:
# Standard library
import os
import re
import warnings

# Data handling
import numpy as np
import pandas as pd

# Data visualisation libraries
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
import seaborn as sns

# Fuzzy string matching
from fuzzywuzzy import process

warnings.filterwarnings('ignore')



In [2]:
# Location of homicide_rate_unodc.csv. The default is the absolute path used on
# the machine where this notebook was written; on any other machine set the
# HOMICIDE_CSV_PATH environment variable instead of editing this cell.
HOMICIDE_CSV_PATH = os.environ.get(
    "HOMICIDE_CSV_PATH",
    r"C:\Users\usuario\Desktop\Master Data Analytics\PROYECTO FINAL - HEALTH ANALYTICS\Dato\homicide_rate_unodc.csv",
)

# Raw homicide-rate table; later cells read from this frame.
df_homicide = pd.read_csv(HOMICIDE_CSV_PATH)

## Análisis de los datos

In [3]:
def check_df(df: pd.DataFrame) -> None:
    """Print and display a quick exploratory overview of a dataframe.

    In order, the overview shows: shape, column index, first rows, last rows,
    a random sample of rows, dtypes, ``describe()`` statistics, the null count
    per column and the number of duplicated rows. Each section is preceded by
    a question/label line and sections are divided by a dashed separator.

    Args:
        df: The dataframe to inspect. It is only read, never modified.

    Returns:
        None. Everything is emitted through ``print`` and ``display``.

    Notes:
        ``display`` is not imported here; it is the name IPython/Jupyter
        injects into the notebook namespace.
    """
    separator = "\n------------------------------------------------------------------------------------------------------------------------------------"

    # DataFrame.sample raises ValueError when asked for more rows than the
    # frame holds (sampling without replacement), so cap the size for frames
    # with fewer than 5 rows.
    sample_size = min(5, len(df))

    print("Cuál es la forma del dataframe?")
    display(df.shape)
    print(separator)

    print("Cuáles son las columnas del dataframe?")
    print(df.columns)
    print(separator)

    print("Cuáles son las primeras 5 filas del dataframe?")
    display(df.head())
    print(separator)

    print("Cuáles son las 5 últimas filas?")
    display(df.tail())
    print(separator)

    print("Muéstrame 5 filas aleatorias del dataframe")
    display(df.sample(sample_size))
    print(separator)

    print("Información sobre la tipología de datos:")
    print(df.dtypes)
    print(separator)

    print("Principales datos estadísticos:")
    display(df.describe())
    print(separator)

    print("Existen columnas con valores nulos?")
    print(df.isnull().sum())
    print(separator)

    # duplicated() flags rows that repeat an earlier row, so this is a count
    # of duplicated rows even though the label speaks of columns.
    print("Existen columnas con valores duplicados?")
    print(df.duplicated().sum())

In [4]:
# Run the exploratory overview (shape, columns, samples, dtypes, statistics,
# nulls, duplicates) on the homicide dataset.
check_df(df_homicide)

Cuál es la forma del dataframe?


(4204, 4)


------------------------------------------------------------------------------------------------------------------------------------
Cuáles son las columnas del dataframe?
Index(['Entity', 'Code', 'Year',
       'Homicide rate per 100,000 population - Both sexes - All ages'],
      dtype='object')

------------------------------------------------------------------------------------------------------------------------------------
Cuáles son las primeras 5 filas del dataframe?


Unnamed: 0,Entity,Code,Year,"Homicide rate per 100,000 population - Both sexes - All ages"
0,Afghanistan,AFG,2009,4.071527
1,Afghanistan,AFG,2010,3.487093
2,Afghanistan,AFG,2011,4.208668
3,Afghanistan,AFG,2012,6.393913
4,Afghanistan,AFG,2015,9.975262



------------------------------------------------------------------------------------------------------------------------------------
Cuáles son las 5 últimas filas?


Unnamed: 0,Entity,Code,Year,"Homicide rate per 100,000 population - Both sexes - All ages"
4199,Zimbabwe,ZWE,2012,7.395217
4200,Zimbabwe,ZWE,2018,4.876369
4201,Zimbabwe,ZWE,2019,5.145035
4202,Zimbabwe,ZWE,2020,4.97777
4203,Zimbabwe,ZWE,2021,6.139985



------------------------------------------------------------------------------------------------------------------------------------
Muéstrame 5 filas aleatorias del dataframe


Unnamed: 0,Entity,Code,Year,"Homicide rate per 100,000 population - Both sexes - All ages"
3193,Russia,RUS,2009,12.334836
966,Cuba,CUB,1995,7.797411
400,Bahrain,BHR,2001,1.232443
494,Belgium,BEL,1992,1.207521
4075,Uzbekistan,UZB,2001,4.281452



------------------------------------------------------------------------------------------------------------------------------------
Información sobre la tipología de datos:
Entity                                                           object
Code                                                             object
Year                                                              int64
Homicide rate per 100,000 population - Both sexes - All ages    float64
dtype: object

------------------------------------------------------------------------------------------------------------------------------------
Principales datos estadísticos:


Unnamed: 0,Year,"Homicide rate per 100,000 population - Both sexes - All ages"
count,4204.0,4204.0
mean,2006.600856,8.013034
std,8.712915,12.055301
min,1990.0,0.0
25%,2000.0,1.331695
50%,2007.0,3.285863
75%,2014.0,9.409311
max,2021.0,138.77399



------------------------------------------------------------------------------------------------------------------------------------
Existen columnas con valores nulos?
Entity                                                            0
Code                                                            176
Year                                                              0
Homicide rate per 100,000 population - Both sexes - All ages      0
dtype: int64

------------------------------------------------------------------------------------------------------------------------------------
Existen columnas con valores duplicados?
0


In [5]:
# Keep only the rows whose "Code" is missing. Later cells reuse this frame
# under the name df_null.
missing_code_mask = df_homicide["Code"].isna()
df_null = df_homicide.loc[missing_code_mask]

# Preview the first rows of the filtered frame.
df_null.head()

Unnamed: 0,Entity,Code,Year,"Homicide rate per 100,000 population - Both sexes - All ages"
11,Africa (UN),,2000,13.645948
12,Africa (UN),,2001,13.589387
13,Africa (UN),,2002,13.524751
14,Africa (UN),,2003,13.095996
15,Africa (UN),,2004,12.850431


In [7]:
# List the distinct entities among the rows that have no country code.
df_null["Entity"].unique()

array(['Africa (UN)', 'Americas (UN)', 'Asia (UN)', 'Europe (UN)',
       'Melanesia (UN)', 'Micronesia (UN)', 'Oceania (UN)',
       'Polynesia (UN)'], dtype=object)

### ¿Cuáles son los países con mayor nivel de homicidios?

In [9]:
# Five rows with the highest homicide rate. Each row is one entity in one
# year, so the same entity can appear more than once.
(
    df_homicide
    .sort_values(
        by="Homicide rate per 100,000 population - Both sexes - All ages",
        ascending=False,
    )
    .head()
)

Unnamed: 0,Entity,Code,Year,"Homicide rate per 100,000 population - Both sexes - All ages"
1189,El Salvador,SLV,1995,138.77399
1188,El Salvador,SLV,1994,134.77866
1190,El Salvador,SLV,1996,117.16123
1191,El Salvador,SLV,1997,112.50055
1209,El Salvador,SLV,2015,106.8196


### ¿Cuáles son los países con menor nivel de homicidios?

In [10]:
# Five rows with the lowest homicide rate (ascending sort is the default).
# Each row is one entity in one year, so the same entity can appear more
# than once.
(
    df_homicide
    .sort_values(
        by="Homicide rate per 100,000 population - Both sexes - All ages",
    )
    .head()
)

Unnamed: 0,Entity,Code,Year,"Homicide rate per 100,000 population - Both sexes - All ages"
2049,Kiribati,KIR,1995,0.0
3226,Saint Helena,SHN,2001,0.0
3227,Saint Helena,SHN,2002,0.0
3228,Saint Helena,SHN,2003,0.0
150,Anguilla,AIA,2005,0.0


## Visualización de los homicidios por regiones

In [11]:
# Line chart of the homicide rate over time, one coloured line per entity in
# df_null (the rows without a country code).
fig = px.line(
    data_frame=df_null,
    x="Year",
    y="Homicide rate per 100,000 population - Both sexes - All ages",
    color="Entity",
    title="Homicidios por regiones",
)

# update_layout merges into the existing layout, so the title text set above
# is kept and only its horizontal position changes (centred).
fig.update_layout(
    title={
        "x": 0.5,
        "xanchor": "center",
    }
)

El orden de las regiones por nivel de homicidios suele mantenerse en el tiempo. Lideran las Américas, seguidas por África, aunque la diferencia entre ambas se reduce desde 2020. El tercer lugar lo ocupa Melanesia, con una diferencia sustancial sobre las regiones siguientes.