In [1]:
%matplotlib inline
#----------------------------------------------------------------------
#------------------------- IMPORT LIBRARIES ---------------------------
import json # reading geojson files

import branca.colormap as cm # colormap catalogue inspected before building the map
import folium
import matplotlib.pyplot as plt # plotting data
import numpy as np
import pandas as pd
from unidecode import unidecode

In [2]:
# Data
# Both raw inputs are parsed with ";" as the field separator.
csv_options = {"sep": ";"}
murders_df = pd.read_csv("./data/asesinatos.csv", **csv_options)
slaughters_df = pd.read_csv("./data/masacres.csv", **csv_options)

In [3]:
# Preview the first two rows of the raw murders table to check columns and parsing.
murders_df.head(2)

Unnamed: 0,Día,Mes,Año,Departamento,Municipio,Lugar de Ocurrencia,Tipo de Implicado (1),Tipo de Implicado (2),Nº Víctimas,Fuente,Unnamed: 10
0,6,1,1981.0,Santander,Barrancabermeja,,Grupo Armado No Identificado,Grupo Armado No Identificado,1,Itinerario de la Represión Oficial y Militar e...,
1,7,1,1981.0,Huila,Algeciras,,Grupos Paramilitares,Grupos Paramilitares,2,Itinerario de la Represión Oficial y Militar e...,


In [4]:
# Preview the first two rows of the raw massacres table to check columns and parsing.
slaughters_df.head(2)

Unnamed: 0,Día,Mes,Año,Departamento,Municipio,Lugar de Ocurrencia,Tipo de Implicado,Nº Víctimas,Fuente,Unnamed: 9
0,2,10,1980.0,Caquetá,Puerto Rico,Vereda El Topacio,Grupos Paramilitares,5,Enterrar y Callar Pp. 109 CPDH,
1,24,10,1980.0,Arauca,Tame,IPD La Arabia,Fuerza Pública-Ejército Nacional,5,Enterrar y Callar Pp. 89 Boletín de Prensa CPD...,


In [5]:
#Clean
# Keep only rows that carry both a department and a victim count. The explicit
# .copy() detaches each result from the frame it was filtered from, so the
# column assignments below do not raise SettingWithCopyWarning.
required_cols = ["Departamento", "Nº Víctimas"]
murders_df = murders_df.dropna(subset=required_cols).copy()
slaughters_df = slaughters_df.dropna(subset=required_cols).copy()

# Trim surrounding whitespace, then apply literal (regex=False) substring fixes
# so the same department is spelled the same way in both tables.
murders_df["Departamento"] = murders_df["Departamento"].str.strip()
for misspelt, corrected in (("Choco", "Chocó"), ("meta", "Meta"), ("César", "Cesar")):
    murders_df["Departamento"] = murders_df["Departamento"].str.replace(
        misspelt, corrected, regex=False
    )
slaughters_df["Departamento"] = (
    slaughters_df["Departamento"]
    .str.replace("Quindio", "Quindío", regex=False)
    .str.strip()
)

# Remove the stray acute accent from counts written as "´1". astype(str) keeps
# this step working even if some values were parsed as numbers instead of text.
murders_df["Nº Víctimas"] = (
    murders_df["Nº Víctimas"].astype(str).str.replace("´1", "1", regex=False)
)

# Department names seen in each table after normalisation.
departaments_mu = set(murders_df["Departamento"].unique())
departaments_sl = set(slaughters_df["Departamento"].unique())

# Drop rows whose department appears in only one of the two tables, except for
# the explicitly whitelisted names; "Exterior" is always dropped.
# NOTE(review): one-sided names are presumably leftover misspellings or
# non-departments - confirm against the data before relying on this.
weird_set = (departaments_mu - departaments_sl - {"Vaupés", "San Andrés Isla"}) | {"Exterior"}
murders_df = murders_df[~murders_df["Departamento"].isin(weird_set)]
weird_set = (departaments_sl - departaments_mu - {"Guainía"}) | {"Exterior"}
slaughters_df = slaughters_df[~slaughters_df["Departamento"].isin(weird_set)]

In [6]:
#Filter
# Reduce both tables to the two columns the map needs, under short names.
# Selecting, renaming and casting in one chain produces a fresh frame, so no
# column is ever assigned on a slice (avoids SettingWithCopyWarning). Both
# tables get an integer "count" so they share a dtype when combined.
column_names = {"Departamento": "depto", "Nº Víctimas": "count"}
murders_df = (
    murders_df[list(column_names)]
    .rename(columns=column_names)
    .astype({"count": int})
)
slaughters_df = (
    slaughters_df[list(column_names)]
    .rename(columns=column_names)
    .astype({"count": int})
)

In [7]:
#Join
# Stack the two tables into one. DataFrame.append was deprecated in pandas 1.4
# and removed in 2.0; pd.concat is its replacement and, like append without
# ignore_index, keeps each frame's original row labels.
print(len(murders_df))
print(len(slaughters_df))
df = pd.concat([murders_df, slaughters_df])
print(len(df))

16304
1976
18280


In [8]:
# Preview the first two rows of the combined table.
df.head(2)

Unnamed: 0,depto,count
0,Santander,1
1,Huila,2


In [9]:
#Names
def _normalise_depto(name):
    """Return the department name upper-cased, without accents, with fixed aliases.

    unidecode strips every diacritic (so "Ñ" becomes "N"); the substitutions
    afterwards rename specific departments and restore "NARIÑO".
    NOTE(review): the target spellings presumably mirror the NOMBRE_DPT
    property of colombia.geojson - confirm against that file.
    """
    name = unidecode(name).upper()
    substitutions = (
        ("DISTRITO CAPITAL", "SANTAFE DE BOGOTA D.C"),
        ("SAN ANDRES ISLA", "ARCHIPIELAGO DE SAN ANDRES PROVIDENCIA Y SANTA CATALINA"),
        ("NARINO", "NARIÑO"),
    )
    for old, new in substitutions:
        name = name.replace(old, new)
    return name


df["depto"] = df["depto"].apply(_normalise_depto)

In [10]:
#Group
# Total victim count per department; "depto" becomes the index of the result.
df_sum = df.groupby(by="depto").sum()
# Distinct department names that survived the aggregation.
deptos = set(df_sum.index)

In [11]:
# Display the per-department victim totals (indexed by department name).
df_sum

Unnamed: 0_level_0,count
depto,Unnamed: 1_level_1
ANTIOQUIA,8780
ARAUCA,793
ARCHIPIELAGO DE SAN ANDRES PROVIDENCIA Y SANTA CATALINA,1
ATLANTICO,337
BOLIVAR,1822
BOYACA,452
CALDAS,769
CAQUETA,559
CASANARE,309
CAUCA,1628


In [12]:
# Move "depto" from the index back to an ordinary column so the choropleth can
# be given both 'depto' and 'count' as data columns, then display the result.
df_sum_count = df_sum.reset_index()
df_sum_count

Unnamed: 0,depto,count
0,ANTIOQUIA,8780
1,ARAUCA,793
2,ARCHIPIELAGO DE SAN ANDRES PROVIDENCIA Y SANTA...,1
3,ATLANTICO,337
4,BOLIVAR,1822
5,BOYACA,452
6,CALDAS,769
7,CAQUETA,559
8,CASANARE,309
9,CAUCA,1628


In [32]:
# List branca's built-in linear colormaps (branca.colormap is imported as `cm`
# in the notebook's import cell, so every dependency is declared in one place).
# NOTE(review): presumably used to pick the 'YlGn' palette for the map - confirm.
cm.linear

0,1
PuBu,0.01.0
RdYlBu,0.01.0
Pastel1,0.01.0
YlGn,0.01.0
GnBu,0.01.0
Set2,0.01.0
PuRd,0.01.0
PuOr,0.01.0
PuBuGn,0.01.0
PiYG,0.01.0


In [43]:
# Path of the department boundaries file, defined once and reused below.
state_geo = r'colombia.geojson'

# Serialised copy of the GeoJSON. The context manager closes the file handle
# deterministically, and UTF-8 is the encoding the GeoJSON spec (RFC 7946)
# mandates, so accented department names decode the same on every platform.
with open(state_geo, 'r', encoding='utf-8') as geojson_file:
    geo_str = json.dumps(json.load(geojson_file))

# Six evenly spaced integer bin edges running from the smallest to the largest
# per-department total; the first and last edges equal the data min and max.
threshold_scale = np.linspace(df_sum_count['count'].min(),
                              df_sum_count['count'].max(), 6, dtype=int).tolist()

# Base map centred on Bogotá. "Mapbox Bright" is no longer shipped with current
# folium releases, so a built-in light basemap that needs no API key is used.
mapa = folium.Map(location=[4.7110, -74.0721],
                  tiles="cartodbpositron",
                  zoom_start=5)

# Map.choropleth(...) is deprecated in favour of the folium.Choropleth class,
# where `threshold_scale` is called `bins`. `key_on` uses the documented
# 'feature.'-prefixed path to the GeoJSON property holding the department name.
folium.Choropleth(
    geo_data=state_geo,
    data=df_sum_count,
    columns=['depto', 'count'],
    key_on='feature.properties.NOMBRE_DPT',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    bins=threshold_scale,
    legend_name='Victims of targeted killings and massacres',
).add_to(mapa)

mapa.save('victims.html')