# Data visualization of incidents in Barcelona's neighborhoods 2019

#### Import libraries

In [None]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

print('Libraries imported.')

#### Obtain neighborhood database with coordinates

In [None]:
import types
from botocore.client import Config
import ibm_boto3

In [None]:
# The code was removed by Watson Studio for sharing.

In [None]:
# Shim that is bound onto the response body below when it has no __iter__.
# It has to exist before the hasattr guard runs; no earlier visible cell
# defines it, so without this definition the fallback raises NameError.
def __iter__(self):
    return 0


# Fetch the neighborhood-coordinates CSV object from the storage bucket.
body = client_335529e8571343eaa4cecdd51c945c46.get_object(
    Bucket='capstoneproject-donotdelete-pr-q6jyeyrogmggrb',
    Key='neighborhoods_coordinates_barcelona.csv',
)['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

# Parse the CSV into a data frame and preview the first rows.
df_coordinates = pd.read_csv(body)
df_coordinates.head()

#### Keep only the needed columns of df_coordinates

In [None]:
# Keep the neighborhood code (used later as the merge key) and both coordinates.
new_columns = [
    'Neighborhood Code',
    'Neighborhood Latitude',
    'Neighborhood Longitude',
]
df_coordinates = df_coordinates[new_columns]
df_coordinates.head()

## Clean Data

#### Import the 2019 incidents dataset

In [None]:
# Placeholder iterator; it is only attached to the response body below
# when that object does not already provide __iter__.
def __iter__(self):
    return 0


# Fetch the 2019 incidents CSV object from the storage bucket.
body = client_335529e8571343eaa4cecdd51c945c46.get_object(
    Bucket='capstoneproject-donotdelete-pr-q6jyeyrogmggrb',
    Key='2019_incidents_gestionats_gub.csv',
)['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

# Parse the CSV into the main data frame and preview the first rows.
df = pd.read_csv(body)
df.head()

#### Select needed columns

In [None]:
# List the column names of the raw incidents table before selecting a subset.
df.columns

In [None]:
# Restrict the incidents table to the columns used in the analysis:
# incident description, district/neighborhood code and name, month and count.
new_columns = [
    'Descripcio_Incident',
    'Codi_districte',
    'Nom_districte',
    'Codi_barri',
    'Nom_barri',
    'Mes_any',
    'Numero_incidents_GUB',
]
df = df[new_columns]
df.head()

In [None]:
# Show the dtype pandas inferred for each selected column.
df.dtypes

#### Rename columns

In [None]:
# Map the original column names to the English names used from here on.
# NOTE: 'Disctrict' (sic) is the spelling the later column re-ordering step
# looks up, so it is kept unchanged here.
new_columns_name_dict = {
    'Descripcio_Incident': 'Incident',
    'Codi_barri': 'Neighborhood Code',
    'Nom_barri': 'Neighborhood',
    'Codi_districte': 'District Code',
    'Nom_districte': 'Disctrict',
    'Mes_any': 'Month',
    'Numero_incidents_GUB': 'Number of Incidents',
}
df.rename(columns=new_columns_name_dict, inplace=True)
df.head()

In [None]:
# Row/column count before any rows are removed.
df.shape

In [None]:
# List the distinct neighborhood values to spot entries that need cleaning.
df["Neighborhood"].unique()

#### Remove rows whose Neighborhood is NaN or 'Desconegut'

In [None]:
# Collect the index labels of the rows whose Neighborhood is 'Desconegut' ...
indexNames = df.index[df['Neighborhood'] == 'Desconegut']
# ... and remove exactly those rows from the data frame, in place.
df.drop(indexNames, inplace=True)

In [None]:
# Discard rows without a neighborhood name, then renumber the index from 0.
df = df.dropna(axis='index', subset=['Neighborhood'])
df = df.reset_index(drop=True)

In [None]:
# Re-check the distinct neighborhood values after the clean-up above.
df["Neighborhood"].unique()

#### Change column types

In [None]:
# Cast the district code and the incident count to 64-bit integers.
df = df.astype(dict.fromkeys(['District Code', 'Number of Incidents'], 'int64'))

In [None]:
# Confirm the dtypes after the cast.
df.dtypes

#### Add the coordinates to the existing data frame

In [None]:
# Left-join the coordinates onto the incidents by neighborhood code, so every
# incident row is kept and gains latitude/longitude where a match exists.
df = df.merge(df_coordinates, on='Neighborhood Code', how='left')
df.head()

In [None]:
# Row/column count right after the merge.
df.shape

In [None]:
# Rows left without a latitude by the left join are removed; the index is
# then renumbered from 0.
df = df.dropna(axis='index', subset=['Neighborhood Latitude'])
df = df.reset_index(drop=True)
df.head()

In [None]:
# Row/column count after dropping rows that have no coordinates.
df.shape

#### Convert the Month column into one column per month

In [None]:
def _month_label(value):
    """Return a month label as a plain integer string (1, 1.0 and '1' -> '1').

    Labels that cannot be read as a number are returned as ``str(value)``.
    """
    try:
        return str(int(float(value)))
    except (TypeError, ValueError, OverflowError):
        return str(value)


# One indicator column per distinct month value.
# get_dummies names the columns after the values themselves, so a numerically
# parsed Month column would give numeric labels; the labels are normalised to
# strings because the following cells select the months as '1' ... '12'.
dummy_variable = pd.get_dummies(df['Month']).rename(columns=_month_label)

In [None]:
# Append the month indicator columns ("dummy_variable") to the right of "df".
df_incidents_months = pd.concat([df, dummy_variable], axis='columns')

# The original Month column is now redundant, so remove it.
df_incidents_months.drop(columns='Month', inplace=True)
df_incidents_months.head()

In [None]:
# Put the descriptive columns first, followed by the month columns 1..12.
column_order = [
    'District Code',
    'Disctrict',
    'Neighborhood Code',
    'Neighborhood',
    'Incident',
    'Number of Incidents',
    'Neighborhood Latitude',
    'Neighborhood Longitude',
] + [str(month_number) for month_number in range(1, 13)]
df_incidents_months = df_incidents_months.reindex(columns=column_order)
df_incidents_months.head()

#### Add the number of incidents to the months columns

In [None]:
# Scale each row's month indicators by that row's incident count, so the
# count ends up in the column of the month it belongs to.
number_incidents_month = df_incidents_months[
    [str(month_number) for month_number in range(1, 13)]
].mul(df_incidents_months['Number of Incidents'], axis=0)

In [None]:
# Preview the per-month incident counts computed above.
number_incidents_month.head()

In [None]:
# Remove the raw count and the 0/1 month indicators; the per-month counts in
# number_incidents_month take their place.
df_incidents_months.drop(
    columns=['Number of Incidents']
    + [str(month_number) for month_number in range(1, 13)],
    inplace=True,
)
df_incidents_months.head()

In [None]:
# Attach the per-month incident counts as new columns on the right.
df_incidents_months = pd.concat(
    [df_incidents_months, number_incidents_month],
    axis='columns',
)
df_incidents_months.head()

#### Sum all the incidents by neighborhood

In [None]:
# Preview of the table without the Incident column. df_neigh_sum is assigned
# again by the groupby that follows.
df_neigh_sum = df_incidents_months.drop(columns='Incident')
df_neigh_sum.head()

In [None]:
# Add up the monthly incident counts of every neighborhood.
df_neigh_sum = (
    df_incidents_months
    .groupby('Neighborhood')[[str(month_number) for month_number in range(1, 13)]]
    .sum()
)
df_neigh_sum.head()

In [None]:
# Yearly total per neighborhood: sum across the columns of each row.
df_neigh_sum['Total'] = df_neigh_sum.sum(axis='columns')
df_neigh_sum.head()

In [None]:
# Rank the neighborhoods from most to fewest incidents (sorted in place).
df_neigh_sum.sort_values('Total', ascending=False, inplace=True)
df_neigh_sum.head()

#### Sum all the incidents by Incident and neighborhood

In [None]:
# Add up the monthly counts for every (neighborhood, incident type) pair.
df_incidentsby_neigh_sum = (
    df_incidents_months
    .groupby(['Neighborhood', 'Incident'])[
        [str(month_number) for month_number in range(1, 13)]
    ]
    .sum()
)
df_incidentsby_neigh_sum.head()

In [None]:
# Yearly total per (neighborhood, incident type): sum across each row.
df_incidentsby_neigh_sum['Total'] = df_incidentsby_neigh_sum.sum(axis='columns')
df_incidentsby_neigh_sum.head()

In [None]:
# Rank the pairs from highest to lowest yearly total (sorted in place).
df_incidentsby_neigh_sum.sort_values('Total', ascending=False, inplace=True)
df_incidentsby_neigh_sum.head()

#### Count of incidents

In [None]:
# Add up the monthly counts of every incident type across all neighborhoods.
df_incidents_sum = (
    df_incidents_months
    .groupby('Incident')[[str(month_number) for month_number in range(1, 13)]]
    .sum()
)
df_incidents_sum.head()

In [None]:
# Yearly total per incident type: sum across the columns of each row.
df_incidents_sum['Total'] = df_incidents_sum.sum(axis='columns')
df_incidents_sum.head()

In [None]:
# Rank the incident types from most to least frequent (sorted in place).
df_incidents_sum.sort_values('Total', ascending=False, inplace=True)
df_incidents_sum.head()

## Data Visual Analysis

In [None]:
%matplotlib inline 

import matplotlib as mpl
import matplotlib.pyplot as plt

### Visualize months vs number of incidents in each neighborhood

In [None]:
# Column labels of the twelve month columns, as strings '1' ... '12'.
months = [str(month_number) for month_number in range(1, 13)]
months

In [None]:
# There are many neighborhoods, so only the 10 with the highest yearly totals
# are plotted.
df_neigh_sum_sorted = df_neigh_sum.sort_values('Total', ascending=False)
# Keep the first 10 rows and the month columns only, then transpose so that
# months become the rows and neighborhoods the columns.
top_df = df_neigh_sum_sorted.iloc[:10][months].T

In [None]:
# One line per neighborhood, months on the x axis.
top_df.plot.line(figsize=(10, 6))

plt.xlabel('Months')
plt.ylabel('Number of incidents')
plt.title('Top 10 neighborhoods with highest number of incidents in each month of 2019')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)
plt.show()

### Visualize total number of incidents by neighborhood

In [None]:
# Drop the twelve month columns from the neighborhood summary.
df_neigh_total = df_neigh_sum.drop(
    columns=[str(month_number) for month_number in range(1, 13)]
)
df_neigh_total.head()

In [None]:
# One bar per neighborhood.
df_neigh_total.plot.bar(figsize=(10, 6))

plt.title('Total number of incidents in 2019 by neighborhood')
plt.xlabel('Neighborhood')
plt.ylabel('Total')

plt.show()

### Visualize total number of each type of incident

In [None]:
# Drop the twelve month columns from the incident-type summary.
df_incidents_total = df_incidents_sum.drop(
    columns=[str(month_number) for month_number in range(1, 13)]
)
df_incidents_total.head()

In [None]:
# Remove white space at the end of Incidents values
df_incidents_total.index = df_incidents_total.index.str.rstrip()

In [None]:
# As there are many types of incidents, just the 10th firts with the higher incidents rates has been displayed
df_incidents_total_sorted=df_incidents_total.sort_values(by='Total', ascending=False)
top_incidents_df=df_incidents_total_sorted.head(10)

In [None]:
# One bar per incident type.
top_incidents_df.plot.bar(figsize=(10, 6))

plt.title('Top 10 total number of incidents in 2019 by type of incident')
plt.xlabel('Type of Incident')
plt.ylabel('Total')

plt.show()