In [1]:
import pandas as pd
from collections import Counter
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from urllib.request import urlopen
import json

In [2]:
accidents_df = pd.read_csv('Datasets/accidents_2017.csv')
air_quality_df = pd.read_csv('Datasets/air_quality_Nov2017.csv')
air_ports_df = pd.read_csv('Datasets/air_stations_Nov2017.csv')
births_df = pd.read_csv('Datasets/births.csv')
bus_stops_df = pd.read_csv('Datasets/bus_stops.csv')
death_df = pd.read_csv('Datasets/deaths.csv')
immigrants_nationality_df = pd.read_csv('Datasets/immigrants_by_nationality.csv')
migrants_age_df = pd.read_csv('Datasets/immigrants_emigrants_by_age.csv')
migrants_destination_df = pd.concat([pd.read_csv('Datasets/immigrants_emigrants_by_destination.csv'), pd.read_csv('Datasets/immigrants_emigrants_by_destination2.csv')])
migrants_sex_df = pd.read_csv('Datasets/immigrants_emigrants_by_sex.csv')
life_expectancy_df = pd.read_csv('Datasets/life_expectancy.csv')
frequent_babies_df = pd.read_csv('Datasets/most_frequent_baby_names.csv')
frequent_people_df = pd.read_csv('Datasets/most_frequent_names.csv')
population_df = pd.read_csv('Datasets/population.csv')
transport_df = pd.read_csv('Datasets/transports.csv')
unemployment_df = pd.read_csv('Datasets/unemployment.csv')

In [45]:
def ageToageGroup(age: str):
    if age == '0-4' or age == '5-9':
        return '0-9'
    elif age == '10-14' or age == '15-19':
        return '10-19'
    elif age == '20-24':
        return '20-24'
    elif age == '25-29' or age == '30-34' or age == '35-39':
        return '25-39'
    elif age == '40-44' or age == '45-49':
        return '40-49'
    elif age == '50-54' or age == '55-59':
        return '50-59'
    else:
        return '60+'
# Neighborhood Wise
Districts = dict()
Districts_widerRanges = dict()
for index, row in population_df.iterrows():
    if row['District.Name'] in Districts:
        Districts[row['District.Name']].update([row['Age']] * row['Number'])
        Districts_widerRanges[row['District.Name']].update([ageToageGroup(row['Age'])] * row['Number'])
    else:
        Districts[row['District.Name']] = Counter([row['Age']] * row['Number'])
        Districts_widerRanges[row['District.Name']] = Counter([ageToageGroup(row['Age'])] * row['Number'])

ageDistribution_df = pd.DataFrame(Districts)

In [46]:
DistrictName = 'Ciutat Vella'
figures = list()

for DistrictName in ageDistribution_df.columns:
    figures.append(go.Bar(x=ageDistribution_df.axes[0], y=(ageDistribution_df[DistrictName]/ageDistribution_df[DistrictName].sum())*100, name=DistrictName))

fig = go.Figure(data=figures)
fig.update_layout(
    xaxis_title='Ages',
    yaxis_title='Percentages',
    # barmode='stack'  # Set the bar mode to 'stack' for stacking
)
fig.show()
pyo.plot(fig, filename='Plots/AgeDistribution_Districts.html', auto_open=False)

'Plots/AgeDistribution_Districts.html'

In [None]:
df1 = unemployment_df.groupby(['District Name', 'Year']).agg({'Number': np.sum}).reset_index().rename(columns={'Number': 'UnemployedCount'})
df2 = immigrants_nationality_df.groupby(['District Name', 'Year']).agg({'Number': np.sum}).reset_index().rename(columns={'Number': 'ImmigrantsCount'})
df = pd.merge(df1, df2, on=['District Name', 'Year'], how='outer')


# Calculate the covariance matrix
for hood in df['District Name'].unique():
    cov_matrix = df[df['District Name'] == hood].cov()

    # Visualize the covariance matrix using a heatmap (Seaborn)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cov_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=.5)
    plt.title(hood)
    plt.show()


In [None]:
with urlopen('https://raw.githubusercontent.com/martgnz/bcn-geodata/master/barris/barris.geojson') as response:
    gjson_neigh = json.load(response)
gjson_neigh["features"][7]['properties']['NOM'] = 'el Poble Sec' #rename this district to avoid errors with geojson

transport_df['Neighborhood.Name'] = transport_df['Neighborhood.Name'].replace(['el Poble-sec'], 'el Poble Sec')
temp = transport_df.groupby(['Transport', 'Neighborhood.Name']).nunique().reset_index()

px.choropleth_mapbox(temp,
                     geojson = gjson_neigh, color = 'Station', locations = 'Neighborhood.Name',
                     featureidkey="properties.NOM",
                     color_continuous_scale="Emrld",
                     center={"lat": 41.395, "lon": 2.18},
                     animation_frame ='Transport',
                     mapbox_style="carto-positron", zoom=10.3, opacity=0.9,
                     height=620)

In [3]:
df = population_df.groupby(['Year', 'Age', 'District.Name']).agg({'Number': sum}).reset_index()
districts = df['District.Name'].unique()
color_sequence = sns.color_palette(palette='viridis', n_colors=len(districts))

fig = px.histogram(df, x='Age', y='Number', animation_frame='Year', hover_name='District.Name', color='District.Name', color_discrete_sequence=color_sequence[0])
fig.update_layout(
    xaxis_title='Ages',
    yaxis_title='Population',
    barmode='group'
)
# fig.show()
pyo.plot(fig, filename='Plots/AgeDistribution_Districts.html', auto_open=False)

'Plots/AgeDistribution_Districts.html'

In [7]:
df = population_df.groupby(['Year', 'District.Name']).agg({'Number': np.sum}).reset_index()
districts = df['District.Name'].unique()
color_sequence = sns.color_palette(palette='viridis', n_colors=len(districts))

fig = px.histogram(df, x='District.Name', y='Number', animation_frame='Year', hover_name='District.Name', color='District.Name', color_discrete_sequence=color_sequence[0])
fig.update_layout(
    xaxis_title='Districts',
    yaxis_title='Immigrants',
    barmode='group'
)
# fig.show()
pyo.plot(fig, filename='Plots/MigrantsDistribution_Districts.html', auto_open=False)

'Plots/MigrantsDistribution_Districts.html'