# Visualisations

Une version nettoyée de la sandbox, contenant uniquement les visualisations qui fonctionnent.


## Liste des variables

Liste des variables démographiques disponibles par circonscription électorale.

In [None]:
from preprocess import *

# Load the demographics table and list its columns, one "(position, name)"
# pair per line, so columns can be looked up by index later on.
df = get_demographics_data()
column_listing = '\n'.join(str(pair) for pair in enumerate(df.columns))
print(column_listing)

Liste des variables pour le dataset d'immigration par quartier (pas par circonscription !)

In [1]:
from preprocess import *

# Load the immigration table and list its columns, one "(position, name)"
# pair per line. `df` is kept as a notebook-level name: the next cell reads it.
df = get_immigration_data()
column_listing = '\n'.join(str(pair) for pair in enumerate(df.columns))
print(column_listing)

(0, 'Arrondissement')
(1, 'Population totale')
(2, 'Non Immigrante')
(3, 'Immigrante')
(4, 'Immigrante Avant 2010')
(5, 'Immigrante 2011 à 2015')
(6, 'Immigrante 2016 à 2021')
(7, 'Résidente Non Permanente')
(8, 'Population totale - Hommes')
(9, 'Non Immigrante - Hommes')
(10, 'Immigrante - Hommes')
(11, 'Immigrante Avant 2010 - Hommes')
(12, 'Immigrante 2011 à 2015 - Hommes')
(13, 'Immigrante 2016 à 2021 - Hommes')
(14, 'Résidente Non Permanente - Hommes')
(15, 'Population totale - Femmes')
(16, 'Non Immigrante - Femmes')
(17, 'Immigrante - Femmes')
(18, 'Immigrante Avant 2010 - Femmes')
(19, 'Immigrante 2011 à 2015 - Femmes')
(20, 'Immigrante 2016 à 2021 - Femmes')
(21, 'Résidente Non Permanente - Femmes')
(22, 'Population totale.1')
(23, 'Immigrante Economique')
(24, 'Demandeure Principale')
(25, 'Demandeure Secondaire')
(26, 'Immigrante parrainée par la famille')
(27, 'Réfugiée')
(28, 'Autre immigrante')
(29, 'Population totale - Hommes.1')
(30, 'Immigrante Economique - Hommes')
(3

In [2]:
# Show the raw 'Arrondissement' labels of the table loaded in the previous
# cell (this cell relies on that cell having been run first).
arrondissements = df['Arrondissement']
print(arrondissements)

0                  Agglomeration de Montreal
1                          Ville de Montreal
2                      Ahuntsic-Cartierville
3                                      Anjou
4           Cte-des-NeigesNotre-Dame-de-Grce
5                 L'le-BizardSainte-Genevive
6                                    LaSalle
7                                    Lachine
8               MercierHochelaga-Maisonneuve
9                               Montral-Nord
10                                 Outremont
11                       Pierrefonds-Roxboro
12    Rivire-des-PrairiesPointe-aux-Trembles
13                                RosemontLa
14                             Saint-Laurent
15                              Saint-Lonard
16                                    Verdun
17                               Ville-Marie
18        VilleraySaint-MichelParc-Extension
19                     Le_Plateau-Mont-Royal
20                              Le_Sud-Ouest
Name: Arrondissement, dtype: object


Liste des variables pour le dataset de votes.

In [None]:
from preprocess import *

# Load the election results table and list its columns, one
# "(position, name)" pair per line.
df = get_elections_data()
column_listing = '\n'.join(str(pair) for pair in enumerate(df.columns))
print(column_listing)

## Cartes

Proportion d'immigrants

In [None]:
from maps import *
from preprocess import *

# Map coloured by the 'Immigrants' column of the demographics table.
# NOTE(review): this cell passes zoom='Québec' while the votes map further
# down passes zoom='quebec' — confirm which spelling get_map expects.
df = get_demographics_data()
map_data = get_districts_mapdata()

immigrant_share = df["Immigrants"]
fig = get_map(map_data, immigrant_share, zoom='Québec')
fig.show()

Proportion d'immigrants mais en ciblant Montréal seulement

In [None]:
from maps import *
from preprocess import *

# Same map as above, but only the districts belonging to the 'Montréal'
# subset keep a colour value.
df = get_demographics_data()
map_data = get_districts_mapdata()

# Only display the districts located in Montreal.
mask = get_subset_mask(df['Circonscription'], circo_subsets['Montréal'])

# Work on an explicit copy: `.values` can hand back a view on the frame's own
# buffer, so blanking entries through it would silently overwrite
# df['Immigrants'], and it raises on the read-only arrays returned when
# pandas Copy-on-Write is active.
color = df['Immigrants'].to_numpy(copy=True)
# Districts outside the subset get no value.
# NOTE(review): presumably get_map renders None as "no data" — confirm.
color[~mask] = None

fig = get_map(map_data, color, zoom='montreal')
fig.show()

Population d'immigrants par quartier de Montréal

In [None]:
from maps import *
from preprocess import *

# Base map of the Montreal neighbourhoods.
# NOTE(review): the immigration table is loaded but never handed to get_map,
# so no data column drives the colouring here — confirm whether a colour
# argument is still to be wired in.
df = get_immigration_data()
map_data = get_neighborhoods_mapdata()

fig = get_map(
    map_data,
    zoom='montreal',
)
fig.show()

Provenance des immigrants d'un certain quartier de Montréal [WIP]

In [None]:
from preprocess import *
from maps import *

# Borough whose immigrants' countries of origin are mapped.
arrondissement = 'Ville-Marie'
# Alternative borough: 'Ahuntsic-Cartierville'

df = get_immigration_data()
mapdata = get_countries_mapdata()

# Per-country colour values for the selected borough, computed by the
# project helper from the immigration table and the country map data.
color = get_countries_of_origin(arrondissement, df, mapdata)

fig = get_map(mapdata, color)
fig.update_layout(
    title=f"Where do {arrondissement}'s immigrants come from?",
    width=1000,
    height=800,
)
fig.show()


Nombre de votes par circo, au total ou pour un parti spécifiquement.

In [None]:
from maps import *
from preprocess import *

# Party used by the optional per-party variant below (unused for the
# all-parties total that is currently mapped).
party = 'P.L.Q./Q.L.P.'
# Alternative party: 'Q.S.'

raw_votes = get_elections_data()
df = vote_summary_by_circo(raw_votes)
# Per-party variant — summarise only the rows of the selected party:
# df = vote_summary_by_circo(raw_votes[raw_votes['abreviationPartiPolitique'] == party])
map_data = get_districts_mapdata()

vote_totals = df['nbVoteTotal']
fig = get_map(map_data, vote_totals, zoom='quebec')
fig.show()

## Autres

### Correlation between immigration and vote for a given party (e.g. PLQ)

In [None]:
from preprocess import *
import plotly.express as px
import plotly.graph_objects as go

# Vote summary restricted to one party, next to the demographics table.
# `pd` and `np` are expected to come from the star-import of `preprocess`.
all_votes = get_elections_data()
is_plq = all_votes['abreviationPartiPolitique'] == 'P.L.Q./Q.L.P.'
df_elections = vote_summary_by_circo(all_votes[is_plq])
df_demographics = get_demographics_data()

# The two columns below are paired by position, so both tables must list the
# same districts in the same order.
assert df_elections['nomCirconscription'].equals(df_demographics['Circonscription'])

# Turn the decimal comma into a dot and strip the percent sign before casting.
immigrant_share = (
    df_demographics['Immigrants']
    .str.replace(',', '.')
    .str.replace('%', '')
    .astype(float)
)
paired = np.array([immigrant_share, df_elections['tauxVote']]).T
df = pd.DataFrame(data=paired, columns=['Immigrants', 'tauxVote'])

fig = px.scatter(df, x='Immigrants', y='tauxVote')
fig.show()

### Barplots pour quelques variables d'immigration

In [None]:
import plotly.express as px
import plotly.colors as colors
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

# Columns to chart, one subplot row each.
to_plot = [
    #'Population totale',
    'Immigrante',
    'Réfugiée',
    'Non permamente, Population résidente non permanente',
    'Continent Asie',
    'Continent Europe',
    'Continent Afrique',
    'Continent Amériques',
    'Continent Oceanie et autres lieux de naissance',
    'Parlant anglais à la maison',
    'Parlant français à la maison',
]

# Rows 0 and 1 are dropped before plotting.
# NOTE(review): presumably the two aggregate rows (agglomeration / city) —
# confirm against the CSV.
df = pd.read_csv('../assets/data/arrondissements.csv').drop([0, 1]).reset_index(drop=True)

palette = colors.qualitative.Plotly
boroughs = df['Arrondissement']
total_population = df['Population totale']

# Left column: raw counts. Right column: counts divided by 'Population totale'.
fig = make_subplots(
    rows=len(to_plot),
    cols=2,
    shared_xaxes=True,
    vertical_spacing=0.02,
    horizontal_spacing=0.1,
)
for row, col_name in enumerate(to_plot, start=1):
    # One colour per variable, cycling through the palette.
    bar_color = palette[(row - 1) % len(palette)]
    counts = df[col_name]

    absolute_bars = go.Bar(
        x=boroughs,
        y=counts,
        name=col_name,
        legendgroup=col_name,
        showlegend=True,
        marker_color=bar_color,
    )
    # Same legend group as the raw counts, so only one legend entry is shown.
    relative_bars = go.Bar(
        x=boroughs,
        y=counts / total_population,
        legendgroup=col_name,
        showlegend=False,
        marker_color=bar_color,
    )
    fig.add_trace(absolute_bars, row=row, col=1)
    fig.add_trace(relative_bars, row=row, col=2)

# Horizontal legend anchored above the top-right corner of the figure.
legend_style = {
    'orientation': 'h',
    'yanchor': 'bottom',
    'y': 1.02,
    'xanchor': 'right',
    'x': 1,
}
fig.update_layout(legend=legend_style, width=1000, height=800)
fig.show()

### Pie chart des continents

In [None]:
import plotly.express as px
import plotly.colors as colors
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

# Continent columns shown in the pie chart.
to_plot = [
    #'Population totale',
    'Continent Asie',
    'Continent Europe',
    'Continent Afrique',
    'Continent Amériques',
    'Continent Oceanie et autres lieux de naissance',
    #'Parlant anglais à la maison',
    #'Parlant français à la maison',
]

df = pd.read_csv('../assets/data/arrondissements.csv')

# Pie chart of the continent columns, read from the first row of the file.
continent_totals = df.loc[0, to_plot]
continent_pie = go.Pie(labels=to_plot, values=continent_totals, name='Population totale')
fig = go.Figure(continent_pie)

# Column positions 134..192 that are not continent columns.
# NOTE(review): the offsets are tied to the current CSV layout — confirm if
# the file changes.
cols = list(filter(lambda k: not df.columns[k].startswith('Continent'), range(134, 193)))

fig.show()

# Bar chart of those remaining columns, again for the first row.
country_bars = go.Bar(x=df.columns[cols], y=df.iloc[0, cols])
fig = go.Figure(country_bars)
fig.update_layout(width=800)
fig.show()

### Continents and countries sunburst chart

In [None]:
import plotly.express as px
import plotly.colors as colors
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

# Sunburst chart: 'Continent ...' columns as roots, country columns as their
# children, using the values of the first row of the file.
# NOTE(review): the column offsets below are tied to the current CSV layout,
# and row 0 is presumably the aggregate row — confirm against the file.
df = pd.read_csv('../assets/data/arrondissements.csv')

idx_continents = [k for k in range(134, 193) if df.columns[k].startswith('Continent')]
idx_countries = [k for k in range(134, 193) if not df.columns[k].startswith('Continent')]

idx_americas = list(range(135, 146))
idx_europe = list(range(147, 163))
idx_africa = list(range(164, 174))
idx_asia = list(range(175, 192))
idx_oceania = [192]

# The i-th continent column owns the i-th group of country columns. Keeping
# this pairing in ONE place is the fix: the previous version built `labels`
# in the order Americas/Europe/Africa/Asia but sized `parents` with the
# Africa and Europe group lengths swapped, and left `parents` one entry short.
country_groups = [idx_americas, idx_europe, idx_africa, idx_asia]
children_of = dict(zip(idx_continents, country_groups))

# Diagnostics: continent totals from the file next to the per-group sums.
print(df.columns[idx_continents])
print(len(idx_continents), len(idx_countries), sum(len(group) for group in country_groups))
print(df.iloc[0, idx_continents])
sums = [sum(df.iloc[0, group]) for group in country_groups + [idx_oceania]]
print(sums)

labels, parents, values = [], [], []
for continent_idx in idx_continents:
    continent = df.columns[continent_idx]
    children = children_of.get(continent_idx, [])

    labels.append(continent)
    parents.append('')
    # With Plotly's default branchvalues='remainder', a parent's own value is
    # added on top of its children: use 0 when the continent is broken down
    # by country, and its own total when it has no children (leaf continent).
    values.append(0 if children else df.iloc[0, continent_idx])

    for country_idx in children:
        labels.append(df.columns[country_idx])
        parents.append(continent)
        values.append(df.iloc[0, country_idx])

# Columns listed in idx_oceania that are not themselves continent columns are
# kept as stand-alone roots, so no value is dropped (and no id is emitted
# twice when the column is already a continent root).
for extra_idx in idx_oceania:
    if extra_idx not in idx_continents:
        labels.append(df.columns[extra_idx])
        parents.append('')
        values.append(df.iloc[0, extra_idx])

# Sunburst needs one parent and one value per label, and unique ids.
assert len(labels) == len(parents) == len(values)
assert len(set(labels)) == len(labels), "duplicate sunburst ids"

fig = go.Figure(go.Sunburst(
    ids=labels,
    labels=labels,
    parents=parents,
    values=values,
))
fig.update_layout(width=800, height=800)
fig.show()