# Evolució al llarg del temps

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Variables

# Paths of the yearly country-of-birth extracts, newest first (2024 down to
# 2002). Code further down recovers the year from characters 5-8 of each path,
# so the 'data/<year>_' prefix must be kept as is.
dataframes_naix = [
    f'data/{year}_pad_mdbas_lloc-naix-pais_lloc-naix-continent_sexe.csv'
    for year in range(2024, 2001, -1)
]

# Funcions

## Processament 

def process_files(dataframes, pais):
    """Return city-wide yearly totals for one origin group and for the Spanish-born.

    Args:
        dataframes: iterable of CSV paths. The year is read from characters
            5-8 of each path, so paths must look like 'data/<year>_...'.
        pais: value matched against the 'LLOC_NAIX_PAIS' column. When it is
            58, the "immigrant" group is every row whose code is *not* 58.

    Returns:
        DataFrame with columns 'year', 'total_immigrants', 'total_espanyols',
        sorted by year with a fresh 0..n-1 index.
    """
    yearly_rows = []

    for path in dataframes:
        table = pd.read_csv(path)

        # '..' placeholders are counted as 2 (presumably a suppressed small
        # count -- TODO confirm); anything else non-numeric becomes 0.
        numeric = pd.to_numeric(table['Valor'].replace('..', 2), errors='coerce')
        table['Valor'] = numeric.fillna(0).astype(int)

        origin = table['LLOC_NAIX_PAIS']
        is_spanish = origin == 58
        is_group = ~is_spanish if pais == 58 else origin == pais

        yearly_rows.append({
            'year': int(path[5:9]),
            'total_immigrants': table.loc[is_group, 'Valor'].sum(),
            'total_espanyols': table.loc[is_spanish, 'Valor'].sum(),
        })

    summary = pd.DataFrame(yearly_rows)
    return summary.sort_values('year').reset_index(drop=True)

def compute_all_differences(df):
    """Return a copy of *df* with row-to-row changes of both totals.

    Adds 'immigrant_diff' and 'espanyol_diff', each the difference between a
    row and the previous one (NaN on the first row). The input frame is not
    modified.
    """
    return df.assign(
        immigrant_diff=df['total_immigrants'].diff(),
        espanyol_diff=df['total_espanyols'].diff(),
    )

def generate_random_distribution(total, n):
    """Scatter |total| units uniformly at random over n bins.

    Each unit is assigned independently to one of the bins 0..n-1 using
    numpy's global random state.

    Returns:
        Integer array of length n whose entries sum to *total*; every entry
        is <= 0 when *total* is negative.
    """
    draws = np.random.randint(0, n, abs(total))
    # minlength keeps bins that received nothing, so the result always has n entries.
    counts = np.bincount(draws, minlength=n).astype(int)
    return -counts if total < 0 else counts

def build_yearly_census_df_cumulative(seccions, base_year_data, yearly_diffs):
    """Simulate a random per-section population history.

    The base-year totals are scattered at random over the sections; each later
    year adds a random scattering of that year's net change to the previous
    year's per-section counts.

    Args:
        seccions: sequence of section identifiers (only its length and order
            are used).
        base_year_data: mapping with 'year', 'total_immigrants',
            'total_espanyols'.
        yearly_diffs: iterable of mappings with 'to_year', 'immigrant_diff',
            'espanyol_diff', in chronological order.

    Returns:
        Long-format DataFrame with columns 'year', 'Seccio_censal',
        'Immigrants', 'Spanish' -- one row per (year, section).
    """
    n_sections = len(seccions)

    def snapshot(year, immigrants, spanish):
        # One output row per section for the given year.
        return [
            {
                'year': year,
                'Seccio_censal': section,
                'Immigrants': immigrants[pos],
                'Spanish': spanish[pos],
            }
            for pos, section in enumerate(seccions)
        ]

    immigrants = generate_random_distribution(base_year_data['total_immigrants'], n_sections)
    spanish = generate_random_distribution(base_year_data['total_espanyols'], n_sections)
    rows = snapshot(base_year_data['year'], immigrants, spanish)

    for step in yearly_diffs:
        immigrants_change = generate_random_distribution(step['immigrant_diff'], n_sections)
        spanish_change = generate_random_distribution(step['espanyol_diff'], n_sections)

        # Counts are clipped at zero, so after a clip the section counts no
        # longer sum exactly to the city-wide total implied by the diffs.
        immigrants = np.maximum(immigrants + immigrants_change, 0)
        spanish = np.maximum(spanish + spanish_change, 0)

        rows.extend(snapshot(step['to_year'], immigrants, spanish))

    return pd.DataFrame(rows)

## Càlculs

def calcul_index_of_dissimilarity(dataframe_name, pais):
    """Compute the index of dissimilarity of one origin group vs. the Spanish-born.

    For every census section the group's share of its city-wide total is
    compared with the Spanish-born share of theirs; the index is half the sum
    of the absolute differences.

    Args:
        dataframe_name: path of a CSV with columns 'Seccio_Censal',
            'LLOC_NAIX_PAIS' and 'Valor'.
        pais: value matched against 'LLOC_NAIX_PAIS'. When it is 58 the group
            is every row whose code is *not* 58.

    Returns:
        Tuple (id_total, id_per_seccio): the index and the list of per-section
        components, ordered by ascending 'Seccio_Censal'. Both are NaN when
        either population total is zero.
    """
    dataframe = pd.read_csv(dataframe_name)
    # '..' placeholders are counted as 2 (presumably a suppressed small
    # count -- TODO confirm against the data provider's notes).
    dataframe['Valor'] = pd.to_numeric(dataframe['Valor'].replace('..', 2))

    # Every section present in the file, in ascending order; both populations
    # are aligned on this list so the arrays below match position by position.
    seccions = np.sort(dataframe['Seccio_Censal'].unique())

    es_espanyol = dataframe['LLOC_NAIX_PAIS'] == 58
    es_grup = ~es_espanyol if pais == 58 else dataframe['LLOC_NAIX_PAIS'] == pais

    def total_per_seccio(mask):
        # Sum 'Valor' per section; sections without any matching row get 0.
        sums = dataframe.loc[mask].groupby('Seccio_Censal')['Valor'].sum()
        return sums.reindex(seccions, fill_value=0).to_numpy()

    immigrants = total_per_seccio(es_grup)
    espanyols = total_per_seccio(es_espanyol)

    id_per_seccio = .5 * np.abs(immigrants / immigrants.sum() - espanyols / espanyols.sum())
    id_total = id_per_seccio.sum()

    return (id_total, id_per_seccio.tolist())


def calcul_index_of_dissimilarity_from_df(df):
    """Compute the index of dissimilarity from per-section counts.

    Args:
        df: frame with columns 'Seccio_censal', 'Immigrants' and 'Spanish'
            (other columns such as 'year' are ignored). The frame is NOT
            modified, so it is safe to pass a groupby group or a slice.

    Returns:
        Tuple (id_total, id_per_seccio): the index and the list of
        per-section components in row order.
    """
    total_immigrants = df['Immigrants'].sum()
    total_spanish = df['Spanish'].sum()

    # Kept as a local Series instead of a new column on the caller's frame.
    component = 0.5 * ((df['Immigrants'] / total_immigrants) - (df['Spanish'] / total_spanish)).abs()

    id_total = component.sum()
    # Keyed by section so that, should a section appear twice, only its last
    # component is kept.
    id_per_seccio = dict(zip(df['Seccio_censal'], component.tolist()))

    return id_total, list(id_per_seccio.values())

def calcul_id_mitja_random(dataframes, pais, seccions):
    """Run ONE random redistribution and return its index of dissimilarity per year.

    The real city-wide totals (base year plus yearly net changes) are
    scattered at random over *seccions* and the index is computed for each
    simulated year. No averaging happens here: a single call yields a single
    random realisation.

    Returns:
        Dict mapping year -> (id_total, id_per_seccio).
    """
    evolution = compute_all_differences(process_files(dataframes, pais))

    base_total = {
        key: int(evolution.loc[0, key])
        for key in ('year', 'total_immigrants', 'total_espanyols')
    }

    # Pair every year with its predecessor; the first row has no diff and is
    # only used as a 'from_year'.
    years = evolution['year'].tolist()
    immigrant_diffs = evolution['immigrant_diff'].tolist()
    espanyol_diffs = evolution['espanyol_diff'].tolist()
    year_diff = [
        {
            'from_year': int(previous),
            'to_year': int(current),
            'immigrant_diff': int(d_immigrants),
            'espanyol_diff': int(d_espanyols),
        }
        for previous, current, d_immigrants, d_espanyols
        in zip(years[:-1], years[1:], immigrant_diffs[1:], espanyol_diffs[1:])
    ]

    simulated = build_yearly_census_df_cumulative(seccions, base_total, year_diff)

    return {
        year: calcul_index_of_dissimilarity_from_df(group)
        for year, group in simulated.groupby('year')
    }

def calcular_effective(mean_id_by_year, ids):
    """Combine real and random indices into the effective index of dissimilarity.

    Args:
        mean_id_by_year: dict year -> mean index obtained from the random
            redistributions.
        ids: dict year -> (id_total, id_per_seccio) for the real data; only
            element 0 is used. Must contain every year of *mean_id_by_year*.

    Returns:
        DataFrame with columns 'Year', 'id_values', 'id_values_random' and
        'ID', sorted by year, where
        ID = (real - random) / (1 - random).
        The 'ID' column is present even when there are no years.
    """
    sorted_years_random = sorted(mean_id_by_year.keys())
    id_values_random = [mean_id_by_year[year] for year in sorted_years_random]
    id_values = [ids[year][0] for year in sorted_years_random]

    final_df = pd.DataFrame({
        'Year': sorted_years_random,
        'id_values': id_values,
        'id_values_random': id_values_random
    })

    # Build the whole column at once instead of enlarging the frame cell by
    # cell; this also guarantees the column exists for an empty input.
    final_df['ID'] = np.array(
        [(real - rand) / (1 - rand) for real, rand in zip(id_values, id_values_random)],
        dtype=float,
    )

    return final_df

## Plots

# def plot_index_of_dissimilarity_per_pais(ids, country_name, dir):
#     sorted_years = sorted(ids.keys())
#     id_values = [ids[year][0] for year in sorted_years]

#     plt.figure(figsize=(10, 6))
#     plt.plot(sorted_years, id_values, marker='o', linestyle='-', color='turquoise', label='Index of dissimilarity')
#     plt.xlabel("Year")
#     plt.ylabel("Index value")
#     # plt.title(f"Index of dissimilarity by year in Barcelona ({country_name})")
#     plt.xticks(sorted_years, rotation=45)
#     plt.grid(True, linestyle='--', alpha=0.5)
#     plt.legend()
#     plt.tight_layout()
#     plt.savefig(dir + f'{country_name}_year_value.png', dpi=300)
#     plt.close()

# def plot_index_of_dissimilarity_random_per_pais(mean_id_by_year, country_name, dir):
#     sorted_years_random = sorted(mean_id_by_year.keys())
#     id_values_random = [mean_id_by_year[year] for year in sorted_years_random]

#     plt.figure(figsize=(10, 6))
#     plt.plot(sorted_years_random, id_values_random, marker='o', linestyle='-', color='grey', label='Random index of dissimilarity')
#     plt.xlabel("Year")
#     plt.ylabel("Index value")
#     # plt.title(f"Random index of dissimilarity by year in Barcelona ({country_name})")
#     plt.xticks(sorted_years_random, rotation=45)
#     plt.grid(True, linestyle='--', alpha=0.5)
#     plt.legend()
#     plt.tight_layout()
#     plt.savefig(dir + f'{country_name}_year_value_random.png', dpi=300)
#     plt.close()

# def plot_index_of_dissimilarity_per_pais_junts(mean_id_by_year, ids, country_name, dir):
#     sorted_years_random = sorted(mean_id_by_year.keys())
#     id_values_random = [mean_id_by_year[year] for year in sorted_years_random]
#     id_values = [ids[year][0] for year in sorted_years_random]

#     plt.figure(figsize=(10, 6))
#     plt.plot(sorted_years_random, id_values_random, marker='o', linestyle='-', color='grey', label='Random index of dissimilarity')
#     plt.plot(sorted_years_random, id_values, marker='o', linestyle='-', color='turquoise', label='Real index of dissimilarity')
#     plt.xlabel("Year")
#     plt.ylabel("Index value")
#     # plt.title(f"Comparison of real and random index of dissimilarity by year in Barcelona ({country_name})")
#     plt.xticks(sorted_years_random, rotation=45)
#     plt.grid(True, linestyle='--', alpha=0.5)
#     plt.legend()
#     plt.tight_layout()
#     plt.savefig(dir + f'{country_name}_year_value_comparison.png', dpi=300)
#     plt.close()

def plot_index_of_dissimilarity_effective (final_df, country_name, dir):
    """Save a line plot of the effective index of dissimilarity over time.

    Args:
        final_df: frame with 'Year' and 'ID' columns.
        country_name: used as the prefix of the output file name.
        dir: output directory prefix; it is concatenated as-is, so it must
            end with a path separator.
    """
    fig, ax = plt.subplots(figsize=(5, 3))
    ax.plot(final_df['Year'], final_df['ID'], marker='o', linestyle='-', color='green')
    ax.set_xlabel("Year")
    ax.set_ylabel("Index value")
    # Label every second year only, to keep the small figure readable.
    plt.xticks(final_df['Year'][::2], rotation=45)
    ax.grid(True, linestyle='--', alpha=0.5)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    fig.tight_layout()
    fig.savefig(dir + f'{country_name}_year_value_effective.png', dpi=300)
    plt.close(fig)

def plot_index_of_dissimilarity_effective_comparison(data, country_names, dir):
    """Save one plot comparing the effective index of several countries.

    Args:
        data: non-empty list of frames with 'Year' and 'ID' columns, one per
            country. The x ticks are taken from the first frame.
        country_names: legend names, parallel to *data*.
        dir: output directory prefix; it is concatenated as-is, so it must
            end with a path separator.
    """
    colours = ['#0cb2af', '#a1c65d', '#fac723', '#f29222', '#e95e50', '#936fac']
    plt.figure(figsize=(10, 6))
    for i, (frame, name) in enumerate(zip(data, country_names)):
        plt.plot(
            frame['Year'],
            frame['ID'],
            marker='o',
            linestyle='-',
            # Wrap around the palette so more than six series do not fail.
            color=colours[i % len(colours)],
            label=f'Effective index of dissimilarity ({name})',
        )
    plt.xlabel("Year")
    plt.ylabel("Index value")
    # Label every second year only.
    plt.xticks(data[0]['Year'][::2], rotation=45)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend(fontsize = 12)
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.tight_layout()
    plt.savefig(dir + 'countries_comparison_year_value_effective.png', dpi=300)
    plt.close()

In [14]:
# Two groups of countries are analysed; each list of LLOC_NAIX_PAIS codes is
# parallel to the list of display names below it. Code 58 selects everyone
# not born in Spain (see the `pais == 58` branch of the helpers).
paisos_a_considerar_morans = [65, 108, 11, 67, 174, 58]
paisos_a_considerar_quantitat = [11, 39, 161, 167, 230, 58]
country_names_morans = ['Philippines','Italy', 'Argentina', 'France', 'United Kingdom', 'Foreigners']
country_names_quantitat = ['Argentina', 'Colombia', 'Pakistan', 'Peru', 'Venezuela', 'Foreigners']

# Number of random redistributions averaged for every country.
vegades = 10

# Section codes come from the first listed file; they do not depend on the
# country, so the file is read once instead of once per country.
seccions = pd.read_csv(dataframes_naix[0])['Seccio_Censal'].unique()

# Header years of the output table, derived from the same file list that
# produces the values so the two cannot drift apart.
anys_taula = sorted(int(path[5:9]) for path in dataframes_naix)

for paisos_a_considerar, country_names, output_dir in [
    (paisos_a_considerar_morans, country_names_morans, 'dissimilarity_results/higher_morans/'),
    (paisos_a_considerar_quantitat, country_names_quantitat, 'dissimilarity_results/higher_quantity/'),
]:
    data = []
    output_table = {}
    for pais, country_name in zip(paisos_a_considerar, country_names):
        print(f"Working on: {country_name}")

        # Real index per year: year -> (id_total, id_per_seccio).
        ids = {
            int(path[5:9]): calcul_index_of_dissimilarity(path, pais)
            for path in dataframes_naix
        }

        # Collect the total index of every random run, grouped by year.
        ids_random_mean = {}
        for _ in range(vegades):
            ids_random = calcul_id_mitja_random(dataframes_naix, pais, seccions)
            for year, resultat in ids_random.items():
                ids_random_mean.setdefault(year, []).append(resultat[0])

        mean_id_by_year = {year: np.mean(values) for year, values in ids_random_mean.items()}

        final_df = calcular_effective(mean_id_by_year, ids)
        data.append(final_df)

        plot_index_of_dissimilarity_effective(final_df, country_name, output_dir)

        sorted_years = sorted(ids)
        output_table[country_name] = {
            'DI real': [ids[year][0] for year in sorted_years],
            'DI random': [mean_id_by_year[year] for year in sorted_years],
            'DI effective': final_df['ID'].tolist(),
        }

    plot_index_of_dissimilarity_effective_comparison(data, country_names, output_dir)

    # Table layout: a header row with the years, then for each country a line
    # with its name followed by one line per index. Every cell, including the
    # last one of a line, is followed by a comma.
    with open(output_dir + 'taula.csv', 'w') as f:
        f.write('Year,' + ''.join(f'{year},' for year in anys_taula) + '\n')
        for country, indexos in output_table.items():
            f.write(country + '\n')
            for nom_index, valors in indexos.items():
                f.write(nom_index + ',' + ''.join('{:.3f},'.format(val) for val in valors) + '\n')

Working on: Philippines
Working on: Italy
Working on: Argentina
Working on: France
Working on: United Kingdom
Working on: Foreigners
Working on: Argentina
Working on: Colombia
Working on: Pakistan
Working on: Peru
Working on: Venezuela
Working on: Foreigners


# Mapes

In [21]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Funcions

## Càlculs

def calcul_index_of_dissimilarity_per_seccio(dataframe_name, pais):
    """Return the per-section components of the index of dissimilarity.

    For every census section the origin group's share of its city-wide total
    is compared with the Spanish-born share of theirs; each component is half
    the absolute difference.

    Args:
        dataframe_name: path of a CSV with columns 'Seccio_Censal',
            'LLOC_NAIX_PAIS' and 'Valor'.
        pais: value matched against 'LLOC_NAIX_PAIS'. When it is 58 the group
            is every row whose code is *not* 58.

    Returns:
        List of components ordered by ascending 'Seccio_Censal'; all NaN when
        either population total is zero.
    """
    dataframe = pd.read_csv(dataframe_name)
    # '..' placeholders are counted as 2 (presumably a suppressed small
    # count -- TODO confirm against the data provider's notes).
    dataframe['Valor'] = pd.to_numeric(dataframe['Valor'].replace('..', 2))

    # Every section present in the file, in ascending order; both populations
    # are aligned on this list so the arrays below match position by position.
    seccions = np.sort(dataframe['Seccio_Censal'].unique())

    es_espanyol = dataframe['LLOC_NAIX_PAIS'] == 58
    es_grup = ~es_espanyol if pais == 58 else dataframe['LLOC_NAIX_PAIS'] == pais

    def total_per_seccio(mask):
        # Sum 'Valor' per section; sections without any matching row get 0.
        sums = dataframe.loc[mask].groupby('Seccio_Censal')['Valor'].sum()
        return sums.reindex(seccions, fill_value=0).to_numpy()

    immigrants = total_per_seccio(es_grup)
    espanyols = total_per_seccio(es_espanyol)

    id_per_seccio = .5 * np.abs(immigrants / immigrants.sum() - espanyols / espanyols.sum())

    return id_per_seccio.tolist()

## Plots

def plot_dades_mapa_seccio_censal (data, country_name, dir):
    """Save a choropleth of *data* over the census-section polygons.

    Args:
        data: one value per polygon, assigned positionally after the polygons
            are sorted by DISTRICTE + SEC_CENS. Assumes that order matches the
            order in which *data* was produced -- TODO confirm.
        country_name: used as the prefix of the output file name.
        dir: output directory prefix; it is concatenated as-is, so it must
            end with a path separator.
    """
    seccions_gdf = gpd.read_file("data/seccio-censal.geojson")

    # Sort key built from district and section columns (presumably string
    # codes that concatenate into the full section code -- verify).
    seccions_gdf['ordre_seccio'] = seccions_gdf["DISTRICTE"] + seccions_gdf["SEC_CENS"]
    seccions_gdf = seccions_gdf.sort_values(by='ordre_seccio').reset_index(drop=True)
    seccions_gdf['data'] = data

    fig, ax = plt.subplots(figsize=(10, 10))
    plot_options = {
        'ax': ax,
        'column': 'data',
        'edgecolor': 'black',
        'legend': True,
        'legend_kwds': {'shrink': 0.6, 'label': '', 'orientation': 'vertical'},
        'missing_kwds': {'color': 'white'},
        'cmap': 'Greens',
    }
    seccions_gdf.plot(**plot_options)

    # Bare map: no axis labels, ticks or frame.
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_xticks([])
    ax.set_yticks([])
    for border in ax.spines.values():
        border.set_visible(False)

    plt.savefig(dir + f"{country_name}_mapa_barcelona_per_seccio.png", dpi=300)
    plt.close()

In [23]:
# Country codes (LLOC_NAIX_PAIS) and their display names, as parallel lists.
paisos_a_considerar_morans = [65, 108, 11, 67, 174, 58]
paisos_a_considerar_quantitat = [11, 39, 161, 167, 230, 58]
country_names_morans = ['Philippines','Italy', 'Argentina', 'France', 'United Kingdom', 'Foreigners']
country_names_quantitat = ['Argentina', 'Colombia', 'Pakistan', 'Peru', 'Venezuela', 'Foreigners']
# The maps are drawn for the 2024 extract only.
dataframe_name = 'data/2024_pad_mdbas_lloc-naix-pais_lloc-naix-continent_sexe.csv'

for paisos_a_considerar, country_names, output_dir in [
    (paisos_a_considerar_morans, country_names_morans, 'dissimilarity_results/higher_morans/'),
    (paisos_a_considerar_quantitat, country_names_quantitat, 'dissimilarity_results/higher_quantity/'),
]:
    # zip pairs each code with its name directly; the loop variable is not
    # called `dir`, so the builtin stays usable in the notebook afterwards.
    for pais, country_name in zip(paisos_a_considerar, country_names):
        print(f"Working on: {country_name}")

        ids_per_seccio = calcul_index_of_dissimilarity_per_seccio(dataframe_name, pais)
        plot_dades_mapa_seccio_censal(ids_per_seccio, country_name, output_dir)


Working on: Philippines
Working on: Italy
Working on: Argentina
Working on: France
Working on: United Kingdom
Working on: Foreigners
Working on: Argentina
Working on: Colombia
Working on: Pakistan
Working on: Peru
Working on: Venezuela
Working on: Foreigners


# Barris

In [25]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Funcions

## Càlculs

def calcul_index_of_dissimilarity_per_barri(dataframe_name, pais):
    """Return the per-neighbourhood components of the index of dissimilarity.

    For every 'Codi_Barri' the origin group's share of its city-wide total is
    compared with the Spanish-born share of theirs; each component is half the
    absolute difference.

    Args:
        dataframe_name: path of a CSV with columns 'Codi_Barri',
            'LLOC_NAIX_PAIS' and 'Valor'.
        pais: value matched against 'LLOC_NAIX_PAIS'. When it is 58 the group
            is every row whose code is *not* 58.

    Returns:
        List of components ordered by ascending 'Codi_Barri'; all NaN when
        either population total is zero.
    """
    dataframe = pd.read_csv(dataframe_name)
    # '..' placeholders are counted as 2 (presumably a suppressed small
    # count -- TODO confirm against the data provider's notes).
    dataframe['Valor'] = pd.to_numeric(dataframe['Valor'].replace('..', 2))

    # Every neighbourhood present in the file, in ascending order; both
    # populations are aligned on this list so the arrays match by position.
    barris = np.sort(dataframe['Codi_Barri'].unique())

    es_espanyol = dataframe['LLOC_NAIX_PAIS'] == 58
    es_grup = ~es_espanyol if pais == 58 else dataframe['LLOC_NAIX_PAIS'] == pais

    def total_per_barri(mask):
        # Sum 'Valor' per neighbourhood; those without matching rows get 0.
        sums = dataframe.loc[mask].groupby('Codi_Barri')['Valor'].sum()
        return sums.reindex(barris, fill_value=0).to_numpy()

    immigrants = total_per_barri(es_grup)
    espanyols = total_per_barri(es_espanyol)

    id_per_barri = .5 * np.abs(immigrants / immigrants.sum() - espanyols / espanyols.sum())

    return id_per_barri.tolist()

## Plots

def plot_dades_mapa_barri (data, country_name, dir):
    """Save a choropleth of *data* over the neighbourhood polygons.

    Args:
        data: one value per polygon, assigned positionally after the polygons
            are sorted by 'BARRI'. Assumes that order matches the order in
            which *data* was produced -- TODO confirm.
        country_name: used as the prefix of the output file name.
        dir: output directory prefix; it is concatenated as-is, so it must
            end with a path separator.
    """
    barris_gdf = gpd.read_file("data/barris.geojson")

    barris_gdf = barris_gdf.sort_values(by='BARRI').reset_index(drop=True)
    barris_gdf['data'] = data

    fig, ax = plt.subplots(figsize=(10, 10))
    plot_options = {
        'ax': ax,
        'column': 'data',
        'edgecolor': 'black',
        'legend': True,
        'legend_kwds': {'shrink': 0.6, 'label': '', 'orientation': 'vertical'},
        'missing_kwds': {'color': 'white'},
        'cmap': 'Greens',
    }
    barris_gdf.plot(**plot_options)

    # Bare map: no axis labels, ticks or frame.
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_xticks([])
    ax.set_yticks([])
    for border in ax.spines.values():
        border.set_visible(False)

    plt.savefig(dir + f"{country_name}_mapa_barcelona_per_barri.png", dpi=300)
    plt.close()

In [26]:
# Country codes (LLOC_NAIX_PAIS) and their display names, as parallel lists.
paisos_a_considerar_morans = [65, 108, 11, 67, 174, 58]
paisos_a_considerar_quantitat = [11, 39, 161, 167, 230, 58]
country_names_morans = ['Philippines','Italy', 'Argentina', 'France', 'United Kingdom', 'Foreigners']
country_names_quantitat = ['Argentina', 'Colombia', 'Pakistan', 'Peru', 'Venezuela', 'Foreigners']
# The maps are drawn for the 2024 extract only.
dataframe_name = 'data/2024_pad_mdbas_lloc-naix-pais_lloc-naix-continent_sexe.csv'

for paisos_a_considerar, country_names, output_dir in [
    (paisos_a_considerar_morans, country_names_morans, 'dissimilarity_results/higher_morans/'),
    (paisos_a_considerar_quantitat, country_names_quantitat, 'dissimilarity_results/higher_quantity/'),
]:
    # zip pairs each code with its name directly; the loop variable is not
    # called `dir`, so the builtin stays usable in the notebook afterwards.
    for pais, country_name in zip(paisos_a_considerar, country_names):
        print(f"Working on: {country_name}")

        ids_per_barri = calcul_index_of_dissimilarity_per_barri(dataframe_name, pais)
        plot_dades_mapa_barri(ids_per_barri, country_name, output_dir)


Working on: Philippines
Working on: Italy
Working on: Argentina
Working on: France
Working on: United Kingdom
Working on: Foreigners
Working on: Argentina
Working on: Colombia
Working on: Pakistan
Working on: Peru
Working on: Venezuela
Working on: Foreigners
