# Evolució al llarg del temps

In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Variables 

# One CSV path per year, listed newest first (2024 down to 2002).
# The four-digit year sits right after the 'data/' prefix of each path.
dataframes_naix = [
    f'data/{year}_pad_mdbas_lloc-naix-pais_lloc-naix-continent_sexe.csv'
    for year in range(2024, 2001, -1)
]

# Funcions

## Processament

def process_files(dataframes, pais):
    """Total the 'Valor' counts per year for one country group and for code 58.

    Args:
        dataframes: Iterable of CSV paths. Every file needs the columns
            'LLOC_NAIX_PAIS' and 'Valor', and its file name must contain the
            four-digit year (e.g. 'data/2024_pad_....csv').
        pais: 'LLOC_NAIX_PAIS' code whose rows are totalled as
            'total_immigrants'. As a special case, 58 selects every row whose
            code is *not* 58.

    Returns:
        DataFrame with the columns 'year', 'total_immigrants' and
        'total_espanyols' (rows with code 58), one row per file, sorted by
        year. An empty input yields an empty frame with those columns.

    Raises:
        ValueError: If a file name contains no four-digit year.
    """
    import os
    import re

    columns = ['year', 'total_immigrants', 'total_espanyols']
    totals = []

    for path in dataframes:
        # Parse the year from the base name so the result does not depend on
        # the length of the directory prefix.
        found = re.search(r'\d{4}', os.path.basename(path))
        if found is None:
            raise ValueError(f'No four-digit year in file name: {path!r}')
        year = int(found.group())

        df = pd.read_csv(path)
        # '..' entries are counted as 2 (presumably a suppressed small count --
        # TODO confirm against the data source); anything else that is not
        # numeric is counted as 0.
        df['Valor'] = pd.to_numeric(df['Valor'].replace('..', 2), errors='coerce').fillna(0).astype(int)

        is_code_58 = df['LLOC_NAIX_PAIS'] == 58
        if pais == 58:
            selected = df['LLOC_NAIX_PAIS'] != pais
        else:
            selected = df['LLOC_NAIX_PAIS'] == pais

        totals.append({
            'year': year,
            'total_immigrants': df.loc[selected, 'Valor'].sum(),
            'total_espanyols': df.loc[is_code_58, 'Valor'].sum(),
        })

    # Passing the column names keeps 'year' available even when no file was given.
    return pd.DataFrame(totals, columns=columns).sort_values('year').reset_index(drop=True)

def compute_all_differences(df):
    """Return a copy of ``df`` with row-to-row differences of both totals.

    Adds 'immigrant_diff' and 'espanyol_diff', each the difference between a
    row's total and the previous row's total (NaN on the first row). The
    input frame is left untouched.
    """
    return df.assign(
        immigrant_diff=df['total_immigrants'].diff(),
        espanyol_diff=df['total_espanyols'].diff(),
    )

def generate_random_distribution(total, n):
    """Scatter ``abs(total)`` units at random over ``n`` bins.

    Each unit is assigned to a bin index drawn with ``np.random.randint``
    from ``0 .. n-1``. The result is an integer array of length ``n`` holding
    the per-bin counts; the counts are negated when ``total`` is negative.
    """
    draws = np.random.randint(0, n, abs(total))
    # minlength keeps the bins that received no draw as explicit zeros.
    counts = np.bincount(draws, minlength=n).astype(int)
    return -counts if total < 0 else counts

def build_yearly_census_df_cumulative(seccions, base_year_data, yearly_diffs):
    """Simulate per-section populations year by year from random allocations.

    The base-year totals are scattered at random over the sections. For each
    entry of ``yearly_diffs`` the yearly change is scattered the same way,
    added to the running per-section counts, and the counts are clipped at 0.

    Args:
        seccions: Sequence of section identifiers.
        base_year_data: Dict with 'year', 'total_immigrants', 'total_espanyols'.
        yearly_diffs: Iterable of dicts with 'to_year', 'immigrant_diff' and
            'espanyol_diff', applied in the given order.

    Returns:
        DataFrame with one row per (year, section) and the columns 'year',
        'Seccio_censal', 'Immigrants', 'Spanish'.
    """
    n_sections = len(seccions)
    rows = []

    def _record(year, immigrants, espanyols):
        # Append one row per section for the given year.
        rows.extend(
            {
                'year': year,
                'Seccio_censal': seccio,
                'Immigrants': n_immigrants,
                'Spanish': n_espanyols,
            }
            for seccio, n_immigrants, n_espanyols in zip(seccions, immigrants, espanyols)
        )

    base_immigrants = base_year_data['total_immigrants']
    base_espanyols = base_year_data['total_espanyols']
    base_year = base_year_data['year']

    # Immigrants are always drawn before Spanish so the random stream is
    # consumed in a fixed order.
    immigrants = generate_random_distribution(base_immigrants, n_sections)
    espanyols = generate_random_distribution(base_espanyols, n_sections)
    _record(base_year, immigrants, espanyols)

    for step in yearly_diffs:
        year = step['to_year']
        immigrant_delta = step['immigrant_diff']
        espanyol_delta = step['espanyol_diff']

        immigrants_shift = generate_random_distribution(immigrant_delta, n_sections)
        espanyols_shift = generate_random_distribution(espanyol_delta, n_sections)

        # A section cannot hold a negative population.
        immigrants = np.maximum(immigrants + immigrants_shift, 0)
        espanyols = np.maximum(espanyols + espanyols_shift, 0)
        _record(year, immigrants, espanyols)

    return pd.DataFrame(rows)

## Càlculs

def co_neighbour_1(dataframe_name, pais):
    """Compute the isolation, exposure and ICS indices for one yearly CSV.

    Args:
        dataframe_name: Path of a CSV with the columns 'Seccio_Censal',
            'LLOC_NAIX_PAIS' and 'Valor'.
        pais: 'LLOC_NAIX_PAIS' code of the group of interest. As a special
            case, 58 selects every row whose code is *not* 58.

    Returns:
        Tuple ``(ICS, isolation, exposure)``: ``isolation`` is the group's
        isolation index, ``exposure`` is the exposure index of the code-58
        population to the group, and ``ICS = isolation - exposure``.
    """
    dataframe = pd.read_csv(dataframe_name)
    # '..' entries are counted as 2 (presumably a suppressed small count --
    # TODO confirm against the data source).
    dataframe['Valor'] = pd.to_numeric(dataframe['Valor'].replace('..', 2))

    # Every section present in the file, in sorted order, so both per-section
    # series share the same index.
    seccions = sorted(dataframe['Seccio_Censal'].unique())

    def _sum_by_section(mask):
        # Sum 'Valor' per section over the masked rows; sections with no
        # matching row get 0.
        return (
            dataframe.loc[mask]
            .groupby('Seccio_Censal')['Valor']
            .sum()
            .reindex(seccions, fill_value=0)
        )

    if pais == 58:
        group_mask = dataframe['LLOC_NAIX_PAIS'] != pais
    else:
        group_mask = dataframe['LLOC_NAIX_PAIS'] == pais

    immigrants = _sum_by_section(group_mask)
    espanyols = _sum_by_section(dataframe['LLOC_NAIX_PAIS'] == 58)

    total_immigrants = immigrants.sum()
    total_espanyols = espanyols.sum()

    # Residents of the section other than the person being considered. A
    # section with exactly one resident gives a zero denominator; the NaN
    # terms that result are skipped by .sum(), i.e. they contribute nothing.
    others = espanyols + immigrants - 1

    index_isolation_immigrants = ((immigrants / total_immigrants) * ((immigrants - 1) / others)).sum()
    index_exposure_espanyols = ((espanyols / total_espanyols) * (immigrants / others)).sum()

    ICS = index_isolation_immigrants - index_exposure_espanyols

    return ICS, index_isolation_immigrants, index_exposure_espanyols

def co_neighbour(dataframe):
    """Compute the ICS, isolation and exposure indices for every year.

    Args:
        dataframe: Frame with the columns 'year', 'Immigrants' and 'Spanish',
            one row per section and year.

    Returns:
        Dict mapping each year to ``(ICS, isolation, exposure)``, where
        ``ICS = isolation - exposure``.
    """
    indices_by_year = {}

    for year in set(dataframe['year']):
        yearly = dataframe[dataframe['year'] == year].reset_index(drop=True)
        immigrants = yearly['Immigrants']
        espanyols = yearly['Spanish']

        # Residents of the section other than the person being considered.
        others = espanyols + immigrants - 1

        isolation = ((immigrants / immigrants.sum()) * ((immigrants - 1) / others)).sum()
        exposure = ((espanyols / espanyols.sum()) * (immigrants / others)).sum()

        indices_by_year[year] = (isolation - exposure, isolation, exposure)

    return indices_by_year

def calcul_ICS_mitja_random(dataframes, pais, seccions):
    """Run one random simulation and return its indices per year.

    The yearly totals for ``pais`` are read from ``dataframes``, scattered at
    random over ``seccions`` (base year first, then each year-to-year change),
    and the indices are computed on the simulated populations.

    Returns:
        Dict mapping each year to ``(ICS, isolation, exposure)``.
    """
    diffs_df = compute_all_differences(process_files(dataframes, pais))

    # Totals of the earliest year seed the simulation.
    base_total = {
        key: int(diffs_df.loc[0, key])
        for key in ('year', 'total_immigrants', 'total_espanyols')
    }

    # One entry per consecutive pair of years.
    year_diff = [
        {
            'from_year': int(diffs_df.loc[row - 1, 'year']),
            'to_year': int(diffs_df.loc[row, 'year']),
            'immigrant_diff': int(diffs_df.loc[row, 'immigrant_diff']),
            'espanyol_diff': int(diffs_df.loc[row, 'espanyol_diff']),
        }
        for row in range(1, len(diffs_df))
    ]

    simulated = build_yearly_census_df_cumulative(seccions, base_total, year_diff)
    return co_neighbour(simulated)

## Plots

def plot_coneighbour_index_per_seccio(data, country_name, dir):
    """Plot the three indices by year for one country and save the figure.

    Args:
        data: Dict mapping year to ``(co-resident, isolation, exposure)``.
        country_name: Used as the prefix of the output file name.
        dir: Output path prefix; the file name is appended to it directly,
            so it must end with a path separator.
    """
    years = sorted(data.keys())

    def _series(position):
        # Values of one tuple position, ordered by year.
        return [data[year][position] for year in years]

    co_resident = _series(0)
    isolation = _series(1)
    exposure = _series(2)

    # (values, colour, legend label) in drawing order.
    curves = (
        (isolation, '#0cb2af', 'Isolation index'),
        (exposure, '#fac723', 'Exposure index'),
        (co_resident, '#a1c65d', 'Co-resident index'),
    )

    plt.figure(figsize=(5,3))
    for values, colour, label in curves:
        plt.plot(years, values, marker='o', linestyle='-', color=colour, label=label)
    plt.xlabel("Year")
    plt.ylabel("Index value")
    # Label every second year only.
    plt.xticks(years[::2], rotation=45)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend()
    plt.tight_layout()

    axes = plt.gca()
    for side in ('top', 'right'):
        axes.spines[side].set_visible(False)

    plt.savefig(dir + f'{country_name}_indices_values.png', dpi=300)
    plt.close()
   
# def plot_coneighbour_index_random_per_seccio(data, country_name, dir):
#     sorted_years = sorted(data.keys())
#     index_coneighbours = [data[year][0] for year in sorted_years]
#     index_isolation = [data[year][1] for year in sorted_years]
#     index_exposure = [data[year][2] for year in sorted_years]

#     plt.figure(figsize=(10, 6))
#     plt.plot(sorted_years, index_isolation, marker='o', linestyle='-', color='#0cb2af', label='Random isolation index')
#     plt.plot(sorted_years, index_exposure, marker='o', linestyle='-', color='#fac723', label='Random exposure index')
#     plt.plot(sorted_years, index_coneighbours, marker='o', linestyle='-', color='#a1c65d', label='Random coneighbours index')
#     plt.xlabel("Year")
#     plt.ylabel("Index value")
#     # plt.title(f"Coneighbour index of dissimilarity by year in Barcelona ({country_name})")
#     plt.xticks(sorted_years, rotation=45)
#     plt.grid(True, linestyle='--', alpha=0.5)
#     plt.legend()
#     plt.tight_layout()
#     plt.savefig(dir + f'{country_name}_indices_values_random.png', dpi=300)
#     plt.close()

# def plot_coneighbour_index_random_comparison(data, data2, country_name, dir):
#     sorted_years = sorted(data.keys())
#     index_coneighbours = [data2[year][0] for year in sorted_years]
#     index_isolation = [data2[year][1] for year in sorted_years]
#     index_exposure = [data2[year][2] for year in sorted_years]
#     index_coneighbours_random = [data[year][0] for year in sorted_years]
#     index_isolation_random = [data[year][1] for year in sorted_years]
#     index_exposure_random = [data[year][2] for year in sorted_years]

#     plt.figure(figsize=(10, 6))
#     plt.plot(sorted_years, index_isolation, marker='o', linestyle='-', color='#0cb2af', label='Isolation index')
#     plt.plot(sorted_years, index_exposure, marker='o', linestyle='-', color='#fac723', label='Exposure index')
#     plt.plot(sorted_years, index_coneighbours, marker='o', linestyle='-', color='#a1c65d', label='Coneighbours index')
#     plt.plot(sorted_years, index_isolation_random, marker='o', linestyle='-', color='#f29222', label='Random isolation index')
#     plt.plot(sorted_years, index_exposure_random, marker='o', linestyle='-', color='#e95e50', label='Random exposure index')
#     plt.plot(sorted_years, index_coneighbours_random, marker='o', linestyle='-', color='#936fac', label='Random coneighbours index')
#     plt.xlabel("Year")
#     plt.ylabel("Index value")
#     # plt.title(f"Coneighbour index of dissimilarity by year in Barcelona ({country_name})")
#     plt.xticks(sorted_years, rotation=45)
#     plt.grid(True, linestyle='--', alpha=0.5)
#     plt.legend()
#     plt.tight_layout()
#     plt.savefig(dir + f'{country_name}_indices_values_comparison.png', dpi=300)
#     plt.close()

# def plot_effective_ICS(data, country_name, dir):
#     sorted_years = sorted(data.keys())

#     plt.figure(figsize=(10, 6))
#     plt.plot(sorted_years, list(data.values())[::-1], marker='o', linestyle='-', color='green', label='Effective coneighbours index')
#     plt.xlabel("Year")
#     plt.ylabel("Index value")
#     # plt.title(f"Coneighbour index of dissimilarity by year in Barcelona ({country_name})")
#     plt.xticks(sorted_years, rotation=45)
#     plt.grid(True, linestyle='--', alpha=0.5)
#     plt.legend()
#     plt.tight_layout()
#     plt.savefig(dir + f'{country_name}_indices_values_effective.png', dpi=300)
#     plt.close()


def plot_effective_ICS_per_pais(data, country_names, dir, years=None):
    """Plot the effective co-resident index of several countries together.

    Args:
        data: Sequence of value lists, one per country, each with one value
            per entry of ``years``.
        country_names: Legend names, parallel to ``data``.
        dir: Output path prefix; the file name is appended to it directly,
            so it must end with a path separator.
        years: Years on the x axis. Defaults to 2002..2024.
    """
    years = list(range(2002, 2025)) if years is None else list(years)

    colours = ['#0cb2af', '#a1c65d', '#fac723', '#f29222', '#e95e50', '#936fac']

    plt.figure(figsize=(10, 6))
    for i, values in enumerate(data):
        plt.plot(
            years,
            values,
            marker='o',
            linestyle='-',
            # Cycle through the palette so more than six series still plot.
            color=colours[i % len(colours)],
            label=f'Effective Co-resident index ({country_names[i]})',
        )
    plt.xlabel("Year")
    plt.ylabel("Index value")
    # Label every second year only.
    plt.xticks(years[::2], rotation=45)
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.legend()
    plt.tight_layout()
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.savefig(dir + 'countries_indices_values_comparison.png', dpi=300)
    plt.close()

TODO: tornar a executar-ho tot (20 min aprox.), perquè no s'havia canviat el nom de la llegenda del gràfic de tots els països junts i hi surt «Coneighbours» en comptes de «Co-resident».

In [None]:
# 'LLOC_NAIX_PAIS' codes analysed in each run, with their display names in
# the same order. Code 58 is labelled 'Foreigners' because the functions
# above treat pais == 58 as "every row whose code is not 58".
# NOTE(review): the two selections presumably hold the countries with the
# highest Moran's I and the largest populations -- confirm.
paisos_a_considerar_morans = [65, 108, 11, 67, 174, 58]
paisos_a_considerar_quantitat = [11, 39, 161, 167, 230, 58]
country_names_morans = ['Philippines','Italy', 'Argentina', 'France', 'United Kingdom', 'Foreigners']
country_names_quantitat = ['Argentina', 'Colombia', 'Pakistan', 'Peru', 'Venezuela', 'Foreigners']

# Number of random simulations averaged per country.
vegades = 10

# Section identifiers of the first listed file; they do not depend on the
# country, so the file is read once instead of once per country.
seccions = pd.read_csv(dataframes_naix[0])
seccions = seccions['Seccio_Censal'].unique()

# Years covered by the input files (the four characters after 'data/').
anys = sorted(int(path[5:9]) for path in dataframes_naix)

analisis = [
    (paisos_a_considerar_morans, country_names_morans, 'coneighbours_results/higher_morans/'),
    (paisos_a_considerar_quantitat, country_names_quantitat, 'coneighbours_results/higher_quantity/'),
]

for paisos_a_considerar, country_names, out_dir in analisis:
    output_table = {}
    for pais, country_name in zip(paisos_a_considerar, country_names):
        print(f'Working on: {country_name}')

        # Observed indices per year: year -> (ICS, isolation, exposure).
        percentatges_per_seccio = {
            int(path[5:9]): co_neighbour_1(path, pais)
            for path in dataframes_naix
        }
        #plot_coneighbour_index_per_seccio(percentatges_per_seccio, country_name, out_dir)

        ICS_random = {}
        ICS_random_mean = {}
        II_random_mean = {}
        IE_random_mean = {}

        # Collect the indices of every random simulation, grouped by year.
        for rep in range(vegades):
            ICS_random[rep] = calcul_ICS_mitja_random(dataframes_naix, pais, seccions)

            for year, (ics, isolation, exposure) in ICS_random[rep].items():
                ICS_random_mean.setdefault(year, []).append(ics)
                II_random_mean.setdefault(year, []).append(isolation)
                IE_random_mean.setdefault(year, []).append(exposure)

        mean_ICS_by_year = {year: np.mean(values) for year, values in ICS_random_mean.items()}
        mean_II_by_year = {year: np.mean(values) for year, values in II_random_mean.items()}
        mean_IE_by_year = {year: np.mean(values) for year, values in IE_random_mean.items()}

        # Only consumed by the disabled plots below.
        dict_mean_indices = {
            year: (mean_ICS_by_year[year], mean_II_by_year[year], mean_IE_by_year[year])
            for year in sorted(mean_ICS_by_year)
        }

        #plot_coneighbour_index_random_per_seccio(dict_mean_indices, country_name, out_dir)

        #plot_coneighbour_index_random_comparison(dict_mean_indices, percentatges_per_seccio, country_name, out_dir)

        # Observed ICS rescaled against the random baseline:
        # (observed - random) / (1 - random).
        effective_ICS = {
            year: (indices[0] - mean_ICS_by_year[year]) / (1 - mean_ICS_by_year[year])
            for year, indices in percentatges_per_seccio.items()
        }

        #plot_effective_ICS(effective_ICS, country_name, out_dir)

        sorted_years = sorted(percentatges_per_seccio.keys())
        ci_values = [percentatges_per_seccio[year][0] for year in sorted_years]
        ci_values_random = [mean_ICS_by_year[year] for year in sorted_years]
        ci_effective_values = [effective_ICS[year] for year in sorted_years]

        output_table[country_name] = {'CI real': ci_values, 'CI random': ci_values_random, 'CI effective': ci_effective_values}

    plot_effective_ICS_per_pais([x['CI effective'] for x in output_table.values()], country_names, out_dir)

    # Table layout: a header row with the years, then for each country a line
    # with its name followed by one line per index. Every cell, including the
    # last one of a line, is followed by a comma.
    with open(out_dir + 'taula.csv', 'w', encoding='utf-8') as f:
        f.write('Year,' + ''.join(f'{year},' for year in anys) + '\n')
        for country, indices in output_table.items():
            f.write(country + '\n')
            for index_name, values in indices.items():
                f.write(index_name + ',' + ''.join('{:.3f},'.format(val) for val in values) + '\n')

Working on: Philippines
Working on: Italy
Working on: Argentina
Working on: France
Working on: United Kingdom
Working on: Foreigners
Working on: Argentina
Working on: Colombia
Working on: Pakistan
Working on: Peru
Working on: Venezuela
Working on: Foreigners
