In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches

from collections import Counter

In [None]:
# Read the NUTS 2021 code table and discard every row that has a missing value
nuts_df_all = pd.read_excel('tweets/NUTS2021.xlsx')
nuts_df_all = nuts_df_all.dropna()

# Keep only the rows whose country code is 'IT'
nuts_df = nuts_df_all.loc[nuts_df_all['Country Code'].eq('IT')]

In [None]:
# Map every higher-level NUTS code (levels 0, 1 and 2) to its related NUTS level 3 codes.
# A level 3 code belongs to a parent when it starts with the parent's code.

nuts0_df = nuts_df[nuts_df['NUTS level'] == 0]
nuts1_df = nuts_df[nuts_df['NUTS level'] == 1]
nuts2_df = nuts_df[nuts_df['NUTS level'] == 2]
nuts3_df = nuts_df[nuts_df['NUTS level'] == 3]

# Loop-invariant: the column of level 3 codes that every parent is matched against
nuts3_codes = nuts3_df['Code 2021']

nuts3_mapping = {}

# Level 1 is included so that a level 1 code is expanded like the other
# parent levels instead of being passed through as if it were a level 3 code.
for parent_df in (nuts0_df, nuts1_df, nuts2_df):
    for parent_code in parent_df['Code 2021']:
        nuts3_mapping[parent_code] = nuts3_codes[nuts3_codes.str.startswith(parent_code)].tolist()

In [None]:
# Load the NUTS region shapefile and retain the rows whose country code is 'IT'
gdf = gpd.read_file('tweets/NUTS_RG_60M_2021_4326.shp')
gdf_italy = gdf.loc[gdf['CNTR_CODE'].eq('IT')]

In [None]:
# Load the scored tweets
df = pd.read_excel('tweets/tweets_11578_scores.xlsx')

df['date'] = pd.to_datetime(df['date'])

# Derive the calendar month and ISO week, and ensure the nuts column is a list:
# the stored text has its brackets and single quotes removed and is then
# split on ', ' into individual codes.
df = df.assign(
    month=df['date'].dt.month,
    week=df['date'].dt.isocalendar().week,
    nuts=df['nuts'].apply(
        lambda raw: raw.translate(str.maketrans('', '', "[]'")).split(', ')
    ),
)

In [None]:
# Mapping monthly aggregated category scores in each month

def nuts3_allmonth_category_log_plot(category, vmin=1, vmax=1000, cmap='YlOrBr'):
    """Draw a 3x4 grid of Italy maps, one per calendar month, for one category.

    For every month the rows of the notebook-level ``df`` are filtered on
    ``df['month']`` and each row's ``category`` value is credited to every
    code in its ``nuts`` list. Codes that are keys of ``nuts3_mapping`` are
    expanded to the mapped codes; any other code is used unchanged. The
    per-code sums are left-joined onto ``gdf_italy`` and drawn with a
    logarithmic colour normalisation that is shared by all twelve panels and
    by the single colour bar on the right.

    Parameters
    ----------
    category : str
        Column of ``df`` holding the scores to aggregate.
    vmin, vmax : float, optional
        Limits of the logarithmic colour scale (defaults 1 and 1000).
    cmap : str, optional
        Matplotlib colormap name (default ``'YlOrBr'``).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    norm = mcolors.LogNorm(vmin=vmin, vmax=vmax)

    fig, axes = plt.subplots(3, 4, figsize=(20, 15))

    for month, (month_name, ax) in enumerate(zip(month_names, axes.flatten()), start=1):
        df_filtered = df[df['month'] == month]

        # One (code, score) pair per tweet and per resolved code
        nuts3_scores = []
        for codes, score in zip(df_filtered['nuts'], df_filtered[category]):
            for code in codes:
                # Mapped parents expand to their level 3 codes; other codes pass through
                for mapped_code in nuts3_mapping.get(code, [code]):
                    nuts3_scores.append((mapped_code, score))

        nuts3_scores_df = pd.DataFrame(nuts3_scores, columns=['NUTS', 'Scores'])
        nuts3_scores_df = nuts3_scores_df.groupby('NUTS').sum().reset_index()

        geonuts_italy = gdf_italy.merge(nuts3_scores_df, how='left', left_on='NUTS_ID', right_on='NUTS')
        geonuts_italy.boundary.plot(ax=ax, color='black')

        # A month without any matching score leaves 'Scores' entirely missing;
        # there is nothing to colour, so only the outlines are drawn.
        if geonuts_italy['Scores'].notna().any():
            geonuts_italy.plot(column='Scores', ax=ax, legend=False, cmap=cmap, norm=norm)

        ax.set_title(f'{month_name}', fontsize=12)
        ax.set_axis_off()

    # Lay out the map grid first, leaving the right-hand strip free. The colour
    # bar axes is placed by hand afterwards because tight_layout cannot manage
    # axes created with add_axes and would warn about them.
    fig.tight_layout(rect=[0, 0, 0.85, 0.95])

    cbar_ax = fig.add_axes([0.87, 0.15, 0.03, 0.7])
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])  # public replacement for assigning the private ``_A`` attribute
    cbar = fig.colorbar(sm, cax=cbar_ax)
    cbar.set_label('Scores')

    plt.show()

In [None]:
# Twelve monthly maps of the summed 'Agriculture' scores (logarithmic colour scale)
nuts3_allmonth_category_log_plot('Agriculture')

In [None]:
# Twelve monthly maps of the summed 'Water' scores (logarithmic colour scale)
nuts3_allmonth_category_log_plot('Water')

In [None]:
# Mapping monthly aggregated category scores in summer months

def nuts3_summer_category_log_plot(category, vmin=1, vmax=350, cmap='YlOrBr', log_scale=False):
    """Draw three side-by-side Italy maps (June, July, August) for one category.

    For each of the three months the rows of the notebook-level ``df`` are
    filtered on ``df['month']`` and each row's ``category`` value is credited
    to every code in its ``nuts`` list. Codes that are keys of
    ``nuts3_mapping`` are expanded to the mapped codes; any other code is used
    unchanged. The per-code sums are left-joined onto ``gdf_italy`` and drawn
    with one colour normalisation shared by the panels and the colour bar.

    NOTE: despite "log" in the function name, the default normalisation is
    linear (``plt.Normalize``). Pass ``log_scale=True`` to use
    ``matplotlib.colors.LogNorm`` instead.

    Parameters
    ----------
    category : str
        Column of ``df`` holding the scores to aggregate.
    vmin, vmax : float, optional
        Limits of the colour scale (defaults 1 and 350).
    cmap : str, optional
        Matplotlib colormap name (default ``'YlOrBr'``).
    log_scale : bool, optional
        Use a logarithmic instead of a linear colour scale (default False).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    summer_months = [(6, 'Jun'), (7, 'Jul'), (8, 'Aug')]

    if log_scale:
        norm = mcolors.LogNorm(vmin=vmin, vmax=vmax)
    else:
        norm = plt.Normalize(vmin=vmin, vmax=vmax)

    fig, axes = plt.subplots(1, 3, figsize=(20, 15))

    for (month, month_name), ax in zip(summer_months, axes.flatten()):
        df_filtered = df[df['month'] == month]

        # One (code, score) pair per tweet and per resolved code
        nuts3_scores = []
        for codes, score in zip(df_filtered['nuts'], df_filtered[category]):
            for code in codes:
                # Mapped parents expand to their level 3 codes; other codes pass through
                for mapped_code in nuts3_mapping.get(code, [code]):
                    nuts3_scores.append((mapped_code, score))

        nuts3_scores_df = pd.DataFrame(nuts3_scores, columns=['NUTS', 'Scores'])
        nuts3_scores_df = nuts3_scores_df.groupby('NUTS').sum().reset_index()

        geonuts_italy = gdf_italy.merge(nuts3_scores_df, how='left', left_on='NUTS_ID', right_on='NUTS')
        geonuts_italy.boundary.plot(ax=ax, color='black')

        # A month without any matching score leaves 'Scores' entirely missing;
        # there is nothing to colour, so only the outlines are drawn.
        if geonuts_italy['Scores'].notna().any():
            geonuts_italy.plot(column='Scores', ax=ax, legend=False, cmap=cmap, norm=norm)

        # Set title to the month name
        ax.set_title(f'{month_name}', fontsize=12)
        ax.set_axis_off()

    # Lay out the maps first, leaving the right-hand strip free. The colour
    # bar axes is placed by hand afterwards because tight_layout cannot manage
    # axes created with add_axes and would warn about them.
    fig.tight_layout(rect=[0, 0, 0.85, 0.95])

    cbar_ax = fig.add_axes([0.87, 0.15, 0.03, 0.7])
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])  # public replacement for assigning the private ``_A`` attribute
    cbar = fig.colorbar(sm, cax=cbar_ax)
    cbar.set_label('Scores')

    plt.show()

In [None]:
# June-August maps of the summed 'Agriculture' scores.
# NOTE: despite "log" in the function name, it builds a linear plt.Normalize colour scale.
nuts3_summer_category_log_plot('Agriculture')

In [None]:
# Mapping monthly aggregated category scores in each sector

def nuts3_month_all_categories_plot(month):
    """Draw a 2x3 grid of Italy maps for one month, one panel per category.

    The rows of the notebook-level ``df`` with ``df['month'] == month`` are
    selected once. For every category each row's value is credited to every
    code in its ``nuts`` list; codes that are keys of ``nuts3_mapping`` are
    expanded to the mapped codes, any other code is used unchanged. The
    per-code sums are left-joined onto ``gdf_italy`` and drawn with the
    category's own colormap and its own legend, so colour scales are not
    shared between panels.

    Parameters
    ----------
    month : int
        Value compared against ``df['month']``; also shown in the titles.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    category_cmap = {
        'Agriculture': 'YlOrBr',
        'Water': 'Blues',
        'Ecosystem': 'Greens',
        'Economy': 'Purples',
        'Society': 'RdPu',
        'General': 'Greys'
    }
    categories = list(category_cmap)

    fig, axes = plt.subplots(2, 3, figsize=(20, 10))
    fig.suptitle(f'Scores for All Categories in Month {month}', fontsize=20)

    # The month filter does not depend on the category, so it is applied once
    df_filtered = df[df['month'] == month]
    panels = axes.flatten()

    for category, ax in zip(categories, panels):
        # One (code, score) pair per tweet and per resolved code
        nuts3_scores = []
        for codes, score in zip(df_filtered['nuts'], df_filtered[category]):
            for code in codes:
                # Mapped parents expand to their level 3 codes; other codes pass through
                for mapped_code in nuts3_mapping.get(code, [code]):
                    nuts3_scores.append((mapped_code, score))

        nuts3_scores_df = pd.DataFrame(nuts3_scores, columns=['NUTS', 'Scores'])
        nuts3_scores_df = nuts3_scores_df.groupby('NUTS').sum().reset_index()

        geonuts_italy = gdf_italy.merge(nuts3_scores_df, how='left', left_on='NUTS_ID', right_on='NUTS')
        geonuts_italy.boundary.plot(ax=ax, color='black')

        # Without any matching score the 'Scores' column is entirely missing;
        # there is nothing to colour, so only the outlines are drawn.
        if geonuts_italy['Scores'].notna().any():
            geonuts_italy.plot(column='Scores', ax=ax, legend=True, cmap=category_cmap[category])

        ax.set_title(f'{category} - Month {month}', fontsize=12)
        ax.set_axis_off()

    # Remove panels that have no category assigned (none with six categories on a 2x3 grid)
    for ax in panels[len(categories):]:
        ax.remove()

    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()

In [None]:
# One map per category for month 6
nuts3_month_all_categories_plot(6)

In [None]:
# Aggregate scores across all categories for each NUTS3 code

def nuts3_category(category, data=None, mapping=None):
    """Sum one category's scores per NUTS code over all rows.

    Each row's ``category`` value is credited to every code in the row's
    ``nuts`` list. A code that is a key of ``mapping`` is replaced by all the
    codes it maps to (each receives the full score); any other code is used
    unchanged.

    Parameters
    ----------
    category : str
        Name of the score column; also the name of the value column returned.
    data : pandas.DataFrame, optional
        Frame with a ``nuts`` column (iterable of codes per row) and a
        ``category`` column. Defaults to the notebook-level ``df``.
    mapping : dict, optional
        Parent code -> list of codes it expands to. Defaults to the
        notebook-level ``nuts3_mapping``.

    Returns
    -------
    pandas.DataFrame
        Columns ``['NUTS', category]`` with one row per code, holding the
        summed score.
    """
    # Defaults are resolved at call time so the notebook globals are picked up
    if data is None:
        data = df
    if mapping is None:
        mapping = nuts3_mapping

    nuts3_scores = [
        (mapped_code, score)
        for codes, score in zip(data['nuts'], data[category])
        for code in codes
        for mapped_code in mapping.get(code, [code])
    ]

    nuts3_scores_df = pd.DataFrame(nuts3_scores, columns=['NUTS', category])
    nuts3_scores_df = nuts3_scores_df.groupby('NUTS').sum().reset_index()

    return nuts3_scores_df

categories = ['Agriculture', 'Water', 'Ecosystem', 'Economy', 'Society', 'General']

# Outer-join the per-category totals on the NUTS code, one column per category.
# The first frame is identified by its position rather than by testing
# ``all_scores_df.empty``: an empty first result would otherwise be replaced
# instead of merged, silently dropping its category column.
all_scores_df = pd.DataFrame()

for position, category in enumerate(categories):
    category_scores_df = nuts3_category(category)

    if position == 0:
        all_scores_df = category_scores_df
    else:
        all_scores_df = pd.merge(all_scores_df, category_scores_df, on='NUTS', how='outer')

In [None]:
# Compute normalized scores and determine rank 1, 2, and 3 categories

def _kth_category(row, k):
    """Return the label of the k-th largest non-missing value in ``row``.

    Ties are broken by position (``nlargest`` keeps the first occurrence), so
    different values of ``k`` never return the same label. Returns NaN when
    the row holds fewer than ``k`` non-missing values.
    """
    top_k = row.dropna().nlargest(k)
    return top_k.index[-1] if len(top_k) == k else np.nan

# Drop a 'Grand Total' row appended by a previous run of this cell, so that
# re-running it does not add a second row and double the totals.
all_scores_df = all_scores_df.drop(index='Grand Total', errors='ignore')

grand_totals = all_scores_df.drop(columns=['NUTS']).sum()
grand_totals_row = pd.DataFrame([grand_totals], index=['Grand Total'])

all_scores_df = pd.concat([all_scores_df, grand_totals_row])

# Each value becomes its percentage of that category's total over all codes
all_percentages_df = all_scores_df.drop(columns=['NUTS']).div(grand_totals) * 100
all_percentages_df['NUTS'] = all_scores_df['NUTS']

# Taking the smallest of the top k values would repeat the rank 1 label
# whenever the leading values are tied, so the k-th label is selected by position.
for rank in (1, 2, 3):
    all_percentages_df[f'Rank {rank} Score'] = all_percentages_df[categories].apply(
        _kth_category, axis=1, k=rank
    )

rank_df = all_percentages_df[['NUTS', 'Rank 1 Score', 'Rank 2 Score', 'Rank 3 Score']]

In [None]:
# Plot rank 1, 2, and 3 categories

# Fixed colour per category, used for the map fill and the legend
category_colors = {
    'Agriculture': '#d95f0e',
    'Water': '#3182bd',
    'Ecosystem': '#31a354',
    'Economy': '#756bb1',
    'Society': '#dd1c77',
    'General': '#636363'
}

# Inner join: only geometries whose NUTS_ID appears in rank_df are kept
df_merged = gdf_italy.merge(rank_df, left_on='NUTS_ID', right_on='NUTS')

rank_numbers = (1, 2, 3)

# Translate each ranked category name into its colour
for rank in rank_numbers:
    df_merged[f'Rank {rank} Color'] = df_merged[f'Rank {rank} Score'].map(category_colors)

fig, ax = plt.subplots(1, 3, figsize=(15, 10))

# One panel per rank: filled regions, black outlines, no ticks or tick labels
for panel, rank in zip(ax, rank_numbers):
    df_merged.plot(ax=panel, color=df_merged[f'Rank {rank} Color'], alpha=0.7)
    df_merged.boundary.plot(ax=panel, color='black')
    panel.set_title(f'Rank {rank}')
    panel.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

# The same category legend is attached to every panel
legend_patches = [
    mpatches.Patch(color=hex_color, label=name, alpha=0.7)
    for name, hex_color in category_colors.items()
]
for panel in ax:
    panel.legend(handles=legend_patches, loc='upper right', title='Categories')

plt.tight_layout()
plt.show()