# Data analysis

Plotting and statistical analysis of data generated by `build_distribution_data.ipynb`

## Source data

- Located in folder `./outputs/`
- `distribution_data.csv`
- `communa_data.csv` 


## Processing decisions

### Counting terminal topics once only in top-level categories

There is no double-counting of terminal topics within top-level categories.

### Assignment of additional topics

The additional four topics were assigned to `topics_list.csv` as follows:
- `principles_and_symbols/Basic_Principles/democ`
- `rights_and_duties/Legal Procedural Rights/rulelaw`
- `rights_and_duties/Economic_Rights/socsec`
- `elections/Suffrage_and_Turnout/votemin`

### Double-counting of sections/responses

Some segments may be above-threshold for more than one topic. This means there is a difference between the _size of the set of above-threshold segments_ (how many segments are captured by one or more topics) and the _total of above-threshold segments across topics_ (how many captures are made by topics). For example, for constitution sections there are:
- 5039 sections in the set of above-threshold sections
- 7184 sections that are above-threshold across topics

The distribution of segments across topics is the subject of analysis. For this reason, translation of segment counts to a 0 - 1 scale is based on the total of above-threshold segments. 



In [None]:

__author__      = 'Roy Gardner'
__copyright__   = 'Copyright 2023, Roy and Sally Gardner'

import csv
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import os
from scipy import stats
import time

def plot_birectional(left,right,titles,max_x=0,labelled=False,file=''):
    """
    Plot a bidirectional bar chart with constitution section percentages on the left and
    response percentages on the right.
    x-axis is percentage of above-threshold sections or responses above-threshold for a topic or category
    y-axis is topic or category label (for all topics the label is the topic ID and is not shown)
    The left and right data sets must be the same length.
    param left: Data tuples (label,value) for the left side.
    param right: Data tuples (label,value) for the right side
    param titles: Left and right titles in a list [<left_title>,<right_title>]
    param max_x: Use to manually set the max_x value. If 0 defaults to maximum x + 1 of data series values
    param labelled: If True then display data tuple labels on the y-axis, otherwise keep the default
    numeric ticks and title the y-axis 'Topic index'
    param file: Base file name without extension. If not empty the figure is saved as
    ./figures/<file>.tif at 300 dpi before it is shown. If empty nothing is saved.
    raises ValueError: If either data set is empty or the data sets differ in length.
    """
    # Fail early with a clear message instead of part-way through drawing
    if len(left) == 0 or len(right) == 0:
        raise ValueError('left and right must each contain at least one (label,value) tuple')
    if len(left) != len(right):
        raise ValueError('left and right must be the same length, got ' +\
                         str(len(left)) + ' and ' + str(len(right)))

    column0 = [t[1] for t in left]
    column1 = [t[1] for t in right]

    # Derive the tick range from the data unless the caller fixed it
    if max_x == 0:
        max_x = int(max(max(column0),max(column1)))+1

    color_red = '#fd625e'
    color_blue = '#01b8aa'
    index = list(range(0,len(left)))
    title0 = titles[0]
    title1 = titles[1]

    fig, axes = plt.subplots(figsize=(10,10), ncols=2, sharey=False)
    fig.tight_layout()

    axes[0].barh(index, column0, align='center', color=color_red, zorder=10)
    axes[0].set_title(title0, fontsize=16, pad=15, color=color_red)
    axes[1].barh(index, column1, align='center', color=color_blue, zorder=10)
    axes[1].set_title(title1, fontsize=16, pad=15, color=color_blue)

    # Mirror the left panel so that both sets of bars grow outwards from the shared centre line
    axes[0].invert_xaxis()

    # wspace=0 joins the two panels into one back-to-back chart
    plt.subplots_adjust(wspace=0, top=0.85, bottom=0.1, left=0.18, right=0.95)

    # Same ticks on both sides, every 5 units
    axes[0].set_xticks(list(range(0,max_x+5,5)))
    axes[1].set_xticks(list(range(0,max_x+5,5)))
    axes[1].set_yticks([])

    if labelled:
        axes[0].set_yticks(range(0,len(left)))
        axes[0].set_yticklabels([t[0] for t in left])
    else:
        axes[0].set_ylabel('Topic index',fontsize='x-large',)

    axes[0].tick_params(labelsize='x-large')
    axes[1].tick_params(labelsize='x-large')
    axes[0].set_xlabel('% of above-threshold sections',fontsize='x-large',)
    axes[1].set_xlabel('% of above-threshold responses',fontsize='x-large',)

    if len(file) > 0:
        # savefig does not create missing folders
        os.makedirs('./figures',exist_ok=True)
        plt.savefig('./figures/' + file + '.tif',dpi=300,bbox_inches='tight')
    plt.show()




## Load the distribution data files

In [None]:

# Both files are read the same way: the first row is kept as the header so that later cells
# can look columns up by name, and the remaining rows are kept as lists of strings.
# newline='' is what the csv module requires of the file object so that line endings
# inside quoted fields are parsed correctly.
# NOTE: errors='ignore' silently drops any bytes that are not valid UTF-8.

# Distribution data
with open('./outputs/distribution_data.csv','r',encoding='utf-8',errors='ignore',newline='') as f:
    reader = csv.reader(f)
    dist_header = next(reader)
    distribution_data = list(reader)

# Communa data
with open('./outputs/communa_data.csv','r',encoding='utf-8',errors='ignore',newline='') as f:
    reader = csv.reader(f)
    communa_header = next(reader)
    communa_data = list(reader)


In [None]:
# Show the column names of distribution_data.csv; the cells below look columns up by these names
print(dist_header)

## Top-10 topics

- Constitution sections
- Aggregated over source types

Done programmatically, but could sort the `distribution_data.csv` spreadsheet.


In [None]:

# Rank the topics by raw count, highest first, and print the first two columns of the ten leaders.
# Each count column is located once by name rather than once per row inside the sort key.
const_count_col = dist_header.index('# Constitution sections')
sorted_const_data = sorted(distribution_data,key=lambda t:int(t[const_count_col]),reverse=True)
print([(t[0],t[1]) for t in sorted_const_data[0:10]])
print()

response_count_col = dist_header.index('# Aggregated responses')
sorted_response_data = sorted(distribution_data,key=lambda t:int(t[response_count_col]),reverse=True)
print([(t[0],t[1]) for t in sorted_response_data[0:10]])



## Analysis by top-level category

- Depends on `distribution_data` and `dist_header` from the load cell above
- Only counting a topic once within the top-level categories.


### Figure 2: Analysis by top-level category


In [None]:
# Get the counts in each category
# Count columns, in the order they are stored in every cat_counts_dict and percent_dict value
count_columns = ['# Constitution sections','# Aggregated responses','# Regional responses',
                 '# Provincial responses','# ELA responses']
# Column positions are resolved once instead of once per row and category
count_indices = [dist_header.index(name) for name in count_columns]
categories_index = dist_header.index('Topic categories')

cat_counts_dict = {}
for row in distribution_data:
    row_counts = [int(row[col]) for col in count_indices]
    # A topic contributes its counts to every category in its comma-separated list
    for cat in row[categories_index].split(','):
        if cat not in cat_counts_dict:
            cat_counts_dict[cat] = [0]*len(count_columns)
        for pos,count in enumerate(row_counts):
            cat_counts_dict[cat][pos] += count

# Totals per count column over all categories
tot_const,tot_aggr,tot_reg,tot_prov,tot_ela = [sum(v[pos] for v in cat_counts_dict.values())
                                               for pos in range(len(count_columns))]
column_totals = [tot_const,tot_aggr,tot_reg,tot_prov,tot_ela]

# Convert counts to percentages of total
# Each column is divided by its own total. The regional, provincial and ELA columns were
# previously divided by the aggregated total, so they were not percentages of their own series.
# A column whose total is zero holds only zeros, so it is reported as 0.0 rather than dividing by zero.
percent_dict = {}
for k,v in cat_counts_dict.items():
    percent_dict[k] = [round((count/total)*100,2) if total else 0.0
                       for count,total in zip(v,column_totals)]

# Sort by constitutions scores
sorted_percent_dict = sorted(percent_dict.items(),key=lambda kv:kv[1][0],reverse=False)

plot_birectional([(t[0],t[1][0]) for t in sorted_percent_dict],[(t[0],t[1][1]) for t in sorted_percent_dict],
                 ['Constitution sections','Consultation responses'],max_x=60,labelled=True,file='figure_2')


### Stats - constitutions and source type

In [None]:
# Print stats
# Pearson correlation between pairs of series over the top-level categories.
# Each entry is (printed label, position of first series, position of second series) where the
# positions index a percent_dict value: 0 Const, 1 Aggr, 2 Reg, 3 Prov, 4 ELA.
comparison_groups = [
    [('Const-Aggr',0,1),('Const-Reg',0,2),('Const-Prov',0,3),('Const-ELA',0,4)],
    [('Reg-Prov',2,3),('Reg-ELA',2,4),('Prov-ELA',3,4)],
]

for group_number,group in enumerate(comparison_groups):
    # Blank line between groups
    if group_number > 0:
        print()
    for label,first_pos,second_pos in group:
        first_series = [v[first_pos] for v in percent_dict.values()]
        second_series = [v[second_pos] for v in percent_dict.values()]
        print(label,stats.pearsonr(first_series,second_series))



## Analysis by all topics

- Aggregate responses
- Separate responses


### Figure 1: Analysis using source types

In [None]:
# Percentages per topic, keyed by the first column of each row.
# The two percentage columns are located once rather than once per row.
const_percent_col = dist_header.index('% Constitution sections')
response_percent_col = dist_header.index('% Aggregated responses')

const_dict = {row[0]:float(row[const_percent_col]) for row in distribution_data}
sorted_const_dict = sorted(const_dict.items(),key=lambda kv:kv[1],reverse=False)

response_dict = {row[0]:float(row[response_percent_col]) for row in distribution_data}

# Get aggregated responses in same order as constitutions
sorted_response_dict = [(topic,response_dict[topic]) for topic,_ in sorted_const_dict]

plot_birectional(sorted_const_dict,sorted_response_dict,
                 ['Constitution sections','Consultation responses'],labelled=False,file='figure_1')



### Stats - constitutions and source type

In [None]:
# Print stats
# Pearson correlation between pairs of percentage columns of distribution_data over all topics.
# Each entry is (printed label, first column name, second column name).
comparison_groups = [
    [('Const-Aggr','% Constitution sections','% Aggregated responses'),
     ('Const-Reg','% Constitution sections','% Regional responses'),
     ('Const-Prov','% Constitution sections','% Provincial responses'),
     ('Const-ELA','% Constitution sections','% ELA responses')],
    [('Reg-Prov','% Regional responses','% Provincial responses'),
     ('Reg-ELA','% Regional responses','% ELA responses'),
     ('Prov-ELA','% Provincial responses','% ELA responses')],
]

for group_number,group in enumerate(comparison_groups):
    # Blank line between groups
    if group_number > 0:
        print()
    for label,first_name,second_name in group:
        first_series = [float(row[dist_header.index(first_name)]) for row in distribution_data]
        second_series = [float(row[dist_header.index(second_name)]) for row in distribution_data]
        print(label,stats.pearsonr(first_series,second_series))



### Analysis of communa types

Communa type is aggregated in the Provincial and ELA source types so we ask:

1. What is the association between communa types within the Provincial and ELA source types?
2. What is the association between each of the communa types across the Provincial and ELA source types?


In [None]:

# Pearson correlation between pairs of percentage columns of communa_data over all topics.
# Each entry is (printed label, first column name, second column name).
# Groups: within Provincial, within ELA, then the same communa type across Provincial and ELA.
comparison_groups = [
    [('Prov_urban-Prov_rural','% Provincial Urban','% Provincial Rural'),
     ('Prov_urban-Prov_mixed','% Provincial Urban','% Provincial Mixed'),
     ('Prov_rural-Prov_mixed','% Provincial Rural','% Provincial Mixed')],
    [('ELA_urban-ELA_rural','% ELA Urban','% ELA Rural'),
     ('ELA_urban-ELA_mixed','% ELA Urban','% ELA Mixed'),
     ('ELA_rural-ELA_mixed','% ELA Rural','% ELA Mixed')],
    [('Prov_urban-ELA_urban','% Provincial Urban','% ELA Urban'),
     ('Prov_rural-ELA_rural','% Provincial Rural','% ELA Rural'),
     ('Prov_mixed-ELA_mixed','% Provincial Mixed','% ELA Mixed')],
]

for group_number,group in enumerate(comparison_groups):
    # Blank line between groups
    if group_number > 0:
        print()
    for label,first_name,second_name in group:
        first_series = [float(row[communa_header.index(first_name)]) for row in communa_data]
        second_series = [float(row[communa_header.index(second_name)]) for row in communa_data]
        print(label,stats.pearsonr(first_series,second_series))


## Rights topics

### Figures 3a and 3b

In [None]:
# Count columns, in the order they are stored in every rights_counts_dict and rights_percent_dict value
count_columns = ['# Constitution sections','# Aggregated responses','# Regional responses',
                 '# Provincial responses','# ELA responses']
# Column positions are resolved once instead of once per row
count_indices = [dist_header.index(name) for name in count_columns]
rights_flag_index = dist_header.index('Rights topic')

# Keep only the rows flagged as rights topics
rights_counts_dict = {}
for row in distribution_data:
    if int(row[rights_flag_index]) == 1:
        # Use topic labels not IDs
        rights_counts_dict[row[1]] = [int(row[col]) for col in count_indices]

# Totals per count column over the rights topics
tot_const,tot_aggr,tot_reg,tot_prov,tot_ela = [sum(v[pos] for v in rights_counts_dict.values())
                                               for pos in range(len(count_columns))]
column_totals = [tot_const,tot_aggr,tot_reg,tot_prov,tot_ela]

# Convert counts to percentages of total
# Each column is divided by its own total. The regional, provincial and ELA columns were
# previously divided by the aggregated total, so they were not percentages of their own series.
# A column whose total is zero holds only zeros, so it is reported as 0.0 rather than dividing by zero.
rights_percent_dict = {}
for k,v in rights_counts_dict.items():
    rights_percent_dict[k] = [round((count/total)*100,2) if total else 0.0
                              for count,total in zip(v,column_totals)]

# Sort by constitutions scores
sorted_percent_dict = sorted(rights_percent_dict.items(),key=lambda kv:kv[1][0],reverse=False)

# Only the 30 highest-ranked topics are plotted
plot_birectional([(t[0],t[1][0]) for t in sorted_percent_dict][-30:],
                 [(t[0],t[1][1]) for t in sorted_percent_dict][-30:],
                 ['Constitution sections','Consultation responses'],labelled=True,file='figure_3a')

# Sort by aggregated scores
sorted_percent_dict = sorted(rights_percent_dict.items(),key=lambda kv:kv[1][1],reverse=False)

plot_birectional([(t[0],t[1][0]) for t in sorted_percent_dict][-30:],
                 [(t[0],t[1][1]) for t in sorted_percent_dict][-30:],
                 ['Constitution sections','Consultation responses'],labelled=True,file='figure_3b')


### Stats constitutions and response source types

In [None]:

# Print stats
# Pearson correlation between pairs of series over the rights topics.
# Each entry is (printed label, position of first series, position of second series) where the
# positions index a rights_percent_dict value: 0 Const, 1 Aggr, 2 Reg, 3 Prov, 4 ELA.
comparison_groups = [
    [('Const-Aggr',0,1),('Const-Reg',0,2),('Const-Prov',0,3),('Const-ELA',0,4)],
    [('Reg-Prov',2,3),('Reg-ELA',2,4),('Prov-ELA',3,4)],
]

for group_number,group in enumerate(comparison_groups):
    # Blank line between groups
    if group_number > 0:
        print()
    for label,first_pos,second_pos in group:
        first_series = [v[first_pos] for v in rights_percent_dict.values()]
        second_series = [v[second_pos] for v in rights_percent_dict.values()]
        print(label,stats.pearsonr(first_series,second_series))


### Stats communa

In [None]:
# Count columns, in the order they are stored in every rights_communa_count_dict and
# communa_percent_dict value
communa_count_columns = ['# Provincial Urban','# Provincial Rural','# Provincial Mixed',
                         '# ELA Urban','# ELA Rural','# ELA Mixed']
communa_count_indices = [communa_header.index(name) for name in communa_count_columns]

# communa_data rows have to be indexed with positions taken from communa_header. The flag column
# was previously located in dist_header, which is only right if both files share that position.
# NOTE(review): the dist_header position is kept as a fallback in case communa_data.csv does not
# name the column - confirm against the file and remove the fallback if it does.
if 'Rights topic' in communa_header:
    rights_flag_index = communa_header.index('Rights topic')
else:
    rights_flag_index = dist_header.index('Rights topic')

# Keep only the rows flagged as rights topics, keyed by the first column
rights_communa_count_dict = {}
for row in communa_data:
    if int(row[rights_flag_index]) == 1:
        rights_communa_count_dict[row[0]] = [int(row[col]) for col in communa_count_indices]

# Totals per count column over the rights topics
tot_prov_urban,tot_prov_rural,tot_prov_mixed,tot_ela_urban,tot_ela_rural,tot_ela_mixed = \
    [sum(v[pos] for v in rights_communa_count_dict.values()) for pos in range(len(communa_count_columns))]
communa_totals = [tot_prov_urban,tot_prov_rural,tot_prov_mixed,tot_ela_urban,tot_ela_rural,tot_ela_mixed]

# Convert counts to percentages of total
communa_percent_dict = {}
for k,v in rights_communa_count_dict.items():
    communa_percent_dict[k] = [round((count/total)*100,2) for count,total in zip(v,communa_totals)]

# Print stats
# Each entry is (printed label, position of first series, position of second series).
# Groups: within Provincial, within ELA, then the same communa type across Provincial and ELA.
comparison_groups = [
    [('Prov_urban-Prov_rural',0,1),('Prov_urban-Prov_mixed',0,2),('Prov_rural-Prov_mixed',1,2)],
    [('ELA_urban-ELA_rural',3,4),('ELA_urban-ELA_mixed',3,5),('ELA_rural-ELA_mixed',4,5)],
    [('Prov_urban-ELA_urban',0,3),('Prov_rural-ELA_rural',1,4),('Prov_mixed-ELA_mixed',2,5)],
]

for group_number,group in enumerate(comparison_groups):
    # Blank line between groups
    if group_number > 0:
        print()
    for label,first_pos,second_pos in group:
        first_series = [v[first_pos] for v in communa_percent_dict.values()]
        second_series = [v[second_pos] for v in communa_percent_dict.values()]
        print(label,stats.pearsonr(first_series,second_series))



## Institution-related topics

### Figures 4a and 4b

In [None]:
# Count columns, in the order they are stored in every inst_counts_dict and inst_percent_dict value
count_columns = ['# Constitution sections','# Aggregated responses','# Regional responses',
                 '# Provincial responses','# ELA responses']
# Column positions are resolved once instead of once per row
count_indices = [dist_header.index(name) for name in count_columns]
inst_flag_index = dist_header.index('Institution topic')

# Keep only the rows flagged as institution topics
inst_counts_dict = {}
for row in distribution_data:
    if int(row[inst_flag_index]) == 1:
        # Use topic labels
        inst_counts_dict[row[1]] = [int(row[col]) for col in count_indices]

# Totals per count column over the institution topics
tot_const,tot_aggr,tot_reg,tot_prov,tot_ela = [sum(v[pos] for v in inst_counts_dict.values())
                                               for pos in range(len(count_columns))]
column_totals = [tot_const,tot_aggr,tot_reg,tot_prov,tot_ela]

# Convert counts to percentages of total
# Each column is divided by its own total. The regional, provincial and ELA columns were
# previously divided by the aggregated total, so they were not percentages of their own series.
# A column whose total is zero holds only zeros, so it is reported as 0.0 rather than dividing by zero.
inst_percent_dict = {}
for k,v in inst_counts_dict.items():
    inst_percent_dict[k] = [round((count/total)*100,2) if total else 0.0
                            for count,total in zip(v,column_totals)]

# Sort by constitutions scores
sorted_percent_dict = sorted(inst_percent_dict.items(),key=lambda kv:kv[1][0],reverse=False)

# Only the 30 highest-ranked topics are plotted
plot_birectional([(t[0],t[1][0]) for t in sorted_percent_dict][-30:],
                 [(t[0],t[1][1]) for t in sorted_percent_dict][-30:],
                 ['Constitution sections','Consultation responses'],labelled=True,file='figure_4a')

# Sort by aggregated scores
sorted_percent_dict = sorted(inst_percent_dict.items(),key=lambda kv:kv[1][1],reverse=False)

plot_birectional([(t[0],t[1][0]) for t in sorted_percent_dict][-30:],
                 [(t[0],t[1][1]) for t in sorted_percent_dict][-30:],
                 ['Constitution sections','Consultation responses'],labelled=True,file='figure_4b')


### Stats - constitutions and source type

In [None]:
# Print stats
# Pearson correlation between pairs of series over the institution topics.
# Each entry is (printed label, position of first series, position of second series) where the
# positions index an inst_percent_dict value: 0 Const, 1 Aggr, 2 Reg, 3 Prov, 4 ELA.
comparison_groups = [
    [('Const-Aggr',0,1),('Const-Reg',0,2),('Const-Prov',0,3),('Const-ELA',0,4)],
    [('Reg-Prov',2,3),('Reg-ELA',2,4),('Prov-ELA',3,4)],
]

for group_number,group in enumerate(comparison_groups):
    # Blank line between groups
    if group_number > 0:
        print()
    for label,first_pos,second_pos in group:
        first_series = [v[first_pos] for v in inst_percent_dict.values()]
        second_series = [v[second_pos] for v in inst_percent_dict.values()]
        print(label,stats.pearsonr(first_series,second_series))


### Stats - communa type

In [None]:
# Count columns, in the order they are stored in every inst_communa_count_dict and
# communa_percent_dict value
communa_count_columns = ['# Provincial Urban','# Provincial Rural','# Provincial Mixed',
                         '# ELA Urban','# ELA Rural','# ELA Mixed']
communa_count_indices = [communa_header.index(name) for name in communa_count_columns]

# communa_data rows have to be indexed with positions taken from communa_header. The flag column
# was previously located in dist_header, which is only right if both files share that position.
# NOTE(review): the dist_header position is kept as a fallback in case communa_data.csv does not
# name the column - confirm against the file and remove the fallback if it does.
if 'Institution topic' in communa_header:
    inst_flag_index = communa_header.index('Institution topic')
else:
    inst_flag_index = dist_header.index('Institution topic')

# Keep only the rows flagged as institution topics, keyed by the first column
inst_communa_count_dict = {}
for row in communa_data:
    if int(row[inst_flag_index]) == 1:
        inst_communa_count_dict[row[0]] = [int(row[col]) for col in communa_count_indices]

# Totals per count column over the institution topics
tot_prov_urban,tot_prov_rural,tot_prov_mixed,tot_ela_urban,tot_ela_rural,tot_ela_mixed = \
    [sum(v[pos] for v in inst_communa_count_dict.values()) for pos in range(len(communa_count_columns))]
communa_totals = [tot_prov_urban,tot_prov_rural,tot_prov_mixed,tot_ela_urban,tot_ela_rural,tot_ela_mixed]

# Convert counts to percentages of total
communa_percent_dict = {}
for k,v in inst_communa_count_dict.items():
    communa_percent_dict[k] = [round((count/total)*100,2) for count,total in zip(v,communa_totals)]

# Print stats
# Each entry is (printed label, position of first series, position of second series).
# Groups: within Provincial, within ELA, then the same communa type across Provincial and ELA.
comparison_groups = [
    [('Prov_urban-Prov_rural',0,1),('Prov_urban-Prov_mixed',0,2),('Prov_rural-Prov_mixed',1,2)],
    [('ELA_urban-ELA_rural',3,4),('ELA_urban-ELA_mixed',3,5),('ELA_rural-ELA_mixed',4,5)],
    [('Prov_urban-ELA_urban',0,3),('Prov_rural-ELA_rural',1,4),('Prov_mixed-ELA_mixed',2,5)],
]

for group_number,group in enumerate(comparison_groups):
    # Blank line between groups
    if group_number > 0:
        print()
    for label,first_pos,second_pos in group:
        first_series = [v[first_pos] for v in communa_percent_dict.values()]
        second_series = [v[second_pos] for v in communa_percent_dict.values()]
        print(label,stats.pearsonr(first_series,second_series))

