In [76]:
import numpy as np
import pandas as pd
from pathlib import Path
import json
import yaml

# Lift pandas' display truncation for both rows and columns, so frames shown
# in this notebook are rendered in full.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

In [77]:
# Harvest every JSON export found under `folder_to_harvest` and stack the
# per-subject statistics of all of them into one long frame, `db`.
folder_to_harvest = Path("../data/douhet")

# Sorted so that the row order of `db` (and of every file exported from it)
# does not depend on the order in which the filesystem lists the files.
json_paths = sorted(folder_to_harvest.glob("**/*.json"))

# One frame per JSON file; concatenated once after the loop instead of
# re-copying an ever-growing frame on every iteration.
course_frames = []

for data_path in json_paths:

    # The file stem (minus "_data") must split into exactly three "_"-separated
    # parts: course, edition and context. Anything else raises ValueError here.
    course_label = data_path.stem.replace("_data", "")
    course, edition, ctx = course_label.split("_")

    # Lookup table merged on its "lettera" column further down.
    # NOTE(review): this path is relative to "./data" while the JSON files are
    # globbed from "../data" - confirm that both locations are intended.
    associazione_path = Path(f"./data/{course}/{course}_{edition}/associazione.xlsx")
    associazione_df = pd.read_excel(associazione_path)

    # Explicit UTF-8 so parsing does not depend on the platform's default
    # encoding; the file is kept open only while it is being parsed.
    with open(data_path, "r", encoding="utf-8") as fin:
        data = json.load(fin)

    # Only the first top-level key of the document is read.
    root_key = next(iter(data))

    sna_micro_stats_a = data[root_key]["sna"]["micro_stats_a"]
    sna_micro_stats_b = data[root_key]["sna"]["micro_stats_b"]
    sna_micro_sociogram = data[root_key]["sociogram"]["micro_stats"]

    # Each block is transposed before use; the "_a" / "_b" suffixes keep the
    # two SNA blocks apart once they sit side by side.
    sna_micro_stats_a_df = pd.DataFrame(sna_micro_stats_a).T.add_suffix("_a", axis=1)
    sna_micro_stats_b_df = pd.DataFrame(sna_micro_stats_b).T.add_suffix("_b", axis=1)
    sociogram_micro_stats_df = pd.DataFrame(sna_micro_sociogram).T

    course_df = pd.concat(
        [sna_micro_stats_a_df, sna_micro_stats_b_df, sociogram_micro_stats_df],
        axis=1,
    )
    course_df = course_df.reset_index(names="id")
    course_df.insert(0, "corso", course)
    course_df.insert(1, "anno", edition)
    course_df.insert(3, "contesto", ctx)

    # Default (inner) merge: rows whose "id" has no matching "lettera" in the
    # lookup table are dropped.
    course_df = course_df.merge(associazione_df, left_on="id", right_on="lettera").drop("lettera", axis=1)

    # Move the last column to position 1, right after "corso".
    # NOTE(review): presumably that last column is "soggetto" coming from
    # associazione.xlsx - verify the workbook has no further columns.
    columns_to_reorder = list(course_df.columns)
    course_df = course_df.loc[:, [*columns_to_reorder[:1], columns_to_reorder[-1], *columns_to_reorder[1:-1]]]

    course_frames.append(course_df)

# Original row labels are kept (no ignore_index), exactly as the incremental
# concatenation did; an empty harvest still yields an empty DataFrame.
db = pd.concat(course_frames) if course_frames else pd.DataFrame()

# Lower-case every subject name, then rewrite a handful of known spelling
# variants to a single form (variant -> replacement).
# NOTE(review): presumably this is so the same person matches across
# editions - confirm with the data owner.
soggetto_corrections = {
    "albanese michele": "albanese michelle",
    "iovane daniele": "iovane daniele thomas",
    "luongo beatrice": "luongo beatrice gaia azzurra",
    "panicola giacomo christofer": "panicola giacomo christopher",
    "suriano campagna": "suriano campagna mario",
}

db.soggetto = db.soggetto.str.lower()
for variant, replacement in soggetto_corrections.items():
    db.loc[db.soggetto == variant, "soggetto"] = replacement

In [94]:
# Build the long-format table for the "vega" course: one row per
# (subject, anno, contesto), with every "rank" / "lns" column removed.

# Leading columns of the exported table, in output order.
identifier_columns = ["id", "anno", "contesto", "lettera"]

# From here on the subject name is the identifier ("id") and the former
# per-course "id" becomes "lettera". `rename` returns a new frame, so `db`
# itself is left untouched.
long_db = db.rename(columns={"soggetto": "id", "id": "lettera"})
long_db = long_db.loc[long_db["corso"] == "vega"].drop(columns=["corso"])

# Columns are selected by name rather than by position, so a change in the
# upstream column order cannot silently shuffle the identifier columns.
metric_columns = [
    col
    for col in long_db.columns
    if col not in identifier_columns and "rank" not in col and "lns" not in col
]
long_db = long_db.loc[:, [*identifier_columns, *metric_columns]]

long_db.to_excel("./analysis/abgrid_long.xlsx", index=False)

# Preview goes last: only a cell's final expression is displayed.
long_db.head()

In [92]:
# Reshape the long table to wide format: one row per subject ("id") and one
# column per (value, contesto, anno) combination.
# NOTE(review): presumably the sort is what fixes the order of the pivoted
# columns - confirm before removing it.
wide_db = long_db.sort_values(by=["id", "anno", "contesto"])

# `pivot` raises ValueError if an (id, contesto, anno) combination occurs
# more than once.
wide_db = wide_db.pivot(index="id", columns=["contesto", "anno"])

# Flatten the three-level column labels into "<value>_<contesto>_<anno>".
wide_db.columns = ["_".join(col).strip() for col in wide_db.columns.values]

# Missing values are written with the default `na_rep` (an empty cell).
# `na_rep` must be a string: passing np.nan would replace NaN with NaN and
# ask the Excel engine to write a non-finite float.
wide_db.reset_index(names=["id"]).to_excel("./analysis/abgrid_wide.xlsx", index=False)
wide_db

Unnamed: 0_level_0,lettera_lud_i,lettera_stu_i,lettera_lud_ii,lettera_stu_ii,ic_a_lud_i,ic_a_stu_i,ic_a_lud_ii,ic_a_stu_ii,pr_a_lud_i,pr_a_stu_i,pr_a_lud_ii,pr_a_stu_ii,bt_a_lud_i,bt_a_stu_i,bt_a_lud_ii,bt_a_stu_ii,cl_a_lud_i,cl_a_stu_i,cl_a_lud_ii,cl_a_stu_ii,hu_a_lud_i,hu_a_stu_i,hu_a_lud_ii,hu_a_stu_ii,nd_a_lud_i,nd_a_stu_i,nd_a_lud_ii,nd_a_stu_ii,ic_b_lud_i,ic_b_stu_i,ic_b_lud_ii,ic_b_stu_ii,pr_b_lud_i,pr_b_stu_i,pr_b_lud_ii,pr_b_stu_ii,bt_b_lud_i,bt_b_stu_i,bt_b_lud_ii,bt_b_stu_ii,cl_b_lud_i,cl_b_stu_i,cl_b_lud_ii,cl_b_stu_ii,hu_b_lud_i,hu_b_stu_i,hu_b_lud_ii,hu_b_stu_ii,nd_b_lud_i,nd_b_stu_i,nd_b_lud_ii,nd_b_stu_ii,received_preferences_lud_i,received_preferences_stu_i,received_preferences_lud_ii,received_preferences_stu_ii,received_rejections_lud_i,received_rejections_stu_i,received_rejections_lud_ii,received_rejections_stu_ii,given_preferences_lud_i,given_preferences_stu_i,given_preferences_lud_ii,given_preferences_stu_ii,given_rejections_lud_i,given_rejections_stu_i,given_rejections_lud_ii,given_rejections_stu_ii,mutual_preferences_lud_i,mutual_preferences_stu_i,mutual_preferences_lud_ii,mutual_preferences_stu_ii,mutual_rejections_lud_i,mutual_rejections_stu_i,mutual_rejections_lud_ii,mutual_rejections_stu_ii,balance_lud_i,balance_stu_i,balance_lud_ii,balance_stu_ii,orientation_lud_i,orientation_stu_i,orientation_lud_ii,orientation_stu_ii,impact_lud_i,impact_stu_i,impact_lud_ii,impact_stu_ii,affiliation_coeff_raw_lud_i,affiliation_coeff_raw_stu_i,affiliation_coeff_raw_lud_ii,affiliation_coeff_raw_stu_ii,influence_coeff_raw_lud_i,influence_coeff_raw_stu_i,influence_coeff_raw_lud_ii,influence_coeff_raw_stu_ii,affiliation_coeff_lud_i,affiliation_coeff_stu_i,affiliation_coeff_lud_ii,affiliation_coeff_stu_ii,influence_coeff_lud_i,influence_coeff_stu_i,influence_coeff_lud_ii,influence_coeff_stu_ii,status_lud_i,status_stu_i,status_lud_ii,status_stu_ii
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1
abbamondi mario,A,A,A,A,0.081081,0.0,0.107143,0.107143,0.042997,0.004934,0.057399,0.061845,0.011887,0.0,0.05291,0.072156,0.303332,0.0,0.3667,0.394366,0.053911,0.017801,0.050918,0.040528,0,1,0.0,0.0,0.054054,0.054054,0.035714,0.107143,0.018422,0.017352,0.025082,0.106543,0.014264,0.008258,0.0,0.005291,0.054054,0.054054,0.047619,0.251742,0.048975,0.057605,0.0,0.043932,0,0,2.0,0.0,3,0,3.0,3.0,2,2,1.0,3.0,3,3,3.0,3.0,3,3,0.0,1.0,2,0,2.0,1.0,0,0,0.0,1.0,1,-2,2.0,0.0,0,0,3.0,2.0,5,2,4.0,6.0,1,-2,5.0,2.0,5,0,5.0,4.0,97.971054,92.25023,105.222232,97.89923,104.019421,92.453141,102.650214,100.882583,controversial,underrated,-,controversial
albanese michelle,B,B,B,B,0.027027,0.027027,0.178571,0.035714,0.009828,0.042179,0.038896,0.013434,0.021396,0.141479,0.150551,0.039638,0.061776,0.335949,0.388593,0.247788,0.01565,0.01975,0.009917,0.026952,0,0,0.0,0.0,0.0,0.027027,0.035714,0.0,0.008635,0.029406,0.042476,0.009501,0.0,0.005255,0.033069,0.0,0.0,0.204247,0.194444,0.0,0.032719,0.081605,0.081769,0.065713,1,0,0.0,1.0,1,1,5.0,1.0,0,1,1.0,0.0,3,3,3.0,3.0,3,3,3.0,3.0,1,0,2.0,1.0,0,1,1.0,0.0,1,0,4.0,1.0,0,0,0.0,0.0,1,2,6.0,1.0,1,0,4.0,1.0,2,1,7.0,2.0,97.971054,95.977967,102.655372,95.868487,93.605466,94.946886,109.333362,95.194825,neglected,underrated,-,neglected
bagatin giulio,C,C,,,0.081081,0.054054,,,0.02781,0.011243,,,0.050075,0.014802,,,0.284029,0.202878,,,0.080617,0.025469,,,0,0,,,0.0,0.0,,,0.008635,0.011076,,,0.0,0.0,,,0.0,0.0,,,0.047432,0.018157,,,1,1,,,3,2,,,0,0,,,3,3,,,3,3,,,0,0,,,0,0,,,3,2,,,0,0,,,3,2,,,3,2,,,3,2,,,101.826052,99.705705,,,97.076785,97.44063,,,-,underrated,,
bonfanti francesca,D,D,C,C,0.027027,0.378378,0.107143,0.25,0.013109,0.05173,0.012132,0.024796,0.017267,0.091629,0.036243,0.109832,0.027027,0.395484,0.185969,0.261682,0.005962,0.042549,0.02521,0.005156,0,0,0.0,0.0,0.0,0.027027,0.178571,0.0,0.008635,0.015355,0.05388,0.009501,0.0,0.000751,0.0,0.0,0.0,0.040541,0.206044,0.0,0.0,0.001682,0.0,0.060143,3,0,2.0,1.0,1,14,3.0,7.0,0,1,5.0,0.0,3,3,3.0,3.0,0,2,0.0,3.0,1,1,0.0,2.0,0,0,0.0,0.0,1,13,-2.0,7.0,3,1,3.0,0.0,1,15,8.0,7.0,4,14,1.0,7.0,2,15,3.0,9.0,103.753551,122.07213,94.954793,108.05295,93.605466,129.859311,95.967066,115.10198,neglected,popular,rejected,appreciated
ciliento giacomo pio,E,E,D,D,0.135135,0.081081,0.214286,0.285714,0.040211,0.038719,0.067044,0.110191,0.050663,0.022472,0.10377,0.087645,0.284029,0.343332,0.419931,0.538462,0.019668,0.039374,0.049236,0.042894,0,0,0.0,0.0,0.0,0.0,0.0,0.035714,0.008635,0.011076,0.018394,0.015207,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.128571,0.0,0.0,0.0,0.0,3,3,3.0,2.0,5,3,6.0,8.0,0,0,0.0,1.0,3,3,3.0,3.0,0,0,0.0,0.0,1,0,2.0,1.0,0,0,0.0,0.0,5,3,6.0,7.0,3,3,3.0,3.0,5,3,6.0,9.0,8,6,9.0,10.0,6,3,8.0,9.0,111.463546,107.16118,115.489672,114.145182,107.49074,99.934375,112.674936,115.10198,-,-,appreciated,popular
cimini flaminia,F,F,,,0.108108,0.027027,,,0.015701,0.016886,,,0.02475,0.051076,,,0.108108,0.249946,,,0.005923,0.01534,,,0,0,,,0.0,0.081081,,,0.008635,0.034757,,,0.0,0.025526,,,0.0,0.180978,,,0.048975,0.057605,,,1,0,,,4,1,,,0,3,,,3,3,,,3,3,,,2,0,,,0,0,,,4,-2,,,0,0,,,4,4,,,4,-2,,,6,1,,,103.753551,92.25023,,,107.49074,94.946886,,,-,-,,
criscuolo lorenzo,G,G,E,E,0.081081,0.027027,0.0,0.035714,0.043024,0.006333,0.005375,0.009385,0.078453,0.008121,0.0,0.009694,0.291993,0.027027,0.0,0.198582,0.024383,0.046728,0.062184,0.038944,0,0,1.0,0.0,0.0,0.0,0.035714,0.035714,0.008635,0.011076,0.030429,0.012193,0.0,0.0,0.002646,0.000661,0.0,0.0,0.157563,0.035714,0.001109,0.008838,0.009207,0.007486,1,1,0.0,0.0,3,1,0.0,1.0,0,0,1.0,1.0,3,3,3.0,3.0,1,1,1.0,1.0,2,0,0.0,0.0,0,0,0.0,0.0,3,1,-1.0,0.0,2,2,2.0,2.0,3,1,1.0,2.0,5,3,1.0,2.0,5,1,0.0,1.0,105.681049,101.569574,94.954793,97.89923,104.019421,94.946886,85.942344,92.350945,-,underrated,neglected,underrated
cristiano asia assunta,H,H,F,F,0.0,0.0,0.035714,0.0,0.005379,0.004934,0.010288,0.005172,0.0,0.0,0.0,0.0,0.0,0.0,0.201827,0.0,0.007033,0.022593,0.010933,0.005501,1,1,0.0,1.0,0.378378,0.216216,0.107143,0.285714,0.147064,0.104744,0.040852,0.066404,0.102102,0.145445,0.04828,0.081349,0.469219,0.357432,0.170732,0.308673,0.028413,0.023587,0.078927,0.023892,0,0,0.0,0.0,0,0,1.0,0.0,14,8,3.0,8.0,3,3,3.0,3.0,3,3,3.0,3.0,0,0,1.0,0.0,3,0,0.0,1.0,-14,-8,-2.0,-8.0,0,0,0.0,0.0,14,8,4.0,8.0,-14,-8,-2.0,-8.0,0,0,2.0,0.0,69.05857,81.067017,87.254213,77.591791,86.662829,92.453141,92.625492,89.507066,rejected,rejected,rejected,rejected
dalla nave eleonora,I,I,,,0.054054,0.027027,,,0.012343,0.009718,,,0.007245,0.005493,,,0.072072,0.201569,,,0.008349,0.011464,,,0,0,,,0.0,0.0,,,0.008635,0.011076,,,0.0,0.0,,,0.0,0.0,,,0.033063,0.011367,,,1,1,,,2,1,,,0,0,,,3,3,,,1,1,,,2,0,,,0,0,,,2,1,,,2,2,,,2,1,,,4,3,,,4,1,,,103.753551,101.569574,,,100.548103,94.946886,,,underrated,underrated,,
de luca gaia,J,J,G,G,0.108108,0.054054,0.071429,0.035714,0.043473,0.031411,0.017341,0.006638,0.02953,0.084585,0.04127,0.00765,0.278958,0.318809,0.252774,0.035714,0.009686,0.026253,0.007949,0.048046,0,0,0.0,0.0,0.0,0.027027,0.0,0.035714,0.008635,0.014214,0.018394,0.012193,0.0,0.004505,0.0,0.010582,0.0,0.027027,0.0,0.035714,0.023065,0.051732,0.0,0.060298,1,0,3.0,0.0,4,2,2.0,1.0,0,1,0.0,1.0,3,3,3.0,3.0,3,3,0.0,2.0,3,1,2.0,0.0,0,0,0.0,0.0,4,1,2.0,0.0,0,0,3.0,1.0,4,3,2.0,2.0,4,1,5.0,1.0,7,3,4.0,1.0,103.753551,97.841836,105.222232,95.868487,110.962058,99.934375,99.30864,92.350945,-,-,neglected,underrated


In [93]:
# Export the ids of every subject whose wide-format row has at least one
# missing value, so those cases can be checked by hand.
has_missing_value = wide_db.isna().any(axis=1)
incomplete_subjects = wide_db[has_missing_value].reset_index(names="id")
incomplete_ids = incomplete_subjects.loc[:, ["id"]]
incomplete_ids.to_excel("./analysis/casi_da_attenzionare.xlsx", index=False)