# Introduction

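For each brain structure, we'll plot how similar its assigned mental functions are from one clustering solution ($k_i$) to the next ($k_i + 1$), measured as the proportion of top terms shared by the clusters that contain the structure.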

# Load the data

In [1]:
import pandas as pd
import numpy as np
np.random.seed(42)

import sys
sys.path.append("..")
from ontology import ontology
from style import style

In [2]:
# Framework name
framework = "data-driven"

# Version of the document-term matrix
version = 190325

# Suffix used for term lists
suffix = "_oplen"

# Suffix used for data-driven classifiers
clf = "_lr"

# Number of iterations for the bootstrap and null distributions
n_iter = 10

# How many top terms to include per cluster
n = 5

## Cluster solutions

In [3]:
# Numbers of clusters (k) to examine: 2 through 25 inclusive
clusters = range(2, 26)

## Words and circuits

In [4]:
# Load the term lists and circuits of every clustering solution, keyed by k.
# NOTE(review): the suffix passed here is hard-coded to "_logreg" and does not use
# the `suffix` / `clf` values from the configuration cell -- confirm this is intended.
solutions = {}
for k in clusters:
    lists, circuits = ontology.load_ontology(
        k,
        suffix="_logreg",
        path="../ontology/",
    )
    solutions[k] = dict(lists=lists, circuits=circuits)

In [5]:
# Preview the circuits table (structure-to-cluster assignments) of the first solution
solutions[clusters[0]]["circuits"].head()

Unnamed: 0,STRUCTURE,CLUSTER,DOMAIN,TITLE
0,left_accumbens,1,AROUSAL,Arousal
1,left_amygdala,1,AROUSAL,Arousal
2,left_brainstem,1,AROUSAL,Arousal
3,left_caudate,1,AROUSAL,Arousal
4,left_frontal_medial_cortex,1,AROUSAL,Arousal


In [6]:
# Preview the term lists (tokens per cluster) of the first solution
solutions[clusters[0]]["lists"].head()

Unnamed: 0,CLUSTER,TOKEN,R,ROC_AUC,DOMAIN,TITLE
0,1,reward,0.174514,0.654638,AROUSAL,Arousal
1,1,valence,0.153667,0.654638,AROUSAL,Arousal
2,1,emotion,0.143888,0.654638,AROUSAL,Arousal
3,1,arousal,0.14225,0.654638,AROUSAL,Arousal
4,1,memory,0.140675,0.654638,AROUSAL,Arousal


# Compute proportions of overlapping terms

In [7]:
import numpy as np
from scipy.spatial.distance import cdist, dice

In [8]:
# Alphabetically sorted structure names, taken from the first clustering solution
first_circuits = solutions[clusters[0]]["circuits"]
structures = sorted(first_circuits["STRUCTURE"])

In [9]:
# Stability of each structure's assigned terms across neighboring clustering solutions.
#
# For every structure, sims_by_struct[struct] holds one value per pair of successive
# solutions (k_i, k_j): the number of terms shared by the top-n term lists of the
# clusters containing the structure, divided by n.

# Pair each solution with the next one in `clusters`, rather than assuming that
# k_i + 1 is always present
solution_pairs = list(zip(clusters[:-1], clusters[1:]))

sims_by_struct = {}
for struct in structures:

    # Top-n terms of the cluster that contains this structure, for every k
    top_terms_by_k = {}
    for k in clusters:

        circuits = solutions[k]["circuits"]
        lists = solutions[k]["lists"]

        # Cluster this structure is assigned to in the k-cluster solution
        cluster_id = circuits.loc[circuits["STRUCTURE"] == struct, "CLUSTER"].values[0]

        # First n terms listed for that cluster
        # (presumably ordered from strongest to weakest association -- confirm)
        cluster_terms = lists.loc[lists["CLUSTER"] == cluster_id, "TOKEN"]
        top_terms_by_k[k] = set(cluster_terms.iloc[:n])

    # Proportion of the top-n terms shared between each pair of neighboring solutions
    sims_by_struct[struct] = [
        len(top_terms_by_k[ki] & top_terms_by_k[kj]) / n
        for ki, kj in solution_pairs
    ]

# Plot Dice similarities

In [10]:
import matplotlib.pyplot as plt
from matplotlib import font_manager, rcParams

In [11]:
# Font properties for tick labels (14 pt), axis labels (18 pt), and titles (20 pt),
# all using the font file specified by the project style module
font_prop_ax, font_prop_label, font_prop_title = (
    font_manager.FontProperties(fname=style.font, size=font_size)
    for font_size in (14, 18, 20)
)

# Thicker axis spines for every figure
rcParams["axes.linewidth"] = 1.5

In [12]:
# Map each preprocessed structure name to its abbreviation for plot labels
struct_label_df = pd.read_csv("../data/brain/labels.csv")
struct2label = dict(zip(struct_label_df["PREPROCESSED"],
                        struct_label_df["ABBREVIATION"]))

## Grouped by hemisphere

In [13]:
# Split the structures by hemisphere using the prefix of their names
left_structs, right_structs = (
    [name for name in structures if name.startswith(prefix)]
    for prefix in ("left", "right")
)

In [14]:
# One figure per hemisphere: a column of small line plots, one per structure, showing
# the proportion of top-n terms shared between successive clustering solutions.
path = ""

for hemi, structs in zip(["left", "right"], [left_structs, right_structs]):

    # squeeze=False keeps axs a 2-D array even when there is a single structure,
    # so ravel() always yields a flat array of Axes
    fig, axs = plt.subplots(len(structs), 1, figsize=(14, 50),
                            sharex=True, sharey=True, squeeze=False)
    fig.subplots_adjust(hspace=1, wspace=1)
    axs = axs.ravel()

    for ax, struct in zip(axs, structs):

        sims = sims_by_struct[struct]

        ax.plot(range(len(sims)), sims, color="gray",
                linestyle="solid", linewidth=3)

        # Structure label placed to the right of the panel, without the hemisphere tag
        title = struct2label[struct].replace(" (L)", "").replace(" (R)", "")
        ax.set_ylabel(title, rotation=0, ha="left", fontproperties=font_prop_title)
        ax.yaxis.set_label_coords(1.02, 0.2)

        ax.set_xlim([0, len(sims)-1])
        ax.set_ylim([0, 1.2])

        ax.xaxis.set_tick_params(width=1.5, length=7)
        ax.yaxis.set_tick_params(width=1.5, length=7)

        ax.set_xticks(range(len(sims)))
        ax.set_xticklabels([])
        ax.set_yticks([0, 0.5, 1])
        ax.set_yticklabels([0, "", 1], fontproperties=font_prop_ax)

        for side in ["right", "top"]:
            ax.spines[side].set_visible(False)

    # Only the bottom panel gets x tick labels. There is one tick per comparison
    # (len(sims)), which is one fewer than the number of cluster solutions, so the
    # labels are trimmed to match the ticks; each tick shows its k_i.
    bottom_ax = axs[-1]
    bottom_ax.set_xticklabels(list(clusters)[:len(sims)], fontproperties=font_prop_ax)
    bottom_ax.set_xlabel("Cluster $k_i$", fontproperties=font_prop_label, labelpad=15)
    fig.text(0.075, 0.5, "Proportion of top {} terms that intersect".format(n),
             fontproperties=font_prop_label, va="center", rotation="vertical")

    fig.savefig("{}figures/hemispheres/stability_{}_top{}.png".format(path, hemi, n),
                dpi=250, bbox_inches="tight")
    plt.close(fig)

## Grouped by data-driven domain (<i>k</i>=6)

In [15]:
# Structures belonging to each of the six clusters in the k=6 solution
dd_circuit = solutions[6]["circuits"]
dd_structs = [
    dd_circuit.loc[dd_circuit["CLUSTER"] == cluster_id, "STRUCTURE"].tolist()
    for cluster_id in range(1, 7)
]
dd_structs[0]

['left_cuneal_cortex',
 'left_inferior_temporal_gyrus_posterior_division',
 'left_inferior_temporal_gyrus_temporooccipital_part',
 'left_intracalcarine_cortex',
 'left_lateral_occipital_cortex_inferior_division',
 'left_lingual_gyrus',
 'left_middle_temporal_gyrus_temporooccipital_part',
 'left_occipital_fusiform_gyrus',
 'left_occipital_pole',
 'left_supracalcarine_cortex',
 'left_temporal_fusiform_cortex_posterior_division',
 'left_temporal_occipital_fusiform_cortex',
 'right_cuneal_cortex',
 'right_inferior_temporal_gyrus_temporooccipital_part',
 'right_intracalcarine_cortex',
 'right_lateral_occipital_cortex_inferior_division',
 'right_lingual_gyrus',
 'right_middle_temporal_gyrus_temporooccipital_part',
 'right_occipital_fusiform_gyrus',
 'right_occipital_pole',
 'right_supracalcarine_cortex',
 'right_temporal_occipital_fusiform_cortex']

In [16]:
# Domain name of each of the six data-driven clusters, read from the term lists file
dd_lists = pd.read_csv("../ontology/lists/lists_data-driven_lr.csv", index_col=None)
dd_domains = [
    dd_lists.loc[dd_lists["CLUSTER"] == cluster_id, "DOMAIN"].iloc[0]
    for cluster_id in range(1, 7)
]
dd_domains

['VISION', 'LANGUAGE', 'MANIPULATION', 'REWARD', 'COGNITION', 'MEMORY']

In [17]:
# One figure per data-driven domain (k=6): a column of small line plots, one per
# structure in the domain, showing the proportion of top-n terms shared between
# successive clustering solutions. Each domain is drawn in its own color.
path = ""
c = style.c
colors = [c["purple"], c["gold"], c["green"], c["vermillion"], c["yellow"], c["blue"]]

for domain, structs, color in zip(dd_domains, dd_structs, colors):

    # squeeze=False keeps axs a 2-D array even when a domain has a single structure,
    # so ravel() always yields a flat array of Axes
    fig, axs = plt.subplots(len(structs), 1, figsize=(14, 1.14*len(structs)),
                            sharex=True, sharey=True, squeeze=False)
    fig.subplots_adjust(hspace=1, wspace=1)
    axs = axs.ravel()

    for ax, struct in zip(axs, structs):

        sims = sims_by_struct[struct]

        ax.plot(range(len(sims)), sims, color=color,
                linestyle="solid", linewidth=3)

        # Structure label placed to the right of the panel
        title = struct2label[struct]
        ax.set_ylabel(title, rotation=0, ha="left", fontproperties=font_prop_title)
        ax.yaxis.set_label_coords(1.02, 0.2)

        ax.set_xlim([0, len(sims)-1])
        ax.set_ylim([0, 1.2])

        ax.xaxis.set_tick_params(width=1.5, length=7)
        ax.yaxis.set_tick_params(width=1.5, length=7)

        ax.set_xticks(range(len(sims)))
        ax.set_xticklabels([])
        ax.set_yticks([0, 0.5, 1])
        ax.set_yticklabels([0, "", 1], fontproperties=font_prop_ax)

        for side in ["right", "top"]:
            ax.spines[side].set_visible(False)

    # Only the bottom panel gets x tick labels. There is one tick per comparison
    # (len(sims)), which is one fewer than the number of cluster solutions, so the
    # labels are trimmed to match the ticks; each tick shows its k_i.
    bottom_ax = axs[-1]
    bottom_ax.set_xticklabels(list(clusters)[:len(sims)], fontproperties=font_prop_ax)
    bottom_ax.set_xlabel("Cluster $k_i$", fontproperties=font_prop_label, labelpad=15)
    fig.text(0.075, 0.5, "Proportion of top {} terms that intersect".format(n),
             fontproperties=font_prop_label, va="center", rotation="vertical")

    fig.savefig("{}figures/domains/stability_{}_top{}.png".format(path, domain, n),
                dpi=250, bbox_inches="tight")
    plt.close(fig)