In this notebook I check sample sizes and build the coexpression networks for each dataset at every level of classification.

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Functions" data-toc-modified-id="Functions-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Functions</a></span></li><li><span><a href="#Load-Data" data-toc-modified-id="Load-Data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Load Data</a></span></li><li><span><a href="#Check-Sample-Sizes-of-Non-Neuronal-Cell-Types" data-toc-modified-id="Check-Sample-Sizes-of-Non-Neuronal-Cell-Types-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Check Sample Sizes of Non-Neuronal Cell Types</a></span></li><li><span><a href="#Check-Recurrent-Labels" data-toc-modified-id="Check-Recurrent-Labels-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Check Recurrent Labels</a></span></li><li><span><a href="#Compute-Aggregate-Networks" data-toc-modified-id="Compute-Aggregate-Networks-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Compute Aggregate Networks</a></span></li><li><span><a href="#Compute-Aggregate-Networks-without-Zeng_10x_nuc_v3" data-toc-modified-id="Compute-Aggregate-Networks-without-Zeng_10x_nuc_v3-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Compute Aggregate Networks without Zeng_10x_nuc_v3</a></span></li><li><span><a href="#Compute-Compositional-Networks" data-toc-modified-id="Compute-Compositional-Networks-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>Compute Compositional Networks</a></span></li><li><span><a href="#Compute-Aggregate-Markers" data-toc-modified-id="Compute-Aggregate-Markers-9"><span class="toc-item-num">9&nbsp;&nbsp;</span>Compute Aggregate Markers</a></span><ul class="toc-item"><li><span><a href="#Class-Label" data-toc-modified-id="Class-Label-9.1"><span class="toc-item-num">9.1&nbsp;&nbsp;</span>Class Label</a></span><ul class="toc-item"><li><span><a href="#No-Zeng-10x-Nuc-V3-Markers" data-toc-modified-id="No-Zeng-10x-Nuc-V3-Markers-9.1.1"><span 
class="toc-item-num">9.1.1&nbsp;&nbsp;</span>No Zeng 10x Nuc V3 Markers</a></span></li></ul></li><li><span><a href="#Subclass_label" data-toc-modified-id="Subclass_label-9.2"><span class="toc-item-num">9.2&nbsp;&nbsp;</span>Subclass_label</a></span></li><li><span><a href="#Cluster-Label" data-toc-modified-id="Cluster-Label-9.3"><span class="toc-item-num">9.3&nbsp;&nbsp;</span>Cluster Label</a></span></li></ul></li><li><span><a href="#Aggregagte-AUROCs" data-toc-modified-id="Aggregagte-AUROCs-10"><span class="toc-item-num">10&nbsp;&nbsp;</span>Aggregagte AUROCs</a></span></li><li><span><a href="#Compute-Aggregates-properly" data-toc-modified-id="Compute-Aggregates-properly-11"><span class="toc-item-num">11&nbsp;&nbsp;</span>Compute Aggregates properly</a></span></li></ul></div>

## Imports

In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import scanpy as sc
import bottleneck
from scipy import stats
import gc

import matplotlib.pyplot as plt
import seaborn as sns

# Global plot styling: seaborn "white" style with fonts scaled by 1.25.
sns.set(style='white', font_scale=1.25)
# Matplotlib rc overrides, issued after sns.set(...) so these are the values in effect:
# hide the top/right axes spines and draw ticks on the bottom x-axis and left y-axis.
plt.rc("axes.spines", top=False, right=False)
plt.rc('xtick', bottom=True)
plt.rc('ytick', left=True)

from itertools import combinations
import logging
# Root logger: emit INFO and above, each message prefixed with a timestamp.
logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)


import networkx as nx

%matplotlib inline

In [3]:
import sys
# Extend the module search path with the script directories this notebook
# imports from (one relative to the working directory, two absolute).
sys.path.extend([
    '../scripts/',
    '/home/bharris/Correlation_Coexpression/scripts/',
    '/home/bharris/vshape/scripts/',
])

In [4]:
# Load the IPython autoreload extension and set it to mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [5]:
from rank import rank
from processify import processify
from egad import run_egad
from egad_by_gene import run_egad_gene_score

## Functions

## Load Data

In [7]:
# Dataset lookup table. to_dict() nests it as {column label: {row label: value}},
# so the top-level keys of dataset_dict are the CSV's column labels.
dataset_table = pd.read_csv(
    '/home/bharris/biccn_paper/data/dataset_dict_biccn_sets_7.csv', index_col=0)
dataset_dict = dataset_table.to_dict()

In [8]:
# Gene list, loaded as a NumPy array of strings; it sizes and labels the
# aggregate networks built further down.
genes_file = '/home/bharris/biccn_paper/data/highly_expressed_7_datasets_75k.csv'
genes = np.genfromtxt(genes_file, dtype=str)

In [9]:
# Base directories (trailing slash included, since file names are appended directly):
# nws_path holds the per-dataset network files; markers_path presumably holds the
# one-vs-all differential-expression marker tables (not used in the cells shown here).
nws_path, markers_path = (
    '/home/bharris/biccn_paper/data/networks/',
    '/home/bharris/biccn_paper/data/de_one_v_all/',
)

## Compute Aggregate Networks

In [28]:
# Recurrence tables: the row index holds cluster / subclass labels and each dataset
# has its own column (the columns are selected by dataset name in the loop below).
cluster_recurrence = pd.read_csv(
    '/home/bharris/biccn_paper/data/cluster_recurrence.csv', index_col=0)
# The Meis2 and Sst_Chodl rows are removed from the subclass table right after loading.
subclass_recurrence = pd.read_csv(
    '/home/bharris/biccn_paper/data/subclass_recurrence.csv', index_col=0
).drop(index=['Meis2', 'Sst_Chodl'])

In [18]:
def compute_agg_nw(dataset, meta_level, selected_clusters, name=''):
    """Sum the per-cluster coexpression networks of one dataset and save the total.

    For every entry of ``selected_clusters`` the network stored under key ``'nw'``
    in ``{nws_path}{dataset}/coexpression_nw_{meta_level}_{cluster}.hdf5`` is read
    and added element-wise to a running total. Each network counts as exactly 1 on
    its diagonal, whatever value is stored there. The total is written, labelled by
    ``genes`` on both axes, to
    ``{nws_path}{dataset}/pearson_agg_all_{meta_level}{name}.hdf5`` under key ``'nw'``.

    Relies on the notebook-level globals ``genes`` and ``nws_path``.

    Parameters
    ----------
    dataset : str
        Dataset sub-directory under ``nws_path``.
    meta_level : str
        Label level used in the file names (e.g. ``'subclass_label'``).
    selected_clusters : iterable
        Cluster names whose networks are summed. If it is empty, an all-zero
        matrix is saved.
    name : str, optional
        Suffix appended to the output file stem.

    Returns
    -------
    None
        The result is only written to disk.
    """
    n_genes = genes.shape[0]
    agg = np.zeros([n_genes, n_genes])
    n_networks = 0
    for cluster in selected_clusters:
        nw = pd.read_hdf(
            f'{nws_path}{dataset}/coexpression_nw_{meta_level}_{cluster}.hdf5',
            key='nw')
        # Read-only use of the loaded data: writing through ``nw.values`` only works
        # while pandas hands back a writable view, which is not guaranteed (the array
        # is read-only under copy-on-write).
        agg += nw.to_numpy()
        n_networks += 1
        del nw
        gc.collect()
    # Forcing every network's diagonal to 1 before summing is the same as setting the
    # summed diagonal to the number of networks (0 when nothing was selected).
    np.fill_diagonal(agg, n_networks)
    pd.DataFrame(agg, index=genes, columns=genes).to_hdf(
        f'{nws_path}{dataset}/pearson_agg_all_{meta_level}{name}.hdf5', key='nw')

In [30]:
for dataset in dataset_dict:
    logging.info(dataset)
    # Labels whose entry in this dataset's column is truthy, for each recurrence table.
    cluster_mask = cluster_recurrence[dataset].astype(bool).values
    selected_clusters = cluster_recurrence.index[cluster_mask]
    subclass_mask = subclass_recurrence[dataset].astype(bool).values
    selected_subclass = subclass_recurrence.index[subclass_mask]
    # Cluster-level aggregation is currently disabled:
    #    compute_agg_nw(dataset, 'cluster_label', selected_clusters)
    compute_agg_nw(dataset, 'subclass_label', selected_subclass)
    compute_agg_nw(dataset, 'class_label', ['GABAergic', 'Glutamatergic'])
    # Joint-cluster aggregation is disabled as well (joint_cluster_recurrence is not
    # defined in the cells shown here):
    #     selected_joint_clusters = joint_cluster_recurrence.index[joint_cluster_recurrence[dataset].astype(bool).values]
    #     compute_agg_nw(dataset, 'joint_cluster_label',selected_joint_clusters)

2020-03-05 13:41:48,364 - zeng_10x_cell
2020-03-05 13:41:53,373 - zeng_10x_nuc
2020-03-05 13:41:58,276 - zeng_smart_cell
2020-03-05 13:42:03,297 - zeng_smart_nuc
2020-03-05 13:42:08,005 - zeng_10x_cell_v3
2020-03-05 13:42:12,939 - zeng_10x_nuc_v3
2020-03-05 13:42:17,998 - macosko_10x_nuc_v3
