In [1]:
%matplotlib inline

In [2]:
import csv
import os
import numpy as np

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns



In [3]:
# Name of the configuration whose processed results this notebook analyses.
# Exactly one assignment should be active; the commented-out lines are the
# alternative configurations that can be switched in.
# CONF = 'conf_bdlmapper_v1'
# CONF = 'conf_neumapper_v1'
CONF = 'conf_demapper_v1_binning'

In [None]:
# Directory with the processed results of the selected configuration.
# `os` comes from the notebook's import cell, so it is not re-imported here.
# NOTE(review): absolute cluster path - adjust when running on another machine.
basedir = '/scratch/groups/saggar/mapper/experiments/cme_shine375/CME_mapper2d_data/processed_{}'.format(CONF)

# (file name, full path) pairs for every 'SBJ*.csv' file, ordered by file name.
files = sorted(
    (fname, os.path.join(basedir, fname))
    for fname in os.listdir(basedir)
    if fname.startswith('SBJ') and fname.endswith('.csv')
)

# Every other entry of the sorted list (positions 0, 2, 4, ...) as a smaller subset.
selected_files = files[::2]

In [None]:
# Show just the file names of the selected subset (full paths omitted).
[name for name, _path in selected_files]

In [None]:

def create_plot(df, ax, hparam, target):
    """Plot how ``target`` is distributed for each value of ``hparam``.

    The rows of ``df`` are grouped by the distinct values of ``df[hparam]``
    (in sorted order). Each group is drawn as one box with the individual
    observations overlaid as a swarm of small points.

    Args:
        df: DataFrame containing the ``hparam`` and ``target`` columns.
        ax: Axes to draw on.
        hparam: Name of the column whose unique values define the groups.
        target: Name of the column whose values are plotted.

    Returns:
        The axes object returned by seaborn after drawing.
    """
    levels = list(df[hparam].unique())
    levels.sort()

    # One Series of target values per hyperparameter level, in sorted order.
    groups = []
    for level in levels:
        groups.append(df.loc[df[hparam] == level, target])

    ax = sns.boxplot(data=groups, ax=ax)
    ax = sns.swarmplot(data=groups, color=".25", ax=ax, size=1.5)

    # seaborn labels the boxes 0..n-1; replace them with the actual levels.
    ax.set_xticklabels(levels, rotation=10)
    ax.set_xlabel(hparam)
    ax.set_ylabel(target)
    ax.set_title('Distribution of {} over {}'.format(target,hparam))
    ax.grid(alpha=0.4)
    return ax


# Column names are stored separately in header.csv inside the results directory.
header = None
# newline='' is the csv-module recommended way to open files for csv.reader.
with open(os.path.join(basedir, 'header.csv'), newline='') as f:
    for row in csv.reader(f):
        # Ignore blank lines so a trailing empty line cannot wipe the header;
        # otherwise the last non-empty row wins.
        if row:
            header = row

if not header:
    raise ValueError('No column names found in {}'.format(os.path.join(basedir, 'header.csv')))


# Output directory for everything this notebook produces.
outdir = os.path.join(basedir, 'hparams')
os.makedirs(outdir, exist_ok=True)

# One extra column is expected in the first group when the header lists 'alpha'.
alpha_add = 1 if 'alpha' in header else 0
# Hyperparameter columns present in this configuration.
HPARAMS = [h for h in ['R', 'K', 'G'] if h in header]
# Index of the first non-hyperparameter column.
# NOTE(review): assumes the hyperparameter columns come first in the header - confirm.
HPARAMS_START = len(HPARAMS)
# Sizes of the consecutive column groups that follow the hyperparameter columns;
# each group is rendered as its own figure.
HPARAMS_PLOTS = [4+alpha_add,6,5]

assert len(header) == HPARAMS_START + sum(HPARAMS_PLOTS), (
    'header.csv has {} columns, expected {}'.format(
        len(header), HPARAMS_START + sum(HPARAMS_PLOTS)))

In [None]:
%%capture --no-stdout

# The seaborn theme is global state, so set it once instead of once per file.
sns.set(style = "whitegrid")

for fname, fpath in files: # Or only selected_files
    print(fname)

    # The data files carry no header row; column names come from `header`.
    df = pd.read_csv(fpath, header=None)
    df.columns = header

    # Walk through the column groups that follow the hyperparameter columns;
    # each group becomes one figure (rows = targets, columns = hyperparameters).
    curr_hparam = HPARAMS_START
    for idx,hparam_size in enumerate(HPARAMS_PLOTS):
        targets = header[curr_hparam:curr_hparam + hparam_size]
        curr_hparam += hparam_size

        # squeeze=False keeps `axs` two-dimensional even when there is a single
        # hyperparameter or a single target; otherwise the nested zip below
        # would try to iterate over a bare Axes object and fail.
        fig,axs = plt.subplots(
            nrows=len(targets),
            ncols=len(HPARAMS),
            figsize=(10*len(HPARAMS), 10*len(targets)),
            squeeze=False)

        for axr, target in zip(axs, targets):
            for ax, hparam in zip(axr, HPARAMS):
                ax = create_plot(df, ax, hparam, target)

        # Operate on the figure explicitly rather than on pyplot's current figure.
        fig.tight_layout()
        fig.savefig(
            os.path.join(
                outdir,
                'hparams_{}-{}.png'.format(fname.replace('.csv', ''), idx)),
            dpi=300)
        # Release the figure; many large figures are created in this loop.
        plt.close(fig)

In [None]:

for fname, fpath in files: # Or only selected_files
    print(fname)

    # Output locations: the image name drops the '.csv' suffix, while the
    # correlation table reuses the input file name unchanged.
    png_path = os.path.join(outdir, 'corr_hps_{}.png'.format(fname.replace('.csv', '')))
    csv_path = os.path.join(outdir, 'corr_hps_{}'.format(fname))

    # Pairwise correlation between all columns of this file.
    df = pd.read_csv(fpath, header=None)
    df.columns = header
    C = df.corr()

    # Heatmap with the x tick labels moved to the top and rotated.
    plt.figure(figsize=(10,10))
    ax = sns.heatmap(C, cmap='PiYG')
    ax.xaxis.tick_top()
    plt.xticks(rotation = 80)
    ax.xaxis.set_label_position('top')

    plt.savefig(png_path)
    plt.close()

    # Persist the matrix so it can be re-read without recomputing.
    C.to_csv(csv_path)
    

In [None]:
# rcorrs maps a row label of the correlation matrix to a list holding one
# row of correlation values per processed file.
rcorrs = {}
# Column labels matching the values stored in rcorrs (hyperparameter columns
# and the index column are skipped).
all_header = None

for fname, fpath in files: # Or only selected_files
    csv_file = os.path.join(outdir, 'corr_hps_{}'.format(fname))

    with open(csv_file) as f:
        reader = csv.reader(f)

        # First row carries the column labels; drop the index column and the
        # leading hyperparameter columns.
        first_row = next(reader, None)
        if first_row is not None:
            hh = first_row[1+HPARAMS_START:]
            all_header = hh

        # Remaining rows: label in the first cell, correlations afterwards.
        for row in reader:
            per_file_rows = rcorrs.setdefault(row[0], [])
            # Empty cells (missing correlations) are stored as 0.0.
            per_file_rows.append(
                [0.0 if cell == '' else float(cell) for cell in row[1+HPARAMS_START:]]
            )

In [None]:
# Display the list of hyperparameter column names in use.
HPARAMS

In [None]:
# One panel per hyperparameter: distribution (across files) of its correlation
# with every column listed in all_header.
# squeeze=False guarantees a 2-D axes array, so the loop also works when only
# a single hyperparameter is present (plt.subplots would otherwise return a
# bare Axes that cannot be zipped).
fig,axs = plt.subplots(
    nrows=1,
    ncols=len(HPARAMS),
    figsize=(10 * len(HPARAMS),10),
    squeeze=False)

for ax, HP in zip(axs[0], HPARAMS):
    # Rows = files, columns = correlation targets.
    HP_df = pd.DataFrame(np.array(rcorrs[HP]), columns=all_header)
    ax = sns.boxplot(data=HP_df, ax=ax)
    ax.set_ylabel('Correlation')
    ax.set_title('Correlation distribution for {} '.format(HP))
    # Correlations are bounded, so use the same fixed range on every panel.
    ax.set_ylim([-1, 1])

    # Rotate the column names so they stay readable.
    plt.setp(ax.get_xticklabels(), rotation=80)

fig.tight_layout()
fig.savefig(os.path.join(outdir, 'group_corr.png'))

In [None]:
import math

# Grid layout: fixed number of rows, as many columns as needed to hold one
# panel per entry of all_header. The grid is built from these values instead
# of a hard-coded 4x4, so no target is silently dropped when there are more
# than 16 of them.
nrows = 4
ncols = math.ceil(len(all_header) / nrows)
# 7.5 inches per panel reproduces the former 30x30 figure for a 4x4 grid.
# squeeze=False keeps `axs` 2-D even when a single column suffices.
fig,axs = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(7.5 * ncols, 7.5 * nrows),
    squeeze=False)

# Convert each hyperparameter's rows once: shape (files, targets).
corr_by_hp = {hp: np.array(rcorrs[hp]) for hp in HPARAMS}

# axs.flat walks the grid row by row, matching the order of all_header.
for idx, ax in enumerate(axs.flat):
    if idx >= len(all_header):
        # Surplus panel: hide it instead of leaving an empty frame.
        ax.set_visible(False)
        continue
    target = all_header[idx]
    # Rows = files, columns = hyperparameters, values = correlation with target.
    X = np.array([corr_by_hp[hp][:,idx] for hp in HPARAMS]).T

    target_df = pd.DataFrame(X, columns=HPARAMS)
    ax = sns.boxplot(data=target_df, ax=ax)
    ax.set_ylabel('Correlation')
    ax.set_title('Correlation distribution for {} '.format(target))
    ax.set_ylim([-1, 1])

fig.tight_layout()
fig.savefig(os.path.join(outdir, 'group_corr_targets.png'))

In [None]:
# Display how many column labels all_header holds.
len(all_header)