### Import libraries

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import time
import feather
from os import listdir
from os.path import join, isfile
# Apply seaborn's "whitegrid" theme to the figures drawn in this notebook.
sns.set(style="whitegrid")
# Switch off pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None 

--------------------------------------

### Load correlation matrices

In [None]:
# Collect the path of every regular file directly inside corrMats/
# (sub-directories are skipped), in the order listdir() reports them,
# then display the resulting list.
fileList = [
    'corrMats/' + name
    for name in listdir('corrMats/')
    if isfile(join('corrMats/', name))
]
fileList

--------------------------------------

In [None]:
# Read the third entry of fileList as a DataFrame and display it.
df = feather.read_dataframe(fileList[2])
df

### Maximum absolute correlation per feature

In [None]:
def compute_max_corr(absMat, feat_i):
    """Return each feature's maximum correlation, strongest first.

    Parameters
    ----------
    absMat : pandas.DataFrame
        Table with a ``'corr'`` column and the feature column named by
        ``feat_i``.  ``'corr'`` is used as given; take the absolute value
        beforehand if absolute correlations are wanted.
    feat_i : str
        Name of the column holding the feature labels.

    Returns
    -------
    maxList : list
        Per-feature maximum of ``'corr'``, in descending order.
    featList : numpy.ndarray
        Feature labels in the same order as ``maxList``, i.e.
        ``featList[k]`` is the feature whose maximum is ``maxList[k]``.
    """
    # Order all rows by correlation (NaN values sort last), then keep the
    # first row seen for each feature: that row carries the feature's
    # maximum, and the surviving rows are already in descending order.
    # Values and labels therefore stay paired -- sorting the maxima on
    # their own would attach each label to another feature's value.
    # mergesort is a stable sort, which keeps tie order deterministic.
    ranked = absMat.sort_values('corr', ascending=False, kind='mergesort')
    best = ranked.drop_duplicates(subset=feat_i)

    return best['corr'].tolist(), best[feat_i].values


def corr_for_each_feat(corrMat, first=False, second=False):
    """Build a per-feature table of maximum absolute correlations.

    Parameters
    ----------
    corrMat : pandas.DataFrame
        Correlation table.  The columns ``'method1'``, ``'method2'`` and
        ``'corr'`` are read, plus ``'feat1'`` (``first``) or ``'feat2'``
        (``second``).  Rows containing any NaN are ignored.
    first : bool, optional
        Summarise the labels in ``'feat1'``.
    second : bool, optional
        Summarise the labels in ``'feat2'``.  Ignored when ``first`` is
        true.

    Returns
    -------
    pandas.DataFrame
        One row per feature with the columns ``'method1'``, ``'method2'``,
        ``'feat'`` and ``'maxCorr'``.  ``'method1'`` is taken from the
        method column with the same number as the chosen feature column
        and ``'method2'`` from the other one, so with ``second=True`` the
        two names are swapped relative to ``corrMat``.  With ``first=True``
        the older ``'method'`` column (same content as ``'method1'``) is
        kept as well.

    Raises
    ------
    ValueError
        If neither ``first`` nor ``second`` is true.
    """
    # Fail early with a clear message instead of hitting an unbound local
    # further down.
    if not (first or second):
        raise ValueError(
            "corr_for_each_feat needs first=True or second=True")

    # dropna() and assign() each return a new frame, so corrMat is never
    # written to and no chained-assignment warning is triggered.
    absMat = corrMat.dropna()
    absMat = absMat.assign(corr=absMat['corr'].abs())

    if first:
        featCol, ownCol, otherCol = 'feat1', 'method1', 'method2'
    else:
        featCol, ownCol, otherCol = 'feat2', 'method2', 'method1'

    # Method names are read from the unfiltered table.
    ownMethod = corrMat[ownCol].unique()[0]
    otherMethod = corrMat[otherCol].unique()[0]

    maxList, featList = compute_max_corr(absMat, featCol)
    nFeat = len(featList)

    # Same column layout for both branches, so either result can be used
    # wherever 'method1' / 'method2' are expected.
    data = {
        'method1': [ownMethod] * nFeat,
        'method2': [otherMethod] * nFeat,
        'feat': featList,
        'maxCorr': maxList,
    }
    if first:
        # Column name this branch has always produced; kept for
        # backward compatibility.
        data['method'] = [ownMethod] * nFeat

    return pd.DataFrame.from_dict(data)
    
    
def plot_maxAbs(df, title=' '):
    """Draw a bar chart with one bar per row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``'method1'``, ``'method2'``, ``'feat'``
        and ``'maxCorr'``.  Bar heights come from ``'maxCorr'``; the first
        distinct value of ``'method1'`` / ``'method2'`` is used in the
        axis label and the figure title.
    title : str, optional
        Axes title.  With the default ``' '`` the x ticks are left
        unlabelled; any other value also labels each bar with its
        ``'feat'`` entry (drawn vertically).

    Raises
    ------
    ValueError
        If ``df`` has no rows.
    """
    # A threshold filter can leave nothing to plot; report that directly
    # rather than failing on the [0] lookups below.
    if df.empty:
        raise ValueError("plot_maxAbs: the DataFrame has no rows to plot")

    method1 = df['method1'].unique()[0]
    method2 = df['method2'].unique()[0]

    positions = np.arange(len(df))

    # One bar per row.
    plt.bar(positions, df['maxCorr'], color='navy')

    # Feature names on the x-axis, only when a title was supplied.
    if title != ' ':
        # One label per bar (not per distinct name), so the number of
        # labels always equals the number of tick positions.
        plt.xticks(positions, df['feat'].tolist(), rotation='vertical')

    plt.xlabel(method1 + ' feature')
    plt.ylabel('Max. Abs. Correlation')
    # Figure-level heading, with the caller's title underneath on the axes.
    plt.suptitle("Distribution of " + method1 + " max abs correlation by feature with " + method2)
    plt.title(title)
    plt.show()
    

In [None]:
# Read the eighth entry of fileList, build the per-feature summary for the
# 'feat2' side of the table, and plot it.
df = feather.read_dataframe(fileList[7])
a = corr_for_each_feat(corrMat=df, second=True)
plot_maxAbs(df=a)

In [None]:
# Remove every row whose maxCorr is 0.2 or higher and display what is left.
b = a.drop(a.loc[a['maxCorr'] >= 0.2].index)
b

In [None]:
# Print each distinct value of b's 'feat' column on its own line.
for i in b['feat'].unique():
    print(i)

In [None]:
# Remove every row whose maxCorr is 0.2 or higher, then plot the remainder
# with labelled bars.
b = a.drop(a.loc[a['maxCorr'] >= 0.2].index)
plot_maxAbs(b, title="Only features with < 0.2 max. abs. correlation")

In [None]:
# Remove every row whose maxCorr is 0.9 or lower, then plot the remainder
# with labelled bars.
c = a.drop(a.loc[a['maxCorr'] <= 0.9].index)
plot_maxAbs(c, title="Only features with > 0.9 max. abs. correlation")

--------------------------------------