In [81]:
#Importing all the necessary packages 

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy.stats import pearsonr

In [84]:
# Load the cleaned data set from CSV into a DataFrame.
# Per the original author's note, the file usually comes in with an extra
# index stored as its first column, so that first column is discarded.
df = pd.read_csv("Clean Data/data_phil_clean_103122.csv")
df = df.drop(columns=df.columns[0])

# Preview the first rows to confirm the data loaded correctly.
df.head()

Unnamed: 0,Finished,T_P_1,T_P_2,T_P_3,T_P_4,FI_1,FI_2,FI_3,FA_1,FA_2,...,Mani_Fail,Av_TP,Av_FI,Av_FA,Av_PC,Av_WOM,Av_EV,Av_DU,Av_C,Av_TN
0,1,5.0,4.0,5.0,5.0,4.0,2.0,2.0,4.0,3.0,...,0.0,4.75,2.67,3.5,6.5,2.0,3.75,4.75,5.0,4.75
1,1,6.0,6.0,6.0,6.0,2.0,2.0,2.0,6.0,6.0,...,0.0,6.0,2.0,6.0,6.0,2.0,3.0,6.0,5.0,5.0
2,1,6.0,2.0,2.0,2.0,6.0,2.0,4.0,2.0,3.0,...,0.0,3.0,4.0,2.75,7.0,6.67,7.0,6.5,1.0,3.0
3,1,3.0,3.0,4.0,4.0,5.0,4.0,5.0,2.0,4.0,...,0.0,3.5,4.67,3.25,5.75,4.33,4.75,4.25,3.5,4.25
4,1,5.0,5.0,5.0,5.0,3.0,2.0,2.0,5.0,5.0,...,1.0,5.0,2.33,4.75,4.0,3.0,3.5,4.25,3.5,5.0


In [22]:
# Only some of the columns should be correlated, so select that subset under
# its own name. The label slice takes every column from 'Av_FI' through
# 'Av_TN' (both ends included), which only works while the wanted columns sit
# next to each other in that order.
data_corr = df.loc[:, slice('Av_FI', 'Av_TN')]
data_corr.head()

Unnamed: 0,Av_FI,Av_FA,Av_PC,Av_WOM,Av_EV,Av_DU,Av_C,Av_TN
0,2.67,3.5,6.5,2.0,3.75,4.75,5.0,4.75
1,2.0,6.0,6.0,2.0,3.0,6.0,5.0,5.0
2,4.0,2.75,7.0,6.67,7.0,6.5,1.0,3.0
3,4.67,3.25,5.75,4.33,4.75,4.25,3.5,4.25
4,2.33,4.75,4.0,3.0,3.5,4.25,3.5,5.0


In [79]:
#A correlation function that takes a df and returns a matrix whose cells read "coefficient ( p-value )"

def corr_mtx(df):
    """Build a matrix of pairwise Pearson correlations with their p-values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose numeric columns are correlated with one another.

    Returns
    -------
    pandas.DataFrame
        Square frame whose index and columns are both ``df.columns``. The
        cell for a pair of numeric columns holds the string
        ``'<r> ( <p> )'`` with the coefficient rounded to 2 decimals and the
        p-value to 4. Cells involving a non-numeric column are left as NaN.

    Notes
    -----
    Missing values are excluded pairwise: a row is used for a given pair when
    both columns have a value in it. A pair with fewer than two such rows
    cannot be correlated and is reported as ``'nan ( nan )'``.
    """
    corr_pval_df = pd.DataFrame(index=df.columns, columns=df.columns)

    # Decide once per column whether it is numeric, instead of once per pair.
    numeric_cols = [col for col in df.columns if is_numeric_dtype(df[col])]

    for pos, c in enumerate(numeric_cols):
        # Pearson's r is symmetric, so every unordered pair is computed once
        # and written to both (c, d) and (d, c).
        for d in numeric_cols[pos:]:
            # Keep only the rows where both columns are present; a single NaN
            # would otherwise turn the whole result for this pair into nan.
            complete = (df[c].notna() & df[d].notna()).to_numpy()
            if complete.sum() < 2:
                # pearsonr needs at least two observations.
                correlation = pvalue = float('nan')
            else:
                correlation, pvalue = pearsonr(
                    df[c].to_numpy()[complete],
                    df[d].to_numpy()[complete],
                )
            corr_str = f'{correlation:.2f} ( {pvalue:.4f} )'
            corr_pval_df.loc[c, d] = corr_str
            corr_pval_df.loc[d, c] = corr_str
    return corr_pval_df

In [80]:
#Testing the function on the selected columns; the displayed result is the
#matrix of "coefficient ( p-value )" strings returned by corr_mtx

corr_mtx (data_corr)

Unnamed: 0,Av_FI,Av_FA,Av_PC,Av_WOM,Av_EV,Av_DU,Av_C,Av_TN
Av_FI,1.00 ( 0.0000 ),-0.38 ( 0.0005 ),0.20 ( 0.0707 ),0.44 ( 0.0001 ),0.43 ( 0.0001 ),-0.13 ( 0.2565 ),-0.08 ( 0.4763 ),-0.36 ( 0.0009 )
Av_FA,-0.38 ( 0.0005 ),1.00 ( 0.0000 ),-0.16 ( 0.1491 ),-0.57 ( 0.0000 ),-0.72 ( 0.0000 ),0.54 ( 0.0000 ),0.39 ( 0.0004 ),0.54 ( 0.0000 )
Av_PC,0.20 ( 0.0707 ),-0.16 ( 0.1491 ),1.00 ( 0.0000 ),0.16 ( 0.1689 ),0.23 ( 0.0373 ),0.13 ( 0.2676 ),-0.08 ( 0.4925 ),-0.22 ( 0.0539 )
Av_WOM,0.44 ( 0.0001 ),-0.57 ( 0.0000 ),0.16 ( 0.1689 ),1.00 ( 0.0000 ),0.62 ( 0.0000 ),-0.39 ( 0.0004 ),-0.32 ( 0.0042 ),-0.39 ( 0.0004 )
Av_EV,0.43 ( 0.0001 ),-0.72 ( 0.0000 ),0.23 ( 0.0373 ),0.62 ( 0.0000 ),1.00 ( 0.0000 ),-0.45 ( 0.0000 ),-0.26 ( 0.0207 ),-0.48 ( 0.0000 )
Av_DU,-0.13 ( 0.2565 ),0.54 ( 0.0000 ),0.13 ( 0.2676 ),-0.39 ( 0.0004 ),-0.45 ( 0.0000 ),1.00 ( 0.0000 ),0.25 ( 0.0272 ),0.34 ( 0.0023 )
Av_C,-0.08 ( 0.4763 ),0.39 ( 0.0004 ),-0.08 ( 0.4925 ),-0.32 ( 0.0042 ),-0.26 ( 0.0207 ),0.25 ( 0.0272 ),1.00 ( 0.0000 ),0.22 ( 0.0484 )
Av_TN,-0.36 ( 0.0009 ),0.54 ( 0.0000 ),-0.22 ( 0.0539 ),-0.39 ( 0.0004 ),-0.48 ( 0.0000 ),0.34 ( 0.0023 ),0.22 ( 0.0484 ),1.00 ( 0.0000 )
