# Correlation analysis

This notebook looks at the different types of ground motion and tries to find
relationships among the other characteristics of the points, i.e. those that are not ground motion.

In [None]:
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable

In [None]:
# Load the per-point regression results from CSV (its 'pid' and 'reg_type' fields are used further down).
regression = pd.read_csv('./regression_values.csv')

In [None]:
# Load the time-series CSV export. Only the attributes that are not ground
# motion are of interest here; the date entries are sliced away further down.

gm_csv = pd.read_csv('./tmp/160-IW1-414-s1-asc1-v2020.csv')

In [None]:
# Build the attribute table: strip stray whitespace from the CSV headers,
# transpose so every CSV column becomes a row, label the new columns with the
# values of the first row, and keep rows 1-20 only. According to the original
# note the first date sits at entry #21, so this slice holds the non-date
# attributes (despite the `_dates` suffix of the variable name).
gm_csv = gm_csv.rename(columns=str.strip)
gm_csv_t = gm_csv.T
gm_csv_t = gm_csv_t.rename(columns=gm_csv_t.iloc[0])
gm_csv_t_dates = gm_csv_t.iloc[1:21, :]

In [None]:
# Working name for the attribute table; this binds the same object as gm_csv_t_dates (no copy is made).
parameters = gm_csv_t_dates

In [None]:
# Show the row labels, i.e. the attribute names currently in the table.
parameters.index

In [None]:
# Remove the attribute rows that are not used in the correlation analysis.
parameters = parameters.drop(
    index=['track', 'mode', 'burst', 'line', 'pixel', 'height_wgs84', 'pixel.1', 'line.1'],
)

In [None]:
# Display the attribute rows that remain after the drop.
parameters

In [None]:
# Index the regression results by point id ('pid'); the classification type
# of each point is attached to the attribute table a few cells further down.
regression = regression.set_index('pid')

In [None]:
# Transpose so that point ids become the columns, matching the layout of `parameters`.
regression = regression.transpose()

In [None]:
# Peek at the last three rows of the transposed regression table.
regression.tail(3)

In [None]:
# Attach the class label of every point as an extra 'reg_type' row (it turns
# into a column once the table is transposed further down).
# DataFrame.append was removed in pandas 2.0, so pd.concat is used instead;
# selecting with a list keeps 'reg_type' as a one-row DataFrame, which
# reproduces what appending the named Series used to do.
parameters = pd.concat([parameters, regression.loc[['reg_type']]])

In [None]:
# Next step: classification and feature importance.
# Flip the table back so that the attributes (including 'reg_type') become
# the columns.
parameters = parameters.transpose()

In [None]:
# Names of all columns currently in the table, as a plain list.
column_names = parameters.columns.tolist()

In [None]:
# Class label of every row, kept separately from the numeric attributes.
labels = parameters.loc[:, 'reg_type']

In [None]:
# How many rows carry each 'reg_type' label.
parameters['reg_type'].value_counts()

In [None]:
# Attribute table without the label column.
parameters_no_labels = parameters.drop(columns=['reg_type'])

In [None]:
# Re-wrap in a DataFrame. The input is already a DataFrame (the result of DataFrame.drop), so the contents are unchanged.
parameters_no_labels = pd.DataFrame(parameters_no_labels)

In [None]:
# Cast every column to 64-bit floats so that the correlation can be computed.
parameters_no_labels = parameters_no_labels.astype(np.float64)

In [None]:
# Pairwise Pearson correlation between all attribute columns
# (method='pearson' and min_periods=1 are also the pandas defaults).
correlation = parameters_no_labels.corr(method ='pearson', min_periods=1)


In [None]:
# Plain NumPy array holding the correlation values (row/column labels are dropped).
correlation_array = correlation.to_numpy()

In [None]:
# Quick-look image of the correlation matrix (no title, tick labels or colour bar).
plt.imshow(correlation_array)

In [None]:
# NOTE(review): `stats` is not used anywhere in the visible part of this notebook.
from scipy import stats
# Pearson correlation matrix with default settings; same computation as `correlation` above.
cormat = parameters_no_labels.corr()

In [None]:
import seaborn as sns

# Heat map of the overall correlation matrix on a 10 x 10 inch canvas.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(data=cormat, ax=ax)

But is correlation what I really want?
Given a set of attributes, I want to check whether we can classify the data.
I want to check the correlation between the label and the other variables for each label group.

Get the statistics separately for each of the label groups:
create 4 datasets and do PCA and correlation for each of them:

* ascending

* descending

* constant

* irregular

In [None]:
# Peek at the first four rows of the labelled attribute table.
parameters.head(4)

In [None]:
# Group the rows by their regression class label.
grouped_parameters = parameters.groupby('reg_type')

In [None]:
# Displays only the GroupBy object's repr, not the grouped data itself.
grouped_parameters

In [None]:
# Print the number of rows that fall into each regression class.
for _, class_frame in parameters.groupby('reg_type'):
    print(class_frame.shape[0])

In [None]:
# One sub-table per regression class, selected with a boolean mask on 'reg_type'.
params_ascending = parameters.loc[parameters['reg_type'].eq('ascending')]
params_descending = parameters.loc[parameters['reg_type'].eq('descending')]
params_constant = parameters.loc[parameters['reg_type'].eq('constant')]
params_irregular = parameters.loc[parameters['reg_type'].eq('irregular')]

In [None]:
# Strip the label column from every per-class table.
params_ascending_no_labels = params_ascending.drop(columns=['reg_type'])
params_descending_no_labels = params_descending.drop(columns=['reg_type'])
params_constant_no_labels = params_constant.drop(columns=['reg_type'])
params_irregular_no_labels = params_irregular.drop(columns=['reg_type'])


In [None]:
# The correlation routine needs numeric data, so cast every per-class table to 64-bit floats.
params_ascending_no_labels = params_ascending_no_labels.astype(np.float64)
params_descending_no_labels = params_descending_no_labels.astype(np.float64)
params_constant_no_labels = params_constant_no_labels.astype(np.float64)
params_irregular_no_labels = params_irregular_no_labels.astype(np.float64)


In [None]:
# Pearson correlation matrix of the attributes, computed separately per class.
cormat_asc = params_ascending_no_labels.corr(method='pearson')
cormat_desc = params_descending_no_labels.corr(method='pearson')
cormat_const = params_constant_no_labels.corr(method='pearson')
cormat_irr = params_irregular_no_labels.corr(method='pearson')

In [None]:
# Quick look at the correlation matrix of the 'ascending' class on its own.
# Uses explicit figure/axes objects, a title that says what is shown (the
# previous one was a placeholder) and a colour bar so the values can be read.
fig_asc, ax_asc = plt.subplots()
image_asc = ax_asc.imshow(cormat_asc)
ax_asc.set_title('Correlation matrix: ascending')
fig_asc.colorbar(image_asc, ax=ax_asc)

In [None]:
# 2 x 2 overview figure: one correlation matrix per regression class, each
# panel with its own colour bar. No separate plt.figure() call is made first,
# because plt.subplots already creates the figure (the extra call only left
# an empty figure behind).
f, axarr = plt.subplots(2, 2)

# (row, column) grid position, panel title and matrix for every class.
panels = [
    ((0, 0), 'Ascending', cormat_asc),
    ((1, 0), 'Descending', cormat_desc),
    ((0, 1), 'Constant', cormat_const),
    ((1, 1), 'Irregular', cormat_irr),
]

for (row, col), panel_title, matrix in panels:
    panel_ax = axarr[row][col]
    image = panel_ax.imshow(matrix, cmap='rainbow')
    panel_ax.set_title(panel_title)
    # Carve a slim axis off the right-hand side of the panel to hold its colour bar.
    colorbar_ax = make_axes_locatable(panel_ax).append_axes('right', size='5%', pad=0.05)
    # The colour bar takes its colormap from `image`; a separate cmap argument
    # would be ignored, so none is passed.
    f.colorbar(image, cax=colorbar_ax, orientation='vertical')

plt.tight_layout()

# savefig does not create missing folders, so make sure the target exists.
Path('./figures').mkdir(parents=True, exist_ok=True)
plt.savefig('./figures/correlation_matrices.png')

In [None]:
# List the attribute names covered by the 'irregular' correlation matrix.
cormat_irr.columns

In [None]:
# For every attribute, print the attribute it is most strongly (positively)
# correlated with, followed by the correlation value.
# Only the 'irregular' class matrix is inspected here.
for attribute in cormat_irr.columns:
    # Remove the attribute's own entry explicitly instead of assuming it is
    # the largest value, and ignore undefined (NaN) correlations.
    partners = cormat_irr[attribute].drop(labels=attribute).dropna()
    if partners.empty:
        # Every correlation of this attribute is NaN, so there is nothing to rank.
        print(attribute, 'has no defined correlations')
        continue
    print(partners.idxmax())
    print(partners.max())

However, these attributes don't tell us much because we don't
really have any physical info.
