# Correlation and feature importance

OK, after looking at the general trends in the data, as well as looking at evidence that different subgroups behave very differently, it is time to figure out how the variables correlate with each other, and to find which variables best describe the variable we are interested in.

In this case, climbers will probably want to learn which variables correlate with their climbing performance, so we will use some analytic tools to find which variables are best at predicting it. A variable that is good at predicting something is often one whose modification would change the variable of interest in the desired direction (in this example, making you a better climber), although correlation alone does not guarantee this.

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Read the filtered survey table from disk; the summary below shows its
# columns in their original units.
fullData = pd.read_csv(filepath_or_buffer="filteredData.csv")
# Leave the summary as the cell's final expression so the notebook renders it.
fullData.describe()

Unnamed: 0,FEMALE,AGE,YRS_CLIMBING,START_AGE,PARENTS,HEIGHT,APEINDEX,WEIGHT,BMI,OCC_STUDY,...,B_INPOTENTIAL,B_OUTPOTENTIAL,S_FLASH,S_REDP,S_FLASHCONF,S_FINISHCONF,S_AVG,S_FPOTENTIAL,S_MPOTENTIAL,S_INCONSISTENCY
count,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,...,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
mean,0.4,27.63871,5.356774,22.281935,0.083871,171.728258,0.696774,63.649677,21.493991,0.43871,...,1.335484,1.2,4.922581,6.993548,3.496774,6.025806,5.359677,2.070968,-0.967742,1.425806
std,0.491486,8.26971,5.490145,6.645587,0.278093,9.735438,2.974729,10.270835,2.257924,0.497838,...,0.948529,1.164407,3.853714,4.614343,3.187586,4.496678,3.905341,1.844999,1.828417,1.35775
min,0.0,15.0,0.5,10.0,0.0,150.0,-10.0,40.0,16.003658,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.0,0.0
25%,0.0,22.0,2.0,19.0,0.0,164.5,0.0,56.0,19.993063,0.0,...,1.0,0.0,1.0,3.0,0.5,2.0,2.0,0.0,-2.0,0.0
50%,0.0,25.0,4.0,21.0,0.0,170.0,0.0,63.0,21.258503,0.0,...,1.0,1.0,5.0,8.0,3.0,6.0,5.5,2.0,-1.0,1.0
75%,1.0,30.5,6.0,24.25,0.0,179.0,0.0,70.0,22.81034,1.0,...,2.0,2.0,8.0,10.0,6.0,9.0,8.25,3.0,0.0,3.0
max,1.0,58.0,31.0,53.0,1.0,208.0,13.0,108.0,27.777778,1.0,...,6.0,7.0,15.0,18.0,12.0,17.0,15.25,8.0,3.0,5.0


In [3]:
# Read the normalised table from disk; the summary below shows every column
# spanning the 0-1 range.
normData = pd.read_csv(filepath_or_buffer="normData.csv")
# Leave the summary as the cell's final expression so the notebook renders it.
normData.describe()

Unnamed: 0,FEMALE,AGE,YRS_CLIMBING,START_AGE,PARENTS,HEIGHT,APEINDEX,WEIGHT,BMI,OCC_STUDY,...,B_INPOTENTIAL,B_OUTPOTENTIAL,S_FLASH,S_REDP,S_FLASHCONF,S_FINISHCONF,S_AVG,S_FPOTENTIAL,S_MPOTENTIAL,S_INCONSISTENCY
count,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,...,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
mean,0.4,0.293923,0.159238,0.285626,0.083871,0.374625,0.465077,0.347789,0.466305,0.43871,...,0.222581,0.171429,0.328172,0.38853,0.291398,0.354459,0.351454,0.258871,0.504032,0.285161
std,0.491486,0.192319,0.180005,0.154549,0.278093,0.167852,0.129336,0.151042,0.19177,0.497838,...,0.158088,0.166344,0.256914,0.256352,0.265632,0.26451,0.256088,0.230625,0.228552,0.27155
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.162791,0.04918,0.209302,0.0,0.25,0.434783,0.235294,0.338828,0.0,...,0.166667,0.0,0.066667,0.166667,0.041667,0.117647,0.131148,0.0,0.375,0.0
50%,0.0,0.232558,0.114754,0.255814,0.0,0.344828,0.434783,0.338235,0.446305,0.0,...,0.166667,0.142857,0.333333,0.444444,0.25,0.352941,0.360656,0.25,0.5,0.2
75%,1.0,0.360465,0.180328,0.331395,0.0,0.5,0.434783,0.441176,0.578105,1.0,...,0.333333,0.285714,0.533333,0.555556,0.5,0.529412,0.540984,0.375,0.625,0.6
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [4]:
# Build the working table: begin with a copy of the normalised data, then
# overwrite its trailing 17 columns (selected by position) with the values
# from the un-normalised table, so those columns keep their original scale.
# NOTE(review): presumably these 17 columns are the climbing-performance
# measures to be predicted -- confirm against the column list.
_rawCols = slice(-17, None)
data = normData.copy()
data.iloc[:, _rawCols] = fullData.iloc[:, _rawCols]
# Leave the summary as the cell's final expression so the notebook renders it.
data.describe()

Unnamed: 0,FEMALE,AGE,YRS_CLIMBING,START_AGE,PARENTS,HEIGHT,APEINDEX,WEIGHT,BMI,OCC_STUDY,...,B_INPOTENTIAL,B_OUTPOTENTIAL,S_FLASH,S_REDP,S_FLASHCONF,S_FINISHCONF,S_AVG,S_FPOTENTIAL,S_MPOTENTIAL,S_INCONSISTENCY
count,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,...,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
mean,0.4,0.293923,0.159238,0.285626,0.083871,0.374625,0.465077,0.347789,0.466305,0.43871,...,1.335484,1.2,4.922581,6.993548,3.496774,6.025806,5.359677,2.070968,-0.967742,1.425806
std,0.491486,0.192319,0.180005,0.154549,0.278093,0.167852,0.129336,0.151042,0.19177,0.497838,...,0.948529,1.164407,3.853714,4.614343,3.187586,4.496678,3.905341,1.844999,1.828417,1.35775
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.0,0.0
25%,0.0,0.162791,0.04918,0.209302,0.0,0.25,0.434783,0.235294,0.338828,0.0,...,1.0,0.0,1.0,3.0,0.5,2.0,2.0,0.0,-2.0,0.0
50%,0.0,0.232558,0.114754,0.255814,0.0,0.344828,0.434783,0.338235,0.446305,0.0,...,1.0,1.0,5.0,8.0,3.0,6.0,5.5,2.0,-1.0,1.0
75%,1.0,0.360465,0.180328,0.331395,0.0,0.5,0.434783,0.441176,0.578105,1.0,...,2.0,2.0,8.0,10.0,6.0,9.0,8.25,3.0,0.0,3.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,6.0,7.0,15.0,18.0,12.0,17.0,15.25,8.0,3.0,5.0
