In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets

In [2]:
# Load the Boston housing dataset shipped with scikit-learn; later cells read
# its .data, .feature_names and .target attributes.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs scikit-learn < 1.2 — confirm the pinned
# version before re-running.
boston = datasets.load_boston()

In [3]:
# One frame holding every feature column plus the regression target.
# `assign` appends `target` as the LAST column; later cells rely on that
# position when they slice the target off with `[:-1]`.
df = pd.DataFrame(boston.data, columns=boston.feature_names).assign(
    target=boston.target
)

In [4]:
# Frame dimensions. Note that num_attribs also counts the `target` column.
num_rows = len(df.index)
num_attribs = len(df.columns)

In [5]:
# Per-column flag: True when the column contains at least one missing value.
# To list the offending rows instead, use: df[df.isna().any(axis=1)]
df.isna().any()

CRIM       False
ZN         False
INDUS      False
CHAS       False
NOX        False
RM         False
AGE        False
DIS        False
RAD        False
TAX        False
PTRATIO    False
B          False
LSTAT      False
target     False
dtype: bool

In [6]:
# Pairwise Pearson correlation between all columns of df
# (features and target alike).
pearson = df.corr(method="pearson")

In [7]:
# Correlation of each column with `target`. The final entry is dropped
# positionally: it is target's correlation with itself, because `target`
# is the last column of the frame.
corr_with_target = pearson["target"].iloc[:-1]

In [8]:
# Order the features by the strength (absolute value) of their correlation
# with the target, strongest first.
# argsort yields integer *positions*, so they are applied with .iloc.
# Indexing a string-labelled Series with plain `series[ints]` only works
# through a positional fallback that recent pandas versions deprecate.
predictivity = corr_with_target.iloc[
    corr_with_target.abs().to_numpy().argsort()[::-1]
]

In [9]:
# Display the features ordered by |correlation with target|, strongest first.
predictivity

LSTAT     -0.737663
RM         0.695360
PTRATIO   -0.507787
INDUS     -0.483725
TAX       -0.468536
NOX       -0.427321
CRIM      -0.388305
RAD       -0.381626
AGE       -0.376955
ZN         0.360445
B          0.333461
DIS        0.249929
CHAS       0.175260
Name: target, dtype: float64

In [10]:
# Minimum absolute correlation for a feature pair to count as "important".
threshold = 0.5

# Every column of the correlation matrix except the last one (`target`),
# then the feature-vs-feature sub-matrix restricted to those columns.
predictors = pearson.columns[:-1]
attrs = pearson.loc[predictors, predictors]

In [11]:
# Keep only the strong feature-to-feature correlations (|r| > threshold).
# The diagonal (each feature against itself) is masked out by POSITION rather
# than by testing `value != 1.0`: an exact float comparison can miss a
# diagonal entry that is off by rounding, and it would also throw away two
# different features that happen to be perfectly correlated.
# unstack() flattens the matrix into a Series keyed by (column, row); the
# masked cells are NaN and are dropped before the dict is built. Both
# (A, B) and (B, A) are still present in the result.
important_corrs = (
    attrs
    .where((attrs.abs() > threshold) & ~np.eye(len(attrs), dtype=bool))
    .unstack()
    .dropna()
    .to_dict()
)

In [12]:
# Collapse the symmetric entries (A, B) / (B, A) into one record per pair.
# Each key is normalised by sorting the two names. A dict is used instead of
# a set because it keeps first-seen order, so the resulting row order (and
# the index labels of the frame built from it) is identical on every run
# instead of depending on string hash randomisation.
data = list({
    tuple(sorted(pair)): corr
    for pair, corr in important_corrs.items()
}.items())

In [13]:
# Tabulate the (pair, correlation) records: one row per feature pair.
unique_important_corrs = pd.DataFrame(
    data,
    columns=['attribute_pair', 'correlation'],
)

In [14]:
# Sort the pairs by absolute correlation, strongest first.
# argsort returns row POSITIONS, so they must be applied with .iloc.
# Applying them with label-based .loc is only correct while the index labels
# still equal the positions, i.e. on the first run. Because this cell
# reassigns the same name, re-running it with .loc would look the positions
# up as the already permuted labels and scramble the order again.
unique_important_corrs = unique_important_corrs.iloc[
    unique_important_corrs['correlation'].abs().to_numpy().argsort()[::-1]
]

In [15]:
# Display the strongly correlated feature pairs, strongest |correlation| first.
unique_important_corrs

Unnamed: 0,attribute_pair,correlation
1,"(RAD, TAX)",0.910228
14,"(DIS, NOX)",-0.76923
8,"(INDUS, NOX)",0.763651
19,"(AGE, DIS)",-0.747881
9,"(AGE, NOX)",0.73147
13,"(INDUS, TAX)",0.72076
12,"(DIS, INDUS)",-0.708027
3,"(NOX, TAX)",0.668023
7,"(DIS, ZN)",0.664408
11,"(AGE, INDUS)",0.644779


In [None]:
# Hexbin joint distribution of RAD vs TAX, the most strongly correlated
# feature pair in the table above.
# The columns are passed by keyword: seaborn deprecated positional x/y
# vectors in 0.11 and rejects them from 0.12 onwards, while the
# data=/x=/y= form is also accepted by older releases.
sns.jointplot(data=df, x='RAD', y='TAX', kind='hex')

In [None]:
from pandas.plotting import scatter_matrix

# Grid of pairwise scatter plots over every column of df (features and
# target); `s` is the marker size handed through to the scatter calls.
scatter_matrix(
    df,
    figsize=[18, 18],
    s=150,
)
plt.show()