In [None]:
import pandas as pd
import numpy as np
# Toy data set for the covariance / correlation examples:
# a and b increase together, c decreases, d has no monotonic trend.
df = pd.DataFrame(
    dict(
        a=[1, 3, 4, 6, 8],
        b=[2, 3, 5, 6, 8],
        c=[6, 5, 4, 3, 2],
        d=[5, 4, 3, 4, 6],
    )
)
df


In [None]:
import matplotlib.pyplot as plt
# Scatter plot: column a on the x-axis, column b on the y-axis.
plt.scatter(x=df['a'], y=df['b'])
plt.xlabel(xlabel='a')
plt.ylabel(ylabel='b')


In [None]:
# Scatter plot: column b on the x-axis, column c on the y-axis.
plt.scatter(x=df['b'], y=df['c'])
plt.xlabel(xlabel='b')
plt.ylabel(ylabel='c')


In [None]:
# Scatter plot: column c on the x-axis, column d on the y-axis.
plt.scatter(x=df['c'], y=df['d'])
plt.xlabel(xlabel='c')
plt.ylabel(ylabel='d')


In [None]:
# Sample covariance of a and b, computed by hand:
# sum of the products of the deviations from each mean, divided by (n - 1).
(
    df['a'].sub(df['a'].mean())
    .mul(df['b'].sub(df['b'].mean()))
    .sum()
    / (len(df) - 1)
)


In [None]:
# 2x2 covariance matrix of a and b: variances on the diagonal,
# cov(a, b) on the off-diagonal.
np.cov(m=df['a'], y=df['b'])

In [None]:
# Covariance matrix of b and c; the off-diagonal entries are negative.
np.cov(m=df['b'], y=df['c'])
# Expected output:
# array([[ 5.7 , -3.75],
#        [-3.75,  2.5 ]])


In [None]:
# Covariance matrix of c and d.
np.cov(m=df['c'], y=df['d'])
# Expected output:
# array([[ 2.5, -0.5],
#        [-0.5,  1.3]])


In [None]:
# Covariance depends on scale: doubling both columns changes every entry
# of the covariance matrix.
np.cov(2 * df['a'], 2 * df['b'])
# Expected output:
# array([[29.2, 25.4],
#        [25.4, 22.8]])


In [None]:
# Pearson correlation matrix of a and b (pearson is the default method).
df.loc[:, ['a', 'b']].corr(method='pearson')

In [None]:
# Pearson correlation matrix of b and c (pearson is the default method).
df.loc[:, ['b', 'c']].corr(method='pearson')

In [None]:
# Pearson correlation matrix of c and d (pearson is the default method).
df.loc[:, ['c', 'd']].corr(method='pearson')

In [None]:
# Add doubled copies of a and b as new columns '2a' and '2b'.
df['2a'] = df['a'].mul(2)
df['2b'] = df['b'].mul(2)
# Correlation of the doubled columns; the result is the same as
# df[['a','b']].corr()
df.loc[:, ['2a', '2b']].corr()


In [None]:
# Replace df with a 7-row frame holding a 'math' and a 'science' column.
df = pd.DataFrame(
    dict(
        math=[78, 89, 75, 67, 60, 58, 71],
        science=[91, 85, 90, 80, 60, 56, 84],
    )
)
df


In [None]:
# Scatter plot: math on the x-axis, science on the y-axis.
plt.scatter(x=df['math'], y=df['science'])
plt.xlabel(xlabel='math')
plt.ylabel(ylabel='science')


In [None]:
# Rank each subject in descending order (highest value gets rank 1);
# 'average' is the default tie-handling method.
df['math_rank'] = df['math'].rank(method='average', ascending=False)
df['science_rank'] = df['science'].rank(method='average', ascending=False)
df


In [None]:
# Per-row rank difference and its square, as used by the Spearman formula.
df['diff'] = df['math_rank'].sub(df['science_rank'])
df['diff_sq'] = df['diff'].pow(2)
df


In [None]:
# Spearman rank correlation from the squared rank differences:
# p = 1 - 6 * sum(d^2) / (n * (n^2 - 1)), with n the number of rows.
n = len(df)
p = 1 - 6 * df['diff_sq'].sum() / (n * (n * n - 1))
p   # 0.8928571428571429


In [None]:
# Spearman correlation of math and science computed by pandas.
df.loc[:, ['math', 'science']].corr(method='spearman')

In [None]:
# Pearson correlation of the same two columns, for comparison.
df.loc[:, ['math', 'science']].corr(method='pearson')

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression

def calculate_vif(df, features):
    """Compute the variance inflation factor (VIF) and tolerance per feature.

    Each feature is regressed with ``LinearRegression`` on all the other
    listed features. With R² the score of that fit on the same data:

        tolerance = 1 - R²
        VIF       = 1 / tolerance

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing every column named in ``features``.
    features : iterable of column labels
        Columns to examine (a list, a pandas ``Index`` such as
        ``df.columns[:8]``, or any other iterable).

    Returns
    -------
    pandas.DataFrame
        Indexed by feature, with the columns ``'VIF'`` and ``'Tolerance'``.
        A feature that is perfectly explained by the others gets
        ``Tolerance == 0`` and ``VIF == inf``. A feature that has no other
        feature to be regressed against gets ``Tolerance == 1`` and
        ``VIF == 1``.
    """
    # Materialise once: the features are iterated repeatedly below, which
    # would silently exhaust a one-shot iterable.
    features = list(features)
    vif, tolerance = {}, {}
    for feature in features:
        # every other feature acts as a predictor for this one
        predictors = [f for f in features if f != feature]
        if predictors:
            X, y = df[predictors], df[feature]
            # R² of the fit, evaluated on the training data itself
            r2 = float(LinearRegression().fit(X, y).score(X, y))
        else:
            # Nothing to regress against: none of the variance is explained.
            # (Fitting on an empty predictor frame would raise.)
            r2 = 0.0

        # Clamp at 0 so floating-point overshoot of R² past 1 cannot yield a
        # negative tolerance.
        tol = max(0.0, 1.0 - r2)
        tolerance[feature] = tol
        # Perfect collinearity (tolerance 0) means an infinite VIF; guard the
        # division instead of raising ZeroDivisionError.
        vif[feature] = 1.0 / tol if tol > 0.0 else float('inf')
    # dict-of-dicts keeps the features as the index, in input order
    return pd.DataFrame({'VIF': vif, 'Tolerance': tolerance})


In [None]:
import pandas as pd
# Raw table: the first row holds the column names, the remaining 20 rows
# are the observations.
data = [
    ['BP', 'Age', 'Weight', 'BSA', 'Dur', 'Pulse', 'Stress'],
    [105, 47,  85.4, 1.75,  5.1, 63, 33],
    [115, 49,  94.2, 2.1,   3.8, 70, 14],
    [116, 49,  95.3, 1.98,  8.2, 72, 10],
    [117, 50,  94.7, 2.01,  5.8, 73, 99],
    [112, 51,  89.4, 1.89,  7,   72, 95],
    [121, 48,  99.5, 2.25,  9.3, 71, 10],
    [121, 49,  99.8, 2.25,  2.5, 69, 42],
    [110, 47,  90.9, 1.9,   6.2, 66, 8],
    [110, 49,  89.2, 1.83,  7.1, 69, 62],
    [114, 48,  92.7, 2.07,  5.6, 64, 35],
    [114, 47,  94.4, 2.07,  5.3, 74, 90],
    [115, 49,  94.1, 1.98,  5.6, 71, 21],
    [114, 50,  91.6, 2.05, 10.2, 68, 47],
    [106, 45,  87.1, 1.92,  5.6, 67, 80],
    [125, 52, 101.3, 2.19, 10,   76, 98],
    [114, 46,  94.5, 1.98,  7.4, 69, 95],
    [106, 46,  87,   1.87,  3.6, 62, 18],
    [113, 46,  94.5, 1.9,   4.3, 70, 12],
    [110, 48,  90.5, 1.88,  9,   71, 99],
    [122, 56,  95.7, 2.09,  7,   75, 99],
]
# Build the frame from the observation rows, labelled by the header row.
df = pd.DataFrame(columns=data[0], data=data[1:])
df


In [None]:
import seaborn as sns
# Pairwise scatter plots (histograms on the diagonal) for every column of df.
sns.pairplot(data=df)


In [None]:
# Pearson correlation matrix across all columns (pearson is the default).
df.corr(method='pearson')

In [None]:
# VIF / tolerance with all four candidate predictors.
calculate_vif(df, features=['Age', 'Weight', 'BSA', 'Pulse'])

In [None]:
# Same check with 'Weight' left out.
calculate_vif(df, features=['Age', 'BSA', 'Pulse'])

In [None]:
# Same check with 'BSA' left out instead.
calculate_vif(df, features=['Age', 'Weight', 'Pulse'])

In [None]:
from sklearn import datasets
# Load scikit-learn's breast-cancer data set and wrap its feature matrix in
# a DataFrame whose columns are the feature names.
bc = datasets.load_breast_cancer()
df = pd.DataFrame(data=bc.data, columns=bc.feature_names)
df


In [None]:
# Pairwise plots restricted to the first eight columns.
sns.pairplot(data=df.iloc[:, :8])

In [None]:
# VIF / tolerance for the first eight columns.
calculate_vif(df, features=df.columns[:8])

In [None]:
# VIF / tolerance for an explicit list of seven features.
calculate_vif(
    df=df,
    features=[
        'mean radius',
        'mean texture',
        'mean area',
        'mean smoothness',
        'mean compactness',
        'mean concavity',
        'mean concave points',
    ],
)


In [None]:
# Same check with 'mean area' left out.
calculate_vif(
    df=df,
    features=[
        'mean radius',
        'mean texture',
        'mean smoothness',
        'mean compactness',
        'mean concavity',
        'mean concave points',
    ],
)


In [None]:
# Same check with 'mean area' and 'mean concave points' left out.
calculate_vif(
    df=df,
    features=[
        'mean radius',
        'mean texture',
        'mean smoothness',
        'mean compactness',
        'mean concavity',
    ],
)


In [None]:
# Same check with 'mean area', 'mean concavity' and 'mean concave points'
# left out.
calculate_vif(
    df=df,
    features=[
        'mean radius',
        'mean texture',
        'mean smoothness',
        'mean compactness',
    ],
)
