## Covariance & Correlation with Python

In [1]:
import seaborn as sns

In [2]:
import numpy as np

In [3]:
x=np.array([5,4,3,2,6,1])
y=np.array([5,4,3,1,2,6])

In [4]:
x_std=np.std(x)

In [5]:
x_std

1.707825127659933

In [6]:
y_std=np.std(y)

In [7]:
y_std

1.707825127659933

In [8]:
cov=np.cov(x,y)[0,1]

In [9]:
cov

-0.7000000000000001

In [10]:
spearman=cov/(x_std*y_std)

In [11]:
spearman

-0.24000000000000005

#### USing Dataset

In [17]:
df=sns.load_dataset('healthexp')

In [18]:
df.head()

Unnamed: 0,Year,Country,Spending_USD,Life_Expectancy
0,1970,Germany,252.311,70.6
1,1970,France,192.143,72.2
2,1970,Great Britain,123.993,71.9
3,1970,Japan,150.437,72.0
4,1970,USA,326.961,70.9


In [19]:
df.dtypes

Year                 int64
Country             object
Spending_USD       float64
Life_Expectancy    float64
dtype: object

#### Covariance

In [27]:
# Covariance between Spending_USD and Life_Expectancy
covariance=df["Spending_USD"].cov(df["Life_Expectancy"])
covariance

4166.800911526429

## Correlation

In [36]:
# Correlation between Spending_USD and Life_Expectancy.. Pearson(by default it is Pearson)
correlation=df["Spending_USD"].corr(df["Life_Expectancy"])
correlation

0.5794304588530952

In [34]:
# Spearman Rank Correlation
spearman_corr=df["Spending_USD"].corr(df["Life_Expectancy"],method='spearman')
spearman_corr

0.7474072371099253

In [38]:
df2=sns.load_dataset('penguins')

In [40]:
df2.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


In [44]:
numeric_df=df2.select_dtypes(include='number')
pearson=numeric_df.corr()
pearson

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g
bill_length_mm,1.0,-0.235053,0.656181,0.59511
bill_depth_mm,-0.235053,1.0,-0.583851,-0.471916
flipper_length_mm,0.656181,-0.583851,1.0,0.871202
body_mass_g,0.59511,-0.471916,0.871202,1.0


In [46]:
spearman_correlation=numeric_df.corr(method='spearman')
spearman_correlation

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g
bill_length_mm,1.0,-0.221749,0.672772,0.5838
bill_depth_mm,-0.221749,1.0,-0.523267,-0.432372
flipper_length_mm,0.672772,-0.523267,1.0,0.839974
body_mass_g,0.5838,-0.432372,0.839974,1.0
