## Python Spearman's Rank Correlation Coefficient

In [1]:
import numpy as np
import pandas as pd
from scipy import stats

Example 1: Manual Calculation

In [2]:
# Hit totals for the five sample players (same order as the RBI totals).
hits = np.asarray([150, 180, 120, 210, 160])

In [3]:
# RBI totals for the same five players, aligned position-by-position with the hit totals.
rbis = np.asarray([75, 90, 50, 110, 85])

In [4]:
# Rank the hit totals from smallest (rank 1) upward; ties would share their average rank.
hits_rank = stats.rankdata(hits, method='average')

In [5]:
# Show the rank given to each hit total; the smallest value (120) receives rank 1.
print(hits_rank)

[2. 4. 1. 5. 3.]


In [6]:
# Rank the RBI totals the same way as the hits so the two rank vectors are comparable.
rbis_rank = stats.rankdata(rbis, method='average')

In [7]:
# Show the rank given to each RBI total, for comparison with the hit ranks.
print(rbis_rank)

[2. 4. 1. 5. 3.]


In [8]:
# Per-player difference between the hit rank and the RBI rank.
d = np.subtract(hits_rank, rbis_rank)

In [9]:
# Number of paired observations (one pair per player).
n = hits.shape[0]

In [10]:
# Square each rank difference so positive and negative gaps both count.
d_squared = np.square(d)

In [11]:
# Shortcut form of Spearman's rho: 1 - 6 * sum(d^2) / (n * (n^2 - 1)).
# This form relies on every rank being distinct (no ties), which holds for this data.
spearman_rank_manual = 1 - 6 * d_squared.sum() / (n * (n - 1) * (n + 1))

In [12]:
# The two rank vectors are identical, so every difference is zero and the coefficient is 1.0.
print(spearman_rank_manual)

1.0


Example 2: SciPy Example

In [13]:
# Let SciPy rank the raw values and compute the coefficient; the second
# element of the result is the accompanying p-value.
spearman_rank_scipy, p_value = stats.spearmanr(a=hits, b=rbis)

In [14]:
# SciPy's value differs from the manual 1.0 only in the last floating-point digit.
print(spearman_rank_scipy)

0.9999999999999999


Example 3: Ordinal Data

In [15]:
# Raw study records: hours studied (numeric) paired with the letter grade earned (ordinal).
data = dict(
    Hours_studied=[10, 15, 5, 18],
    Grades=['C', 'A', 'D', 'B'],
)

In [16]:
# Build a table with one column per key of `data`, then display it.
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Hours_studied,Grades
0,10,C
1,15,A
2,5,D
3,18,B


In [17]:
# Numeric codes for the letter grades; a higher number means a better grade.
grade_mapping = dict(A=4, B=3, C=2, D=1)

In [18]:
# Translate each letter grade into its numeric code and store it as a new column.
grades_as_numbers = df['Grades'].map(grade_mapping)
df['Grades_Ordinal'] = grades_as_numbers

In [19]:
# Display the frame to check that each letter grade received its numeric code.
df

Unnamed: 0,Hours_studied,Grades,Grades_Ordinal
0,10,C,2
1,15,A,4
2,5,D,1
3,18,B,3


In [20]:
# Rank the study hours from fewest (rank 1) to most; ties would share their average rank.
df['Hours_Rank'] = df['Hours_studied'].rank(method='average')

In [21]:
# Rank the numeric grade codes the same way so they can be compared with the hour ranks.
df['Grades_Rank'] = df['Grades_Ordinal'].rank(method='average')

In [22]:
# Display the frame to inspect the two rank columns side by side before correlating them.
df

Unnamed: 0,Hours_studied,Grades,Grades_Ordinal,Hours_Rank,Grades_Rank
0,10,C,2,2.0,2.0
1,15,A,4,3.0,4.0
2,5,D,1,1.0,1.0
3,18,B,3,4.0,3.0


In [23]:
# Spearman correlation between the two rank columns.
# Only the 'Hours_Rank' row of the 2x2 correlation matrix is kept, so the result
# is a Series: the self-correlation (1.0) followed by the Hours/Grades coefficient.
rank_correlations = df.loc[:, ['Hours_Rank', 'Grades_Rank']].corr(method='spearman')
spearman_rank_pandas = rank_correlations.loc['Hours_Rank']
print(spearman_rank_pandas)

Hours_Rank     1.0
Grades_Rank    0.8
Name: Hours_Rank, dtype: float64
