Solution to: [Day 7: Spearman's Rank Correlation Coefficient](https://www.hackerrank.com/challenges/s10-spearman-rank-correlation-coefficient/problem)

<h1 id="tocheading">Table of Contents</h1>
<div id="toc"></div>

In [1]:
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')

<IPython.core.display.Javascript object>

In [None]:


This script contains 3 sections:
	1. Notes on Spearman's Rank Correlation Coefficient
	2. Solution using the standard Spearman rank correlation coefficient formula
	3. Solution using the Spearman rank special-case formula for unique values


##########
# Notes
##########

## Spearman's Rank Correlation Coefficient

We have two random variables, X and Y:
	X = (x1, x2, x3, ... xn)
	Y = (y1, y2, y3, ... yn)

If Rank_x and Rank_y denote the respective ranks of each data point, then the Spearman's rank correlation coefficient, Rs, 
is the Pearson correlation coefficient of Rank_x and Rank_y.

## Example
	X = {0.2, 1.3, 0.2, 1.1, 1.4, 1.5}
	Y = {1.9, 2.2, 3.1, 1.2, 2.2, 2.2}

Rank_x corresponding to X (tied values share the same rank):
	Rank_x = [1, 3, 1, 2, 4, 5]
Rank_y corresponding to Y (tied values share the same rank):
	Rank_y = [2, 3, 4, 1, 3, 3]

Rs = the Pearson correlation coefficient of rank_x and rank_y, or 0.158114

## Special Case: X and Y don't contain duplicates
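$$r_s = 1 - \frac{6 \sum_{i=1}^{n} d_i^2}{n (n^2 - 1)}$$

where d_i = Rank_x[i] - Rank_y[i] is the difference between the ranks of the i-th data point and n is the number of data points.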

[Proof](https://www.hackerrank.com/challenges/s10-spearman-rank-correlation-coefficient/tutorial)
 */
"""

#########
Imports
#########

In [None]:
from typing import Tuple
import math
import statistics

#########
Input
#########

In [None]:
def get_input(use_sample: bool = True) -> Tuple[int, list, list]:
	"""Returns input for Spearman's rank correlation coefficient.

	Args:
		use_sample (bool): When True (the default) the built-in sample data
			set is returned. When False the data is read from stdin as three
			lines: the item count, the x values, then the y values.

	Returns:
		Tuple[int, list, list]: number of items, x values, y values
	"""
	if use_sample:
		# Built-in sample so the solutions can run without any stdin.
		sample_x = [10, 9.8, 8, 7.8, 7.7, 1.7, 6, 5, 1.4, 2]
		sample_y = [200, 44, 32, 24, 22, 17, 15, 12, 8, 4]
		return 10, sample_x, sample_y

	# Values on each data line are whitespace separated.
	num_items = int(input())
	x = [float(val) for val in input().split()]
	y = [float(val) for val in input().split()]
	return num_items, x, y

#########
Print
#########

In [None]:
def print_to_scale(num: float) -> None:
	"""Prints number formatted to 3 decimal scale.

	Args:
		num (float): Number to print, shown with exactly three decimal places
	"""
	print(f"{num:.3f}")

#########
Spearman's Rank - standard implementation
#########

In [None]:
def get_ranks(x: list) -> list:
	"""Returns corresponding ranks of x list.

	Ranks start at 1 for the smallest value. Equal values share one rank and
	the next distinct value takes the following rank with no gap, e.g.
	[0.2, 1.3, 0.2, 1.1] -> [1, 3, 1, 2].

	Args:
		x (list): List to rank; values must be hashable and orderable

	Returns:
		list: Rankings of x, in the same order as x
	"""
	# Rank only the distinct values so that ties neither overwrite each
	# other's rank nor leave gaps in the rank sequence.
	val_ranks = {val: rank for rank, val in enumerate(sorted(set(x)), start=1)}

	## Create ranking
	return [val_ranks[val] for val in x]

In [None]:
def calc_mean(x: list) -> float:
	"""Computes the arithmetic mean of a list of numbers.

	Args:
		x (list): values to average

	Returns:
		float: sum of the values divided by how many there are
	"""
	total = sum(x)
	count = len(x)
	return total / count

In [None]:
def calc_sd(x: list, population: bool = True) -> float:
	"""Computes the standard deviation of the values in x.

	Args:
		x (list): Values to measure
		population (bool): When True divide the squared deviations by
			len(x); when False divide by len(x) - 1.

	Returns:
		float: standard deviation of x
	"""
	centre = calc_mean(x)

	# Accumulate the squared distance of every value from the mean.
	squared_total = 0
	for item in x:
		deviation = item - centre
		squared_total += deviation ** 2

	if population:
		divisor = len(x)
	else:
		divisor = len(x) - 1

	return (squared_total / divisor) ** (1/2)

In [None]:
def calc_cov(num_items: int, x: list, y: list) -> float:
	"""Computes the covariance of the paired series (x, y).

	The products of the deviations from each series' mean are summed over
	the first num_items positions and divided by num_items.

	Args:
		num_items (int): number of leading pairs to include
		x (list): series 1
		y (list): series 2

	Returns:
		float: covariance between x and y.
	"""
	mean_x = calc_mean(x)
	mean_y = calc_mean(y)

	cross_total = 0
	for idx in range(num_items):
		dev_x = x[idx] - mean_x
		dev_y = y[idx] - mean_y
		cross_total += dev_x * dev_y

	return cross_total / num_items

In [None]:
def calc_spearmans_coef(num_items: int, x: list, y: list) -> float:
	"""Returns spearman's coefficient for x,y

	The coefficient is the covariance of the two rank series divided by the
	product of their standard deviations.

	Args:
		num_items (int): length of both lists
		x (list): series 1
		y (list): series 2

	Returns:
		float: Spearman's coefficient for (x, y)

	Raises:
		ValueError: if x and y have different lengths
	"""
	# Explicit check rather than assert: asserts are stripped under `python -O`.
	if len(x) != len(y):
		raise ValueError(
			f"x and y must have the same length, got {len(x)} and {len(y)}"
		)

	rank_x, rank_y = get_ranks(x), get_ranks(y)

	cov = calc_cov(num_items, rank_x, rank_y)
	sd_x, sd_y = calc_sd(rank_x), calc_sd(rank_y)

	return cov / (sd_x * sd_y)

In [None]:
def spearman_rank_norm_sol(num_items: int, x: list, y: list) -> None:
	"""Computes the Spearman coefficient with the standard formula and prints it.

	Args:
		num_items (int): number of items
		x (list): Series 1
		y (list): Series 2
	"""
	coefficient = calc_spearmans_coef(num_items, x, y)
	print_to_scale(coefficient)

#########
Spearman correlation solution for unique values
#########

In [None]:
def calc_d(x: list, y: list) -> float:
	"""Returns the sum of squared differences between the ranks of x and y.

	Both series are ranked with get_ranks, then the ranks are compared
	position by position.

	Args:
		x (list): Series 1
		y (list): Series 2

	Returns:
		float: sum over all positions of (rank of x - rank of y) squared
	"""
	ranks_of_x = get_ranks(x)
	ranks_of_y = get_ranks(y)

	return sum(
		(rank_x - ranks_of_y[pos]) ** 2
		for pos, rank_x in enumerate(ranks_of_x)
	)

In [None]:
def spearman_rank_unique_vals(num_items: int, x: list, y: list) -> None:
	"""Computes the coefficient with the unique-values formula and prints it.

	Uses 1 - 6 * d / (n * (n**2 - 1)), where d is the value returned by
	calc_d and n is num_items.

	Args:
		num_items (int): number of items
		x (list): Series 1
		y (list): Series 2
	"""
	sum_sq_diff = calc_d(x, y)
	scaled_diff = 6 * sum_sq_diff
	denominator = num_items * (num_items**2 - 1)
	print_to_scale(1 - (scaled_diff / denominator))

#########
Main
#########

In [None]:
def main():
	"""Fetches the input and prints the coefficient from both solutions."""
	num_items, x, y = get_input()

	# Standard formula first, then the unique-values shortcut.
	for solver in (spearman_rank_norm_sol, spearman_rank_unique_vals):
		solver(num_items, x, y)

In [None]:
# Run both solutions only when this file is executed directly, not when imported.
if __name__ == "__main__":
	main()