# Implement basic metrics in Stats from scratch

https://www.hackerrank.com/contests/diploma-in-ai-and-ml-uoh/challenges/implement-basic-metrics-in-stats-from-scratch

<img src="problem1.png">

## Mean

In [2]:
import numpy as np
import random 

arr = np.random.randint(1, 100, 2000)
arr2 = [random.randint(0,50) for _ in range(1000000)] 

In [3]:
def mean(x):
    """Return the arithmetic mean of the values in ``x``.

    ``x`` must work with both ``sum()`` and ``len()``.  An empty ``x``
    raises ``ZeroDivisionError`` from the division below.
    """
    total = sum(x)
    count = len(x)
    return total / count

In [4]:
my_mean = mean(list(arr))
print(my_mean)

49.4995


In [5]:
np_mean = np.mean(arr)
print(np_mean)

49.4995


In [6]:
my_mean = mean(list(arr2))
print(my_mean)

25.008122


In [7]:
np_mean = np.mean(arr2)
print(np_mean)

25.008122


## Median

In [9]:
import numpy as np
arr = list(np.random.randint(1, 100, 2000))
arr2 = [random.randint(0,50) for _ in range(1000000)] 

In [10]:
def median(x):
    """Return the median of the values in ``x``.

    The values are sorted into a new list, so ``x`` itself is not modified.
    With an odd number of values the middle element is returned unchanged;
    with an even number the two middle elements are averaged using true
    division.
    """
    ordered = sorted(x)
    mid, is_odd = divmod(len(ordered), 2)

    if is_odd:
        return ordered[mid]

    # Even count: ``mid`` is the upper of the two central positions.
    return (ordered[mid - 1] + ordered[mid]) / 2

In [11]:
median(arr)

51.0

In [12]:
np.median(arr)

51.0

In [14]:
median(arr2), np.median(arr2)

(25.0, 25.0)

## Variance 

In [15]:
import numpy as np
arr = list(np.random.randint(1, 100, 2000))
arr2 = [random.randint(0,50) for _ in range(1000000)] 

In [16]:
def variance(x):
    """Return the population variance of ``x``.

    This is the sum of squared deviations from ``mean(x)`` divided by
    ``len(x)`` (the divisor is N, not N - 1).
    """
    centre = mean(x)
    squared_deviations = ((value - centre) ** 2 for value in x)
    return sum(squared_deviations) / len(x)
    

In [17]:
variance(arr)

796.8740999999993

In [18]:
np.var(arr)

796.8741

In [19]:
variance(arr2)

216.50547779942247

In [20]:
np.var(arr2)

216.5054777993749

In [21]:
arr = list(np.random.random(2000))

In [22]:
variance(arr)

0.08370960910216124

In [23]:
np.var(arr)

0.0837096091021613

## Standard Deviation

In [24]:
import numpy as np
arr = list(np.random.randint(1, 100, 2000))
arr2 = [random.randint(0,50) for _ in range(1000000)] 

In [25]:
def sd(x):
    """Return the standard deviation of ``x``: the square root of ``variance(x)``."""
    return variance(x) ** 0.5

In [26]:
sd(arr)

28.276476018061405

In [27]:
np.std(arr)

28.276476018061373

In [28]:
sd(arr2), np.std(arr2) 

(14.728777661117212, 14.728777661114856)

## Percentile

In [29]:
import numpy as np
arr = list(np.random.randint(1, 100, 2000))

In [30]:
def percentile(x, p):
    """Return the ``p``-th percentile of ``x`` by rank lookup (no interpolation).

    The values are sorted and the element at 1-based rank
    ``round(len(x) * (p / 100))`` is returned, so the result is always one
    of the input values.  A computed rank below 1 is clamped to the smallest
    value.

    Args:
        x: Non-empty collection of mutually comparable values.
        p: Percentile to look up, within ``[0, 100]``.

    Returns:
        The element of ``x`` found at the computed rank.

    Raises:
        ValueError: If ``p`` is outside ``[0, 100]``.
        IndexError: If ``x`` is empty.
    """
    if not 0 <= p <= 100:
        raise ValueError(f"percentile must be in the range [0, 100], got {p!r}")

    x_sorted = sorted(x)
    x_len = len(x_sorted)
    if x_len == 0:
        raise IndexError("percentile() of an empty sequence")

    # NOTE: the built-in round() rounds exact halves to the nearest even
    # integer, so e.g. a raw rank of 2.5 becomes 2, not 3.
    rank = round(x_len * (p / 100))

    # A rank of 0 would become index -1, which Python treats as "last
    # element" and would return the maximum; clamp to the first element.
    ix = max(rank - 1, 0)

    return x_sorted[ix]

    

In [31]:
np.percentile(arr, 50)

50.0

In [32]:
percentile(arr, 50)

50

In [34]:
percentile(arr2, 50)

25

In [35]:
np.percentile(arr2, 50)

25.0

In [36]:
arr = np.random.randint(1, 10000, size=10000)

In [37]:
np.percentile(arr, 95)

9464.05

In [38]:
percentile(list(arr), 95)

9464

In [39]:
np.percentile(arr, 50)

5011.0

In [40]:
percentile(arr, 50)

5011

In [41]:
np.median(arr)

5011.0

In [42]:
median(arr)

5011.0

In [43]:
np.median(arr2), median(arr2)

(25.0, 25.0)

## IQR

In [44]:
import numpy as np
arr = list(np.random.randint(1, 100, 2000))
arr2 = [random.randint(0,50) for _ in range(1000000)] 

In [45]:
def iqr(x):
    """Return the interquartile range of ``x``.

    Computed as ``percentile(x, 75) - percentile(x, 25)``.  Only these two
    quartiles are evaluated, because each ``percentile`` call sorts the
    whole input.
    """
    q1 = percentile(x, 25)
    q3 = percentile(x, 75)

    return q3 - q1

In [46]:
iqr(arr)

49

In [48]:
from scipy import stats

stats.iqr(arr, interpolation = 'midpoint')

49.0

In [49]:
iqr(arr2)

26

In [50]:
stats.iqr(arr2, interpolation = 'midpoint')

26.0

## Median Absolute Deviation

In [56]:
import numpy as np

arr = list(np.random.randint(1, 100, 2000))
arr2 = [random.randint(0,50) for _ in range(1000000)] 

In [62]:
def mad(x):
    """Return the median absolute deviation of ``x``.

    This is the median of ``abs(value - median(x))`` over all values.  The
    raw median is returned; no scaling constant is applied.
    """
    centre = median(x)

    # median() sorts its own input, so neither ``x`` nor the deviations
    # need to be pre-sorted here.
    deviations = [abs(value - centre) for value in x]

    return median(deviations)
    


In [63]:
from statsmodels import robust
from scipy import stats

In [64]:
stats.median_abs_deviation(arr), stats.median_abs_deviation(arr2)

(25.0, 13.0)

In [67]:
mad(arr), mad(arr2)

(25.0, 13.0)

In [65]:
robust.mad(arr), robust.mad(arr2)

(37.065055462640046, 19.273828840572826)