# Statistical Operations

## Import libraries

In [86]:
#Import libraries
import numpy as np
import pandas as pd
from scipy import stats as st
import decimal

## Data

In [2]:
X = [3, 1, 2, 3, 1, 4, 3, 3]
Y = [1, 4, 3, 1, 1, 3]
Z = [3, 3, 1, 4, 2, 1, 4, 2]

In [3]:
pd_X = pd.Series(X)
pd_Y = pd.Series(Y)
pd_Z = pd.Series(Z)

## Mean of the given vector

In [4]:
#Mean of a data vector
def mean(data, power = 1, mu = 0, abs_bool = False):
    total = 0
    count = 0
    for i in data:
        if mu == 0 and abs_bool == False:    
            total += i**power
        elif mu != 0 and abs_bool == True:
            temp = absolute(i - mu) ** power
            total += temp
        count += 1
    return total / count        

In [5]:
print(mean(X, 2))
assert np.mean(X) == mean(X, 1)

7.25


In [6]:
print("Mean of X is {0}".format(mean(X, power = 1)))
print("Mean of Y is {0}".format(mean(Y, power = 1)))
print("Mean of Z is {0}".format(mean(Z, power = 1)))

Mean of X is 2.5
Mean of Y is 2.1666666666666665
Mean of Z is 2.5


## Merge Sort

In [7]:
def mergesort(data):
    if len(data) == 1:
        return data
    else:
        m = int(len(data) / 2)
        left = data[:m]
        right = data[m:]
        mergesort(left)
        mergesort(right)
        i = j = k =0
        while i < len(left) and j < len(right):
            if left[i] < right[j]:
                data[k] = left[i]
                i += 1
            else:
                data[k] = right[j]
                j += 1
            k += 1
            
        while i < len(left):
            data[k] = left[i]
            i += 1
            k += 1
        
        while j < len(right):
            data[k] = right[j]
            j += 1
            k += 1
    return data

mergesort(Y)
            
            

[1, 1, 1, 3, 3, 4]

In [8]:
assert mergesort(X) == sorted(X)

### Utility Function - Length of the vector

In [9]:
def length(data):
    count = 0
    for i in data:
        count += 1
    return count

### Utility Function - Absolute value

In [10]:
def absolute(value):
    return value * ((value > 0) - (value < 0))

## Median of the given vector

In [11]:
def median(data):
    data = mergesort(data)
    if length(data) % 2 == 0:
        return (1/2) * (data[int(length(data) / 2) - 1] + data[int(length(data) / 2)])
    return data[int(length(data), 2) - 1]

In [12]:
print("Median of X is {}".format(median(X)))
print("Median of Y is {}".format(median(Y)))
print("Median of Z is {}".format(median(Z)))

Median of X is 3.0
Median of Y is 2.0
Median of Z is 2.5


In [13]:
assert np.median(X) == median(X)
assert np.median(Y) == median(Y)
assert np.median(Z) == median(Z)

## Mode of the given vector

In [14]:
def mode(data):
    data = mergesort(data)
    counter = {}
    max_value =  float('-inf')
    for val in data:
        if val not in counter:
            counter[val] = 0
        counter[val] += 1
    for key, value in counter.items():
        if value >= max_value:
            max_value = value
    for key, val in counter.items():
        if  val == max_value:
            return key
        

In [15]:
print("Mode of X is {}".format(mode(X)))
print("Mode of Y is {}".format(mode(Y)))
print("Mode of Z is {}".format(mode(Z)))

Mode of X is 3
Mode of Y is 1
Mode of Z is 1


In [16]:
assert mode(X) == st.mode(X)[0]
assert mode(Y) == st.mode(Y)[0]
assert mode(Z) == st.mode(Z)[0]

## Variance of given vector

In [17]:
def variance(data):
    return mean(data, 2) - (mean(data, 1)**2)

In [18]:
print(variance(X))

1.0


In [19]:
print("Variance of X is {}".format(variance(X)))
print("Variance of Y is {}".format(variance(Y)))
print("Variance of Z is {}".format(variance(Z)))

Variance of X is 1.0
Variance of Y is 1.4722222222222232
Variance of Z is 1.25


In [20]:
assert variance(X) == np.var(X)
assert variance(Y) == np.var(Y)
assert variance(Z) == np.var(Z)

AssertionError: 

## Standard Deviation of a given vector

In [21]:
def std(data):
    return variance(data)**0.5
print(std(Y))

1.2133516482134201


In [22]:
print("Standard Deviation of X is {}".format(std(X)))
print("Standard Deviation of Y is {}".format(std(Y)))
print("Standard Deviation of Z is {}".format(std(Z)))

Standard Deviation of X is 1.0
Standard Deviation of Y is 1.2133516482134201
Standard Deviation of Z is 1.118033988749895


## Mean Absolute Deviation around Mean, Median and Mode

In [23]:
def mad(data, centre = 'mean'):
    delta = 0
    if centre == 'mean':
        average = mean(data, 1)
    elif centre == 'median':
        average = median(data)
    elif centre == 'mode':
        average = mode(data)
    n = length(data)
    for val in data:
        delta += absolute(val - average)
    return delta / n

In [24]:
print("Mean Absolute Deviation of X around mean is {}".format(mad(X, centre = 'mean')))
print("Mean Absolute Deviation of Y around mean is {}".format(mad(Y, centre = 'mean')))
print("Mean Absolute Deviation of Z around mean is {}".format(mad(Z, centre = 'mean')))

Mean Absolute Deviation of X around mean is 0.875
Mean Absolute Deviation of Y around mean is 1.1666666666666667
Mean Absolute Deviation of Z around mean is 1.0


In [25]:
print("Mean Absolute Deviation of X around median is {}".format(mad(X, centre = 'median')))
print("Mean Absolute Deviation of Y around median is {}".format(mad(Y, centre = 'median')))
print("Mean Absolute Deviation of Z around median is {}".format(mad(Z, centre = 'median')))

Mean Absolute Deviation of X around median is 0.75
Mean Absolute Deviation of Y around median is 1.1666666666666667
Mean Absolute Deviation of Z around median is 1.0


In [26]:
print("Mean Absolute Deviation of X around mode is {}".format(mad(X, centre = 'mode')))
print("Mean Absolute Deviation of Y around mode is {}".format(mad(Y, centre = 'mode')))
print("Mean Absolute Deviation of Z around mode is {}".format(mad(Z, centre = 'mode')))

Mean Absolute Deviation of X around mode is 0.75
Mean Absolute Deviation of Y around mode is 1.1666666666666667
Mean Absolute Deviation of Z around mode is 1.5


## Inter Quartile Range

In [165]:
def quartiles(data):
    data = mergesort(data)
    n = length(data)
    med = median(data)
    
    lq = (1/2) * (data[int(n/4) - 1] + data[int((n / 4) + ((n % 4) == 0)) - 1])
    uq = (1/2) * (data[int((3 * n) / 4) - 1] + data[int(((3 * n) / 4) + (((3 * n) % 4) == 0)) - 1])
    #iqr = uq - lq
    return lq, uq

In [173]:
def iqr(data):
    lq, uq = quartiles(data)
    iqr = uq - lq
    return iqr

In [174]:
print("Inter Quartile Range of X is {}".format(iqr(X)))

2
3


IndexError: list index out of range

In [164]:
print("Inter Quartile Range of X is {}".format(iqr(X)))
print("Inter Quartile Range of Y is {}".format(iqr(Y)))
print("Inter Quartile Range of Z is {}".format(iqr(Z)))

IndexError: list index out of range

## Quartile based skewness

In [30]:
def qaurtile_skew(data):
    lq, uq = quartiles(data)
    mq = median(data)
    #skew = (((uq + lq) / 2 - mq) / ((uq - lq) / 2))
    #skew = (uq + lq -(2 * mq)) / (uq - lq)
    skew = ((uq - mq) - (mq - lq)) / (uq - lq)
    return skew


In [31]:
print("Quartile based skewness of X is {}".format(qaurtile_skew(X)))
print("Quartile based skewness of Y is {}".format(qaurtile_skew(Y)))
print("Quartile based skewness of Z is {}".format(qaurtile_skew(Z)))

Quartile based skewness of X is -1.0
Quartile based skewness of Y is 0.0
Quartile based skewness of Z is 0.0


## Pearson's first skewness

In [32]:
def person_first_skew(data):
    mu = mean(data)
    mo = mode(data)
    st = std(data)
    skew = (mu - mo) / st
    return skew

In [33]:
print("Peason's first skewness of X is {}".format(person_first_skew(X)))
print("Peason's first skewness of Y is {}".format(person_first_skew(Y)))
print("Peason's first skewness of Z is {}".format(person_first_skew(Z)))

Peason's first skewness of X is -0.5
Peason's first skewness of Y is 0.9615239476408227
Peason's first skewness of Z is 1.3416407864998738


## Peason's second skewness

In [34]:
def person_second_skew(data):
    mu = mean(data)
    md = median(data)
    st = std(data)
    skew = 3 * (mu - md) / st
    return skew

In [35]:
print("Peason's second skewness of X is {}".format(person_second_skew(X)))
print("Peason's second skewness of Y is {}".format(person_second_skew(Y)))
print("Peason's second skewness of Z is {}".format(person_second_skew(Z)))

Peason's second skewness of X is -1.5
Peason's second skewness of Y is 0.4120816918460666
Peason's second skewness of Z is 0.0


## Groeneveld & Meedens coefficient

In [36]:
def gro_med_coeff(data):
    mea = mean(data)
    med = median(data)
    delta = mean(data, power = 1, mu = med, abs_bool = True)
    return((mea - med) / delta)

In [37]:
print("Groeneveld & Meedens coefficient of X is {}".format(gro_med_coeff(X)))
print("Groeneveld & Meedens coefficient of Y is {}".format(gro_med_coeff(Y)))
print("Groeneveld & Meedens coefficient of Z is {}".format(gro_med_coeff(Z)))

Groeneveld & Meedens coefficient of X is -0.6666666666666666
Groeneveld & Meedens coefficient of Y is 0.1428571428571427
Groeneveld & Meedens coefficient of Z is 0.0


## Pearson's moment coefficient of skewness

In [138]:
def peason_moment_coeff(data):
    mea = mean(data)
    upd_data = []
    for i in range(length(data)):
        upd_data.append((data[i] - mea) ** 3) 
    new_mea = mean(upd_data)
    #delta = mean(data, power = 3, mu = mea, abs_bool = True)
    var3 = std(data)**3
    return (new_mea / var3)

In [139]:
print("Pearson's moment coefficient  of X is {}".format(peason_moment_coeff(X)))
print("Pearson's moment coefficient  of Y is {}".format(peason_moment_coeff(Y)))
print("Pearson's moment coefficient  of Z is {}".format(peason_moment_coeff(Z)))

Pearson's moment coefficient  of X is -0.375
Pearson's moment coefficient  of Y is 0.2384372053448942
Pearson's moment coefficient  of Z is 0.0


In [140]:
def kurtosis(data):
    mea = mean(data)
    upd_data = []
    for i in range(length(data)):
        upd_data.append((data[i] - mea) ** 4) 
    new_mea = mean(upd_data)
    #delta = mean(data, power = 3, mu = mea, abs_bool = True)
    var4 = std(data)**4
    return (new_mea / var4)

In [141]:
print("kurtosis sharpness  of X is {}".format(kurtosis(X)))
print("kurtosis sharpness  of Y is {}".format(kurtosis(Y)))
print("kurtosis sharpness  of Z is {}".format(kurtosis(Z)))

kurtosis sharpness  of X is 1.9375
kurtosis sharpness  of Y is 1.370238519045922
kurtosis sharpness  of Z is 1.6399999999999997


## Entropy

### Utility function - Probability of each element in the list

In [42]:
def probability(data, power = 1):
    n = length(data)
    dict = {}
    for i in range(n):
        if data[i] not in dict:
            dict[data[i]] = 1
        else:
            dict[data[i]] += 1
    for key in dict.keys():
        dict[key] = (dict[key] / n) ** power
    return dict 

In [43]:
print("Probabilities of X is {}".format(probability(X)))
print("Probabilities of Y is {}".format(probability(Y)))
print("Probabilities of Z is {}".format(probability(Z)))

Probabilities of X is {1: 0.25, 2: 0.125, 3: 0.5, 4: 0.125}
Probabilities of Y is {1: 0.5, 3: 0.3333333333333333, 4: 0.16666666666666666}
Probabilities of Z is {1: 0.25, 2: 0.25, 3: 0.25, 4: 0.25}


### Utility Function - ln(x)

In [115]:
def ln(x):
    temp = decimal.Decimal(0)
    for i in range(1,1000):
        temp += (1 / decimal.Decimal(i)) *  (((decimal.Decimal(x)-1) / decimal.Decimal(x))**decimal.Decimal(i))
    return float(temp)

print(ln(2))

0.6931471805599453


In [122]:
def log2(x):
    #return n * ((x ** (1/n)))
    return ln(x) / ln(2)
print(log2(1/0.25))

2.0


###  Entropy

In [125]:
def entropy(data):
    entro = 0
    probs = probability(data)
    for key in probs.keys():
        entro += (probs[key] * log2(1 / probs[key]))
    return  entro
    

In [126]:
print("Entropy of X is {}".format(entropy(X)))
print("Entropy of Y is {}".format(entropy(Y)))
print("Entropy of Z is {}".format(entropy(Z)))

Entropy of X is 1.75
Entropy of Y is 1.4591479170272448
Entropy of Z is 2.0


### Shannon Entopy

In [129]:
def shannon_entropy(data):
    entro = 0
    probs = probability(data)
    for key in probs.keys():
        entro += (probs[key] * ln(1 / probs[key]))
    return  entro

In [130]:
print("Shannon Entropy of X is {}".format(shannon_entropy(X)))
print("Shannon Entropy of Y is {}".format(shannon_entropy(Y)))
print("Shannon Entropy of Z is {}".format(shannon_entropy(Z)))

Shannon Entropy of X is 1.2130075659799042
Shannon Entropy of Y is 1.0114042647073516
Shannon Entropy of Z is 1.3862943611198906


### BergerParker index

In [152]:
def berger_parker_index(data):
    probs = probability(data)
    max_prob = mergesort([val for val in probs.values()])[-1]
    return max_prob

berger_parker_index(X)

0.5

In [153]:
print("BergerParker index of X is {}".format(berger_parker_index(X)))
print("BergerParker index of Y is {}".format(berger_parker_index(Y)))
print("BergerParker index of Z is {}".format(berger_parker_index(Z)))

BergerParker index of X is 0.5
BergerParker index of Y is 0.5
BergerParker index of Z is 0.25


### Simpson Index

In [133]:
def simpson_index(data):
    prob2 = probability(data, power = 2)
    simp = 0
    for i in prob2.values():
        simp += i
    return simp

In [158]:
print("Simpson index of X is {}".format(simpson_index(X)))
print("Simpson index of Y is {}".format(simpson_index(Y)))
print("Simpson index of Z is {}".format(simpson_index(Z)))

Simpson index of X is 0.34375
Simpson index of Y is 0.3888888888888889
Simpson index of Z is 0.25


### Gini-Simpson index

In [159]:
def gini_simpson_index(data):
    simp = simpson_index(data)
    gini = 1 - simp
    return gini

In [160]:
print("Gini-Simpson index of X is {}".format(gini_simpson_index(X)))
print("Gini-Simpson index of Y is {}".format(gini_simpson_index(Y)))
print("Gini-Simpson index of Z is {}".format(gini_simpson_index(Z)))

Gini-Simpson index of X is 0.65625
Gini-Simpson index of Y is 0.6111111111111112
Gini-Simpson index of Z is 0.75


### Collision Entropy

In [142]:
def collision_entropy(data):
    return ( ln(1 / simpson_index(data)))

In [143]:
print("Collision Entropy of X is {}".format(collision_entropy(X)))
print("Collision Entropy of Y is {}".format(collision_entropy(Y)))
print("Collision Entropy of Z is {}".format(collision_entropy(Z)))

Collision Entropy of X is 1.067840630001356
Collision Entropy of Y is 0.9444616088408513
Collision Entropy of Z is 1.3862943611198906
