<a href="https://colab.research.google.com/github/albercej/zug/blob/master/central_tendency.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Statistics For Financial Traders
### Part Three: Measures of Central Tendency

In [None]:
import math
import numpy as np
import pandas as pd
from prettytable import PrettyTable

In [None]:
# Load the BTCUSDT price data; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv("btcusdt_jan_2020.csv")
# A bare expression on the last line of a cell makes the notebook display it.
df

Unnamed: 0,OpenTime,Open,High,Low,Close,CloseTime
0,"Wed 01 Jan, 2020 01:00:00",7195.24,7255.0,7175.15,7200.85,"Thu 02 Jan, 2020 00:59:59"
1,"Thu 02 Jan, 2020 01:00:00",7200.77,7212.5,6924.74,6965.71,"Fri 03 Jan, 2020 00:59:59"
2,"Fri 03 Jan, 2020 01:00:00",6965.49,7405.0,6871.04,7344.96,"Sat 04 Jan, 2020 00:59:59"
3,"Sat 04 Jan, 2020 01:00:00",7345.0,7404.0,7272.21,7354.11,"Sun 05 Jan, 2020 00:59:59"
4,"Sun 05 Jan, 2020 01:00:00",7354.19,7495.0,7318.0,7358.75,"Mon 06 Jan, 2020 00:59:59"
5,"Mon 06 Jan, 2020 01:00:00",7357.64,7795.34,7346.76,7758.0,"Tue 07 Jan, 2020 00:59:59"
6,"Tue 07 Jan, 2020 01:00:00",7758.9,8207.68,7723.71,8145.28,"Wed 08 Jan, 2020 00:59:59"
7,"Wed 08 Jan, 2020 01:00:00",8145.92,8455.0,7870.0,8055.98,"Thu 09 Jan, 2020 00:59:59"
8,"Thu 09 Jan, 2020 01:00:00",8054.72,8055.96,7750.0,7817.76,"Fri 10 Jan, 2020 00:59:59"
9,"Fri 10 Jan, 2020 01:00:00",7817.74,8199.0,7672.0,8197.02,"Sat 11 Jan, 2020 00:59:59"


### Mean

In [None]:
def mean(data: list, days: int = 14) -> float:
    """Return the arithmetic mean of the last `days` observations.

    Args:
        data (list): Observations, oldest first.
        days (int): Size of the trailing window. Defaults to 14. A window
            longer than `data` simply uses every observation.

    Returns:
        float: The mean of the window, rounded to 2 decimal places.

    Raises:
        ZeroDivisionError: If `data` is empty.
    """
    # Honour `days` the same way the pandas variant and the list-based
    # `median` do; previously the parameter was accepted but ignored.
    window = data[-days:]
    return round(sum(window) / len(window), 2)

def median(data: pd.Series, days: int = 14) -> float:
    """Returns the median of a Pandas series:
    Args:
        data (pd.Series): Set of observations
        days (int): Number of trailing observations to use. Defaults to 14.
    Returns:
        float: Median of the last `days` observations, rounded to 2 places.
    """
    # Read from the `data` argument; the body used to reference a global
    # named `series`, which ignored whatever the caller passed in.
    return round(data.iloc[-days:].median(), 2)

#### Replace the `mean()` function in the next cell with the implementation you prefer (plain Python or pandas).

In [None]:
def mean(series: pd.Series, days: int = 14) -> float:
    """Return the mean of the last `days` values of `series`.

    Args:
        series (pd.Series): Set of observations.
        days (int): Number of trailing observations to average. Defaults to 14.

    Returns:
        float: The windowed mean, rounded to 2 decimal places.
    """
    recent = series.iloc[-days:]
    average = recent.mean()
    return round(average, 2)

# Use the 'Close' column as the observations.
series = df['Close']
# No `days` argument is passed, so the default 14-observation window applies.
mean_price = mean(series)
print(f"${mean_price}")

# Outputs: $8851.92

$8851.92


### Median

In [None]:
def median(data: list, days: int = 14) -> float:
    """Return the median of the last `days` observations.

    Args:
        data (list): Observations, oldest first.
        days (int): Size of the trailing window. Defaults to 14.

    Returns:
        float: The median of the window, rounded to 2 decimal places.

    Raises:
        ZeroDivisionError: If `data` is empty.
    """
    window = sorted(data[-days:])
    length = len(window)
    half = length // 2

    if length % 2 == 0:
        # Even count: average the two middle values.
        middle = window[half - 1:half + 1]
        return round(sum(middle) / len(middle), 2)

    # Odd count: the middle value, rounded like the even branch so both
    # paths return the same precision.
    return round(window[half], 2)

def median(data: pd.Series, days: int = 14) -> float:
    """Returns the median of a Pandas series:
    Args:
        data (pd.Series): Set of observations
        days (int): Number of trailing observations to use. Defaults to 14.
    Returns:
        float: Median of the last `days` observations, rounded to 2 places.
    """
    # Read from the `data` argument; the body used to reference a global
    # named `series`, which ignored whatever the caller passed in.
    return round(data.iloc[-days:].median(), 2)

#### Replace the `median()` function in the next cell with the implementation you prefer (plain Python or pandas).

In [None]:
def median(data: pd.Series, days: int = 14) -> float:
    """Return the median of the last `days` values of `data`, rounded to 2 places.

    Args:
        data (pd.Series): Set of observations.
        days (int): Number of trailing observations to use. Defaults to 14.

    Returns:
        float: The windowed median.
    """
    # Use the `data` argument rather than the notebook-global `series`, so
    # the function works on whatever series the caller supplies.
    return round(data.iloc[-days:].median(), 2)

# Use the 'Close' column as the observations.
series = df['Close']
# The window length (14 observations) is passed explicitly here.
mid_price = median(series,14)
print(f"${mid_price}")

# Outputs: $8718.87

$8718.87


### Mode

In [None]:
def mode(data):
    """Return the most frequent value in `data`.

    Args:
        data: Any iterable of hashable observations.

    Returns:
        The value with the highest count. When several values tie, the one
        that appears first in `data` is returned.

    Raises:
        ValueError: If `data` is empty.
    """
    from collections import Counter

    # One counting pass instead of calling `data.count` per distinct value.
    counts = Counter(data)
    # Counter keeps first-seen order and max() returns the first maximum,
    # which makes the tie-break deterministic.
    return max(counts, key=counts.get)
    
def mode(data: pd.Series):
    """Return the mode(s) of `data` as computed by `pd.Series.mode`.

    The result is itself a Series, so several tied values can be returned.
    """
    most_frequent = data.mode()
    return most_frequent

#### Replace the `mode()` function in the next cell with the implementation you prefer (plain Python or pandas).

In [None]:
def mode(data: pd.Series):
    """Return the mode(s) of `data` as computed by `pd.Series.mode`.

    The result is itself a Series, so several tied values can be returned.
    """
    most_frequent = data.mode()
    return most_frequent

# Use the 'Close' column as the observations.
series = df['Close']
# `mode` returns a Series, so every tied value is printed.
print(mode(series))

0     6965.71
1     7200.85
2     7344.96
3     7354.11
4     7358.75
5     7758.00
6     7817.76
7     8020.01
8     8055.98
9     8110.34
10    8145.28
11    8184.98
12    8197.02
13    8340.58
14    8404.52
15    8439.00
16    8615.00
17    8642.35
18    8682.36
19    8701.70
20    8720.01
21    8736.03
22    8810.01
23    8821.41
24    8907.57
25    8913.28
26    8915.96
27    9301.53
28    9352.89
29    9374.21
30    9513.21
dtype: float64


### Grouped Frequency Distribution

In [None]:
def prec(num: float) -> int:
    """Return the number of digits after the decimal point of `num`.

    The count is taken from the shortest text form of the number, i.e.
    `str(num)`, so `2547.5` has 1 decimal place and `10` has 0.

    Args:
        num (float): The number to inspect (ints are accepted too).

    Returns:
        int: Number of decimal places; 0 for whole numbers, infinities and NaN.
    """
    from decimal import Decimal

    # Parsing the text form with Decimal also handles scientific notation
    # ("1e-05", "1.5e-07"), which a plain search for "." gets wrong.
    exponent = Decimal(str(num)).as_tuple().exponent

    # Infinity and NaN report a non-numeric exponent marker.
    if not isinstance(exponent, int):
        return 0

    # A negative exponent is the count of fractional digits.
    return max(0, -exponent)

def count_range(data: list, interval: tuple) -> int:
    """Return how many elements of `data` lie within `interval`.

    Args:
        data (list): Observations to test.
        interval (tuple): `(lower, upper)` bounds; both ends are inclusive.

    Returns:
        int: Number of elements `k` with `lower <= k <= upper`.
    """
    lower, upper = interval[0], interval[1]
    # Counting does not depend on order, so no sort is needed.
    return sum(1 for value in data if lower <= value <= upper)

def grouped_frequency(data: list) -> dict:
    """Build a grouped frequency distribution of `data`.

    The observations are split into equal-width classes that start at the
    smallest value. Every observation is counted in exactly one class:
    a class holds the values from its lower boundary up to, but not
    including, the next class's lower boundary, and the last class also
    takes the maximum.

    Args:
        data (list): Numeric observations.

    Returns:
        dict: `{"Zones": [...], "Frequency (f)": [...]}` with one label of
        the form `"$<lower> - $<upper>"` and one count per class. The labels
        are rounded for display; counting uses the unrounded boundaries.

    Raises:
        ValueError: If `data` is empty.
    """
    from bisect import bisect_right

    values = list(data)
    if not values:
        raise ValueError("grouped_frequency() needs at least one observation")

    # STEP 1: Determine range of data
    lowest = min(values)
    data_range = round(max(values) - lowest, 2)

    # Decimal places of the range; used to round class size and labels
    val = prec(data_range)

    # STEP 2: Calculate no. of classes (must be a whole number).
    # NOTE(review): Sturges' rule is usually written with 3.322
    # (1 / log10(2)) - confirm the 3.332 constant is intended.
    if data_range == 0:
        # All observations are (effectively) equal: one class holds them all
        no_class = 1
    else:
        no_class = max(1, round(1 + 3.332 * math.log10(len(values))))

    # STEP 3: Determine an approximate class size, rounded UP at the working
    # precision so that the classes together span the whole range
    scale = 10 ** val
    class_size = math.ceil(data_range / no_class * scale) / scale

    # STEP 4: The lowest class limit is the smallest observation
    # STEP 5: Determine the remaining class boundaries
    edges = [lowest]
    for _ in range(no_class):
        edges.append(edges[-1] + class_size)

    # STEP 6: Distribute the observations over the classes. Only the
    # boundaries between classes are searched, so a value below the first or
    # above the last boundary still lands in the first or last class.
    inner_edges = edges[1:-1]
    counts = [0] * no_class
    for value in values:
        counts[bisect_right(inner_edges, value)] += 1

    # Labels: rounded lower limit, and the next boundary minus 0.01 as the
    # displayed upper limit
    zones = [
        f'${round(lower, val)} - ${round(upper - 0.01, val)}'
        for lower, upper in zip(edges, edges[1:])
    ]

    # return classes and frequency
    return {
        "Zones": zones,
        "Frequency (f)": counts,
    }

# Convert the 'Close' column to a plain list, the input type
# `grouped_frequency` is annotated with.
series = df['Close'].to_list()
freq = grouped_frequency(series)
# The dictionary keys double as the table's column headers.
keys = list(freq.keys())

# Create the table with column names from the returned dictionary
table = PrettyTable([keys[0], keys[1]], title="BTCUSDT Zones (1 Jan. - 31 Jan. 2020)")

# Add one table row per class: its label and its frequency
for i, j in zip(freq['Zones'], freq['Frequency (f)']):
    table.add_row([i, j])

print(table)

print(freq)
# Sanity check: the class frequencies should add up to the number of observations
print(sum(freq['Frequency (f)']) == len(series))

# Outputs:
#   {'Zones': ['$6965.7 - $7390.3', '$7390.3 - $7814.9', '$7814.9 - $8239.5', '$8239.5 - $8664.1', '$8664.1 - $9088.7', '$9088.7 - $9513.3'], 'Frequency (f)': [5, 1, 7, 5, 9, 4]} 
#   True

+----------------------------------------+
| BTCUSDT Zones (1 Jan. - 31 Jan. 2020)  |
+----------------------+-----------------+
|        Zones         |  Frequency (f)  |
+----------------------+-----------------+
|  $6965.7 - $7390.3   |        5        |
|  $7390.3 - $7814.9   |        1        |
|  $7814.9 - $8239.5   |        7        |
|  $8239.5 - $8664.1   |        5        |
|  $8664.1 - $9088.7   |        9        |
|  $9088.7 - $9513.3   |        4        |
+----------------------+-----------------+
{'Zones': ['$6965.7 - $7390.3', '$7390.3 - $7814.9', '$7814.9 - $8239.5', '$8239.5 - $8664.1', '$8664.1 - $9088.7', '$9088.7 - $9513.3'], 'Frequency (f)': [5, 1, 7, 5, 9, 4]}
True
