From ac4af5a6b7f360b2a3cf5c6e1e32d9bbd30f399b Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 3 Jun 2020 21:06:31 +0100 Subject: [PATCH 01/11] Added type annotations --- mathematical/data_frames.py | 24 +++++++------ mathematical/linear_regression.py | 2 +- mathematical/outliers.py | 28 +++++++-------- mathematical/stats.py | 34 +++++++++--------- mathematical/utils.py | 57 ++++++++++++++++--------------- 5 files changed, 76 insertions(+), 69 deletions(-) diff --git a/mathematical/data_frames.py b/mathematical/data_frames.py index 87e61bd..1b4b67c 100644 --- a/mathematical/data_frames.py +++ b/mathematical/data_frames.py @@ -28,12 +28,16 @@ # # Outlier Modes +from typing import List + +from pandas import Series + MAD = 1 QUARTILES = 2 STDEV2 = 3 -def df_mean(row, column_label_list=None): +def df_mean(row: Series, column_label_list: List[str] = None) -> float: #TODO """ Calculate the mean of each row for the specified columns of a data frame @@ -59,7 +63,7 @@ def df_mean(row, column_label_list=None): return nanmean(row[column_label_list]) -def df_median(row, column_label_list=None): +def df_median(row: Series, column_label_list: List[str] = None) -> float: #TODO """ Calculate the median of each row for the specified columns of a data frame @@ -85,7 +89,7 @@ def df_median(row, column_label_list=None): return nanmedian(row[column_label_list]) -def df_stdev(row, column_label_list=None): +def df_stdev(row: Series, column_label_list: List[str] = None) -> float: #TODO """ Calculate the standard deviation of each row for the specified columns of a data frame @@ -111,7 +115,7 @@ def df_stdev(row, column_label_list=None): return nanstd(row[column_label_list]) -def df_log_stdev(row, column_label_list=None): +def df_log_stdev(row: Series, column_label_list: List[str] = None) -> float: #TODO """ Calculate the standard deviation of the log10 values in each row for the specified columns of a data frame @@ -138,7 +142,7 @@ def df_log_stdev(row, column_label_list=None): return nanstd([log10(x) if x > 0.0 else nan for x in row[column_label_list]]) -def df_percentage(row, column_label, total): +def df_percentage(row: Series, column_label: str, total: float) -> float: """ Returns the value of the specified column as a percentage of the given total The total is usually the sum of the specified column @@ -153,7 +157,7 @@ def df_percentage(row, column_label, total): :param column_label: column label to calculate percentage for :type column_label: str :param total: total value - :type column_label: str + :type total: float :return: Percentage * 100 :rtype: float @@ -162,7 +166,7 @@ def df_percentage(row, column_label, total): return (row[column_label] / float(total)) * 100.0 -def df_log(row, column_label_list, base=10): +def df_log(row: Series, column_label_list: List[str], base = 10) -> float: """ Calculate the logarithm of the values in each row for the specified columns of a data frame @@ -190,7 +194,7 @@ def df_log(row, column_label_list, base=10): return 0 -def df_data_points(row, column_label_list): +def df_data_points(row: Series, column_label_list: List[str]) -> list: """ Compile the values for the specified columns in each row into a list @@ -211,7 +215,7 @@ def df_data_points(row, column_label_list): return [row[column_label] for column_label in column_label_list] -def df_outliers(row, column_label_list=None, outlier_mode=MAD): +def df_outliers(row: Series, column_label_list: List[str] = None, outlier_mode=MAD): #TODO """ Identify outliers in each row @@ -253,7 +257,7 @@ def df_outliers(row, 
column_label_list=None, outlier_mode=MAD): return pd.Series(list(x)) -def df_count(row, column_label_list=None): +def df_count(row: Series, column_label_list: [str] = None) -> int: """ Count the number of occurrences of a non-NaN value in the specified columns of a data frame diff --git a/mathematical/linear_regression.py b/mathematical/linear_regression.py index 6966cc2..9d53a58 100644 --- a/mathematical/linear_regression.py +++ b/mathematical/linear_regression.py @@ -54,7 +54,7 @@ from domdf_python_tools.doctools import is_documented_by -def linear_regression_vertical(x, y=None, a=None, b=None): +def linear_regression_vertical(x, y=None, a=None, b=None) -> tuple: """ Calculate coefficients of a linear regression y = a * x + b. The fit minimizes *vertical* distances between the points and the line. diff --git a/mathematical/outliers.py b/mathematical/outliers.py index d7f7b27..5669e31 100644 --- a/mathematical/outliers.py +++ b/mathematical/outliers.py @@ -38,9 +38,9 @@ def mad_outliers( - dataset, - strip_zero=True, - threshold=3, + dataset: list, + strip_zero: bool = True, + threshold: int = 3, ): """ Using the Median Absolute Deviation to Find Outliers @@ -63,7 +63,7 @@ def mad_outliers( See https://dipot.ulb.ac.be/dspace/bitstream/2013/139499/1/Leys_MAD_final-libre.pdf :type threshold: int - :return: + :return: #TODO """ dataset = utils.strip_none_bool_string(dataset) @@ -88,26 +88,26 @@ def mad_outliers( return outliers, data_exc_outliers -def two_stdev(dataset, strip_zero=True): +def two_stdev(dataset, strip_zero: bool = True): """ Outliers are greater than 2x stdev from mean :param dataset: - :return: + :return: # TODO """ return stdev_outlier(dataset, strip_zero=strip_zero) -def stdev_outlier(dataset, strip_zero=True, rng=int(2)): +def stdev_outlier(dataset, strip_zero: bool = True, rng=int(2)): """ Outliers are greater than rng*stdev from mean :param dataset: :param rng: - :return: + :return: 'TODO """ dataset = utils.strip_none_bool_string(dataset) @@ -133,13 +133,13 @@ def stdev_outlier(dataset, strip_zero=True, rng=int(2)): return outliers, data_exc_outliers -def quartile_outliers(dataset, strip_zero=True): +def quartile_outliers(dataset, strip_zero: bool = True): """ outliers are more than 3x inter-quartile range from upper or lower quartile - :param dataset: + :param dataset: # - :return: + :return: #TODO """ dataset = utils.strip_none_bool_string(dataset) @@ -171,16 +171,16 @@ def quartile_outliers(dataset, strip_zero=True): return outliers, data_exc_outliers -def spss_outliers(dataset, strip_zero=True, mode="all"): +def spss_outliers(dataset, strip_zero: bool = True, mode: str = "all"): """ Based on IBM SPSS method for detecting outliers Outliers more than 1.5*IQR from Q1 or Q3 "Extreme values" more than 3*IQR from Q1 or Q3 :param dataset: - :param mode: + :param mode: str - :return: + :return: # TODO """ if len(dataset) < 2: diff --git a/mathematical/stats.py b/mathematical/stats.py index 29bcc5a..ce26a49 100644 --- a/mathematical/stats.py +++ b/mathematical/stats.py @@ -46,13 +46,15 @@ import warnings # 3rd party +from typing import List + import numpy # this package from . 
import utils -def mean_none(dataset): +def mean_none(dataset: List[str]): """ Calculate the mean, excluding NaN, strings, boolean values, and zeros @@ -69,7 +71,7 @@ def mean_none(dataset): return numpy.nanmean(dataset) -def std_none(dataset, ddof=1): +def std_none(dataset: List[str], ddof: int = 1): """ Calculate the standard deviation, excluding NaN, strings, boolean values, and zeros @@ -85,10 +87,10 @@ def std_none(dataset, ddof=1): dataset = utils.remove_zero(dataset) print(dataset) - return numpy.nanstd(dataset, ddof=ddof) + return numpy.nanstd(dataset, ddof = ddof) -def median_none(dataset): +def median_none(dataset:List[str]): """ Calculate the median, excluding NaN, strings, boolean values, and zeros @@ -105,7 +107,7 @@ def median_none(dataset): return numpy.nanmedian(dataset) -def iqr_none(dataset): +def iqr_none(dataset:List[str]) -> float: """ Calculate the interquartile range, excluding NaN, strings, boolean values, and zeros @@ -123,7 +125,7 @@ def iqr_none(dataset): return iq -def percentile_none(dataset, percentage): +def percentile_none(dataset: List[str], percentage: float) -> float: """ Calculate the given percentile, excluding NaN, strings, boolean values, and zeros @@ -149,7 +151,7 @@ def percentile_none(dataset, percentage): return numpy.percentile(dataset, percentage) -def pooled_sd(sample1, sample2, weighted=False): +def pooled_sd(sample1: List, sample2: List, weighted: List = False) -> float: """ Pooled Standard Deviation @@ -176,7 +178,7 @@ def pooled_sd(sample1, sample2, weighted=False): return numpy.sqrt(((sd1**2) + (sd2**2)) / 2) -def d_cohen(sample1, sample2, sd=1, tail=1, pooled=False): +def d_cohen(sample1: List, sample2: List, sd: int = 1, tail = 1, pooled = False) -> float: """ Cohen's d-Statistic @@ -212,7 +214,7 @@ def d_cohen(sample1, sample2, sd=1, tail=1, pooled=False): return (mean1 - mean2) / sd -def g_hedge(sample1, sample2): +def g_hedge(sample1: List, sample2: List): #TODO """ Hedge's g-Statistic @@ -229,7 +231,7 @@ def g_hedge(sample1, sample2): return (mean1 - mean2) / pooled_sd(sample1, sample2, True) -def g_durlak_bias(g, n): +def g_durlak_bias(g :float, n: float) -> float: #TODO """ Application of Durlak's bias correction to the Hedge's g statistic. Formula from https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/hedgeg.htm @@ -249,7 +251,7 @@ def g_durlak_bias(g, n): return g * Durlak -def interpret_d(d_or_g): +def interpret_d(d_or_g: float) -> float: """ Interpret Cohen's d or Hedge's g values using Table 1 from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3444174/ @@ -273,7 +275,7 @@ def interpret_d(d_or_g): return "Large Effect" -def _contains_nan(a, nan_policy='propagate'): +def _contains_nan(a, nan_policy = 'propagate'): policies = ['propagate', 'raise', 'omit'] if nan_policy not in policies: raise ValueError("nan_policy must be one of {%s}" % ', '.join(f"'{s}'" for s in policies)) @@ -304,7 +306,7 @@ def _contains_nan(a, nan_policy='propagate'): return contains_nan, nan_policy -def median_absolute_deviation(x, axis=0, center=numpy.median, scale=1.4826, nan_policy='propagate'): +def median_absolute_deviation(x, axis: int = 0, center = numpy.median, scale: int = 1.4826, nan_policy = 'propagate'): #TODO """ Compute the median absolute deviation of the data along the given axis. 
The median absolute deviation (MAD, [1]_) computes the median over the @@ -387,7 +389,7 @@ def median_absolute_deviation(x, axis=0, center=numpy.median, scale=1.4826, nan_ return scale * mad -def absolute_deviation(x, axis=0, center=numpy.median, nan_policy='propagate'): +def absolute_deviation(x, axis: int = 0, center=numpy.median, nan_policy='propagate'): #TODO """ Compute the absolute deviations from the median of the data along the given axis. @@ -447,7 +449,7 @@ def absolute_deviation(x, axis=0, center=numpy.median, nan_policy='propagate'): return ad -def absolute_deviation_from_median(x, axis=0, center=numpy.median, scale=1.4826, nan_policy='propagate'): +def absolute_deviation_from_median(x, axis: int = 0, center = numpy.median, scale: int = 1.4826, nan_policy = 'propagate'): """ Compute the absolute deviation from the median of each point in the data along the given axis, given in terms of the MAD. @@ -499,7 +501,7 @@ def absolute_deviation_from_median(x, axis=0, center=numpy.median, scale=1.4826, return ad_from_median -def within1min(value1, value2): +def within1min(value1: float, value2: float): if value1 not in [0, None, ''] and value2 not in [0, None, '']: return (float(value1) - 1) < (float(value2)) < (float(value1) + 1) else: diff --git a/mathematical/utils.py b/mathematical/utils.py index 6fd0c1d..854506c 100644 --- a/mathematical/utils.py +++ b/mathematical/utils.py @@ -78,6 +78,7 @@ # # stdlib +import decimal import math from operator import eq, ge, gt, le, lt, ne @@ -85,7 +86,7 @@ import numpy -def intdiv(p, q): +def intdiv(p: float, q: float) -> int: """ Integer divsions which rounds toward zero @@ -105,7 +106,7 @@ def intdiv(p, q): return r -def roman(num): +def roman(num: float) -> str: """ Examples -------- @@ -125,7 +126,7 @@ def roman(num): return result -def magnitude(x): +def magnitude(x: float) -> int: """ Determine the magnitude of the given value @@ -146,7 +147,7 @@ def magnitude(x): # return int(math.floor(math.log10(abs(num)))) -def remove_zero(inputlist): +def remove_zero(inputlist: list)-> list: """ Remove zero values from the given list Also removes False and None @@ -162,7 +163,7 @@ def remove_zero(inputlist): return list(inputlist[numpy.nonzero(inputlist)]) -def isint(num): # Only works with floating point numbers +def isint(num: float) -> bool: # Only works with floating point numbers """ Checks whether a float is an integer value @@ -175,7 +176,7 @@ def isint(num): # Only works with floating point numbers return num == int(num) -def RepresentsInt(s): +def RepresentsInt(s: bool): """ Checks whether a value can be converted to int @@ -190,7 +191,7 @@ def RepresentsInt(s): return False -def rounders(val_to_round, round_format): +def rounders(val_to_round: int, round_format: str) -> decimal: """ Round a value to the specified number format, e.g. 
"0.000" for three decimal places @@ -206,7 +207,7 @@ def rounders(val_to_round, round_format): return Decimal(Decimal(val_to_round).quantize(Decimal(str(round_format)), rounding=ROUND_HALF_UP)) -def strip_strings(ls): +def strip_strings(ls: list) -> list: """ Remove strings from a list @@ -220,7 +221,7 @@ def strip_strings(ls): return [x for x in ls if not isinstance(x, str)] -def strip_booleans(ls): +def strip_booleans(ls: list) -> list: """ Remove booleans from a list @@ -234,7 +235,7 @@ def strip_booleans(ls): return [x for x in ls if not isinstance(x, bool)] -def strip_nonetype(ls): +def strip_nonetype(ls: list) -> list: """ Remove None from a list @@ -248,7 +249,7 @@ def strip_nonetype(ls): return [x for x in ls if x is not None] -def strip_none_bool_string(ls): +def strip_none_bool_string(ls: list) -> list: """ Remove None, Boolean and strings from a list @@ -264,7 +265,7 @@ def strip_none_bool_string(ls): return ls -def gcd(a, b): +def gcd(a: float, b: float) -> float: """ Returns the GCD (HCF) of a and b using Euclid's Algorithm @@ -280,7 +281,7 @@ def gcd(a, b): return math.gcd(a, b) -def gcd_array(array): +def gcd_array(array) -> float: """ Returns the GCD for an array of numbers using Euclid's Algorithm @@ -289,7 +290,7 @@ def gcd_array(array): :param array: :type array: :return: - :rtype: + :rtype: float """ a = array[0] @@ -302,13 +303,13 @@ def gcd_array(array): return x -def gcd2(numbers): +def gcd2(numbers: float) -> float: """ Returns the GCD (HCF) of a list of numbers using Euclid's Algorithm :param numbers: - :return: + :return:float """ c = numbers[0] @@ -317,12 +318,12 @@ def gcd2(numbers): return c -def lcm(numbers): +def lcm(numbers: float) -> float: """ Returns the LCM of a list of numbers using Euclid's Algorithm :param numbers: - :return: + :return: float """ product = numbers[0] @@ -336,30 +337,30 @@ def lcm(numbers): return product -def hcf(a, b): +def hcf(a: float, b: float): """ :param a: :param b: - :return: + :return:float """ gcd(a, b) -def hcf2(numbers): +def hcf2(numbers: float) -> float: """ :param numbers: - :return: + :return:float """ gcd2(numbers) -def modInverse(a, m): +def modInverse(a: float, m: float): """ Returns the modular inverse of a % m, which is the number x such that a*x % m = 1 :param a: @@ -385,7 +386,7 @@ def modInverse(a, m): _precalc_fact = numpy.log([math.factorial(n) for n in range(20)]) -def log_factorial(x): +def log_factorial(x: float)-> float: x = numpy.array(x) pf = _precalc_fact m = (x >= pf.size) @@ -396,15 +397,15 @@ def log_factorial(x): return out -def _log_pi_r(d, k, p=0.5): +def _log_pi_r(d: float, k: float, p: float = 0.5) -> float: return k * math.log(p) + log_factorial(k + d) - log_factorial(k) - log_factorial(d) -def _log_pi(d, k, p=0.5): +def _log_pi(d: float, k: float, p: float = 0.5) -> float: return _log_pi_r(d, k, p) + (d + 1) * math.log(1 - p) -def _expectation(d, T, p=0.5): +def _expectation(d: float, T: float, p: float = 0.5): if T is None: return d + 1 T = numpy.array(T, dtype=int) @@ -413,7 +414,7 @@ def _expectation(d, T, p=0.5): return ((m * pi).cumsum() / pi.cumsum())[T] -def _confidence_value(conf, d, T, p=0.5): +def _confidence_value(conf: float, d: float, T: float, p: float = 0.5) : if T is not None: T = numpy.array(T, dtype=int) m = numpy.arange(T.max() + 1, dtype=int) From 5a9b9bc77c98dcc53f38c3f3e22384a4b174027f Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 4 Jun 2020 09:01:24 +0100 Subject: [PATCH 02/11] Added type annotations --- mathematical/data_frames.py | 24 ++++++++++++------------ 
mathematical/linear_regression.py | 2 +- mathematical/outliers.py | 2 +- mathematical/stats.py | 22 +++++++++++----------- mathematical/utils.py | 18 ++++++++++++------ 5 files changed, 37 insertions(+), 31 deletions(-) diff --git a/mathematical/data_frames.py b/mathematical/data_frames.py index 1b4b67c..6705f71 100644 --- a/mathematical/data_frames.py +++ b/mathematical/data_frames.py @@ -37,7 +37,7 @@ STDEV2 = 3 -def df_mean(row: Series, column_label_list: List[str] = None) -> float: #TODO +def df_mean(row: Series, column_label_list: List[str] = None) -> float: """ Calculate the mean of each row for the specified columns of a data frame @@ -60,10 +60,10 @@ def df_mean(row: Series, column_label_list: List[str] = None) -> float: #TODO if column_label_list is None: column_label_list = list(row.index) - return nanmean(row[column_label_list]) + return float(nanmean(row[column_label_list])) -def df_median(row: Series, column_label_list: List[str] = None) -> float: #TODO +def df_median(row: Series, column_label_list: List[str] = None) -> float: """ Calculate the median of each row for the specified columns of a data frame @@ -86,10 +86,10 @@ def df_median(row: Series, column_label_list: List[str] = None) -> float: #TODO if column_label_list is None: column_label_list = list(row.index) - return nanmedian(row[column_label_list]) + return float(nanmedian(row[column_label_list])) -def df_stdev(row: Series, column_label_list: List[str] = None) -> float: #TODO +def df_stdev(row: Series, column_label_list: List[str] = None) -> float: """ Calculate the standard deviation of each row for the specified columns of a data frame @@ -112,10 +112,10 @@ def df_stdev(row: Series, column_label_list: List[str] = None) -> float: #TODO if column_label_list is None: column_label_list = list(row.index) - return nanstd(row[column_label_list]) + return float(nanstd(row[column_label_list])) -def df_log_stdev(row: Series, column_label_list: List[str] = None) -> float: #TODO +def df_log_stdev(row: Series, column_label_list: List[str] = None) -> float: """ Calculate the standard deviation of the log10 values in each row for the specified columns of a data frame @@ -139,7 +139,7 @@ def df_log_stdev(row: Series, column_label_list: List[str] = None) -> float: #TO if column_label_list is None: column_label_list = list(row.index) - return nanstd([log10(x) if x > 0.0 else nan for x in row[column_label_list]]) + return float(nanstd([log10(x) if x > 0.0 else nan for x in row[column_label_list]])) def df_percentage(row: Series, column_label: str, total: float) -> float: @@ -166,7 +166,7 @@ def df_percentage(row: Series, column_label: str, total: float) -> float: return (row[column_label] / float(total)) * 100.0 -def df_log(row: Series, column_label_list: List[str], base = 10) -> float: +def df_log(row: Series, column_label_list: List[str], base: float = 10) -> float: """ Calculate the logarithm of the values in each row for the specified columns of a data frame @@ -194,7 +194,7 @@ def df_log(row: Series, column_label_list: List[str], base = 10) -> float: return 0 -def df_data_points(row: Series, column_label_list: List[str]) -> list: +def df_data_points(row: Series, column_label_list: List[str]) -> List: """ Compile the values for the specified columns in each row into a list @@ -215,7 +215,7 @@ def df_data_points(row: Series, column_label_list: List[str]) -> list: return [row[column_label] for column_label in column_label_list] -def df_outliers(row: Series, column_label_list: List[str] = None, outlier_mode=MAD): #TODO +def 
df_outliers(row: Series, column_label_list: List[str] = None, outlier_mode: int = MAD) -> Series: """ Identify outliers in each row @@ -254,7 +254,7 @@ def df_outliers(row: Series, column_label_list: List[str] = None, outlier_mode=M else: return None - return pd.Series(list(x)) + return Series(list(x)) def df_count(row: Series, column_label_list: [str] = None) -> int: diff --git a/mathematical/linear_regression.py b/mathematical/linear_regression.py index 9d53a58..e0c6ec0 100644 --- a/mathematical/linear_regression.py +++ b/mathematical/linear_regression.py @@ -54,7 +54,7 @@ from domdf_python_tools.doctools import is_documented_by -def linear_regression_vertical(x, y=None, a=None, b=None) -> tuple: +def linear_regression_vertical(x, y = None, a = None, b = None) -> tuple: """ Calculate coefficients of a linear regression y = a * x + b. The fit minimizes *vertical* distances between the points and the line. diff --git a/mathematical/outliers.py b/mathematical/outliers.py index 5669e31..ff513f3 100644 --- a/mathematical/outliers.py +++ b/mathematical/outliers.py @@ -38,7 +38,7 @@ def mad_outliers( - dataset: list, + dataset: List, strip_zero: bool = True, threshold: int = 3, ): diff --git a/mathematical/stats.py b/mathematical/stats.py index ce26a49..17237c3 100644 --- a/mathematical/stats.py +++ b/mathematical/stats.py @@ -54,7 +54,7 @@ from . import utils -def mean_none(dataset: List[str]): +def mean_none(dataset: List) -> float: """ Calculate the mean, excluding NaN, strings, boolean values, and zeros @@ -68,10 +68,10 @@ def mean_none(dataset: List[str]): dataset = utils.strip_none_bool_string(dataset) dataset = utils.remove_zero(dataset) - return numpy.nanmean(dataset) + return float(numpy.nanmean(dataset)) -def std_none(dataset: List[str], ddof: int = 1): +def std_none(dataset: List[str], ddof: int = 1) -> float: """ Calculate the standard deviation, excluding NaN, strings, boolean values, and zeros @@ -87,7 +87,7 @@ def std_none(dataset: List[str], ddof: int = 1): dataset = utils.remove_zero(dataset) print(dataset) - return numpy.nanstd(dataset, ddof = ddof) + return float(numpy.nanstd(dataset, ddof=ddof)) def median_none(dataset:List[str]): @@ -122,7 +122,7 @@ def iqr_none(dataset:List[str]) -> float: q3 = percentile_none(dataset, 75) iq = q3 - q1 - return iq + return float(iq) def percentile_none(dataset: List[str], percentage: float) -> float: @@ -148,7 +148,7 @@ def percentile_none(dataset: List[str], percentage: float) -> float: if len(dataset) < 2: raise ValueError("Dataset too small") - return numpy.percentile(dataset, percentage) + return float(numpy.percentile(dataset, percentage)) def pooled_sd(sample1: List, sample2: List, weighted: List = False) -> float: @@ -275,7 +275,7 @@ def interpret_d(d_or_g: float) -> float: return "Large Effect" -def _contains_nan(a, nan_policy = 'propagate'): +def _contains_nan(a, nan_policy:str = 'propagate'): policies = ['propagate', 'raise', 'omit'] if nan_policy not in policies: raise ValueError("nan_policy must be one of {%s}" % ', '.join(f"'{s}'" for s in policies)) @@ -306,7 +306,7 @@ def _contains_nan(a, nan_policy = 'propagate'): return contains_nan, nan_policy -def median_absolute_deviation(x, axis: int = 0, center = numpy.median, scale: int = 1.4826, nan_policy = 'propagate'): #TODO +def median_absolute_deviation(x, axis: int = 0, center = numpy.median, scale: int = 1.4826, nan_policy: str = 'propagate'): #TODO """ Compute the median absolute deviation of the data along the given axis. 
The median absolute deviation (MAD, [1]_) computes the median over the @@ -389,7 +389,7 @@ def median_absolute_deviation(x, axis: int = 0, center = numpy.median, scale: in return scale * mad -def absolute_deviation(x, axis: int = 0, center=numpy.median, nan_policy='propagate'): #TODO +def absolute_deviation(x, axis: int = 0, center=numpy.median, nan_policy: str = 'propagate'): #TODO """ Compute the absolute deviations from the median of the data along the given axis. @@ -449,7 +449,7 @@ def absolute_deviation(x, axis: int = 0, center=numpy.median, nan_policy='propag return ad -def absolute_deviation_from_median(x, axis: int = 0, center = numpy.median, scale: int = 1.4826, nan_policy = 'propagate'): +def absolute_deviation_from_median(x, axis: int = 0, center = numpy.median, scale: int = 1.4826, nan_policy: str = 'propagate'): """ Compute the absolute deviation from the median of each point in the data along the given axis, given in terms of the MAD. @@ -501,7 +501,7 @@ def absolute_deviation_from_median(x, axis: int = 0, center = numpy.median, scal return ad_from_median -def within1min(value1: float, value2: float): +def within1min(value1: float, value2: float) -> bool: if value1 not in [0, None, ''] and value2 not in [0, None, '']: return (float(value1) - 1) < (float(value2)) < (float(value1) + 1) else: diff --git a/mathematical/utils.py b/mathematical/utils.py index 854506c..0099bc9 100644 --- a/mathematical/utils.py +++ b/mathematical/utils.py @@ -83,6 +83,8 @@ from operator import eq, ge, gt, le, lt, ne # 3rd party +from typing import List + import numpy @@ -147,7 +149,7 @@ def magnitude(x: float) -> int: # return int(math.floor(math.log10(abs(num)))) -def remove_zero(inputlist: list)-> list: +def remove_zero(inputlist: List)-> List: """ Remove zero values from the given list Also removes False and None @@ -191,7 +193,11 @@ def RepresentsInt(s: bool): return False -def rounders(val_to_round: int, round_format: str) -> decimal: +class Decimal(object): + pass + + +def rounders(val_to_round: int, round_format: str) -> Decimal: """ Round a value to the specified number format, e.g. 
"0.000" for three decimal places @@ -207,7 +213,7 @@ def rounders(val_to_round: int, round_format: str) -> decimal: return Decimal(Decimal(val_to_round).quantize(Decimal(str(round_format)), rounding=ROUND_HALF_UP)) -def strip_strings(ls: list) -> list: +def strip_strings(ls: List) -> List: """ Remove strings from a list @@ -221,7 +227,7 @@ def strip_strings(ls: list) -> list: return [x for x in ls if not isinstance(x, str)] -def strip_booleans(ls: list) -> list: +def strip_booleans(ls: List) -> List: """ Remove booleans from a list @@ -235,7 +241,7 @@ def strip_booleans(ls: list) -> list: return [x for x in ls if not isinstance(x, bool)] -def strip_nonetype(ls: list) -> list: +def strip_nonetype(ls: List) -> List: """ Remove None from a list @@ -249,7 +255,7 @@ def strip_nonetype(ls: list) -> list: return [x for x in ls if x is not None] -def strip_none_bool_string(ls: list) -> list: +def strip_none_bool_string(ls: List) -> List: """ Remove None, Boolean and strings from a list From 2a8d795c2e473642da1da3510e13ff373ab953b0 Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 4 Jun 2020 19:51:40 +0100 Subject: [PATCH 03/11] Added type annotations --- mathematical/data_frames.py | 2 +- mathematical/linear_regression.py | 4 +++- mathematical/outliers.py | 13 +++++++------ mathematical/stats.py | 28 ++++++++++++++-------------- mathematical/utils.py | 19 +++++++++---------- 5 files changed, 34 insertions(+), 32 deletions(-) diff --git a/mathematical/data_frames.py b/mathematical/data_frames.py index 6705f71..47c72be 100644 --- a/mathematical/data_frames.py +++ b/mathematical/data_frames.py @@ -257,7 +257,7 @@ def df_outliers(row: Series, column_label_list: List[str] = None, outlier_mode: return Series(list(x)) -def df_count(row: Series, column_label_list: [str] = None) -> int: +def df_count(row: Series, column_label_list: List[str] = None) -> int: """ Count the number of occurrences of a non-NaN value in the specified columns of a data frame diff --git a/mathematical/linear_regression.py b/mathematical/linear_regression.py index e0c6ec0..442c115 100644 --- a/mathematical/linear_regression.py +++ b/mathematical/linear_regression.py @@ -50,11 +50,13 @@ # # 3rd party +from typing import Tuple + import numpy from domdf_python_tools.doctools import is_documented_by -def linear_regression_vertical(x, y = None, a = None, b = None) -> tuple: +def linear_regression_vertical(x: numpy.ndarray, y: numpy.ndarray = None, a = None, b = None) -> Tuple[float, float, float, float]: """ Calculate coefficients of a linear regression y = a * x + b. The fit minimizes *vertical* distances between the points and the line. diff --git a/mathematical/outliers.py b/mathematical/outliers.py index ff513f3..19a397b 100644 --- a/mathematical/outliers.py +++ b/mathematical/outliers.py @@ -31,6 +31,7 @@ # MA 02110-1301, USA. # # +from typing import Sequence, Tuple, List import numpy from . 
import utils @@ -38,10 +39,10 @@ def mad_outliers( - dataset: List, + dataset: Sequence[float], strip_zero: bool = True, threshold: int = 3, - ): + ) -> Tuple[List[float], List[float]]: """ Using the Median Absolute Deviation to Find Outliers @@ -88,7 +89,7 @@ def mad_outliers( return outliers, data_exc_outliers -def two_stdev(dataset, strip_zero: bool = True): +def two_stdev(dataset: Sequence[float], strip_zero: bool = True): """ Outliers are greater than 2x stdev from mean @@ -100,7 +101,7 @@ def two_stdev(dataset, strip_zero: bool = True): return stdev_outlier(dataset, strip_zero=strip_zero) -def stdev_outlier(dataset, strip_zero: bool = True, rng=int(2)): +def stdev_outlier(dataset: Sequence[float], strip_zero: bool = True, rng=int(2)): """ Outliers are greater than rng*stdev from mean @@ -133,7 +134,7 @@ def stdev_outlier(dataset, strip_zero: bool = True, rng=int(2)): return outliers, data_exc_outliers -def quartile_outliers(dataset, strip_zero: bool = True): +def quartile_outliers(dataset: Sequence[float], strip_zero: bool = True): """ outliers are more than 3x inter-quartile range from upper or lower quartile @@ -171,7 +172,7 @@ def quartile_outliers(dataset, strip_zero: bool = True): return outliers, data_exc_outliers -def spss_outliers(dataset, strip_zero: bool = True, mode: str = "all"): +def spss_outliers(dataset: Sequence[float], strip_zero: bool = True, mode: str = "all"): """ Based on IBM SPSS method for detecting outliers Outliers more than 1.5*IQR from Q1 or Q3 diff --git a/mathematical/stats.py b/mathematical/stats.py index 17237c3..828824b 100644 --- a/mathematical/stats.py +++ b/mathematical/stats.py @@ -46,7 +46,7 @@ import warnings # 3rd party -from typing import List +from typing import List, Sequence, Callable import numpy @@ -54,7 +54,7 @@ from . 
import utils -def mean_none(dataset: List) -> float: +def mean_none(dataset: Sequence[float]) -> float: """ Calculate the mean, excluding NaN, strings, boolean values, and zeros @@ -71,7 +71,7 @@ def mean_none(dataset: List) -> float: return float(numpy.nanmean(dataset)) -def std_none(dataset: List[str], ddof: int = 1) -> float: +def std_none(dataset: Sequence[float], ddof: int = 1) -> float: """ Calculate the standard deviation, excluding NaN, strings, boolean values, and zeros @@ -90,7 +90,7 @@ def std_none(dataset: List[str], ddof: int = 1) -> float: return float(numpy.nanstd(dataset, ddof=ddof)) -def median_none(dataset:List[str]): +def median_none(dataset: Sequence[float]): """ Calculate the median, excluding NaN, strings, boolean values, and zeros @@ -107,7 +107,7 @@ def median_none(dataset:List[str]): return numpy.nanmedian(dataset) -def iqr_none(dataset:List[str]) -> float: +def iqr_none(dataset: Sequence[float]) -> float: """ Calculate the interquartile range, excluding NaN, strings, boolean values, and zeros @@ -125,7 +125,7 @@ def iqr_none(dataset:List[str]) -> float: return float(iq) -def percentile_none(dataset: List[str], percentage: float) -> float: +def percentile_none(dataset: Sequence[float], percentage: float) -> float: """ Calculate the given percentile, excluding NaN, strings, boolean values, and zeros @@ -151,7 +151,7 @@ def percentile_none(dataset: List[str], percentage: float) -> float: return float(numpy.percentile(dataset, percentage)) -def pooled_sd(sample1: List, sample2: List, weighted: List = False) -> float: +def pooled_sd(sample1: Sequence[float], sample2: Sequence[float], weighted: bool = False) -> float: """ Pooled Standard Deviation @@ -178,7 +178,7 @@ def pooled_sd(sample1: List, sample2: List, weighted: List = False) -> float: return numpy.sqrt(((sd1**2) + (sd2**2)) / 2) -def d_cohen(sample1: List, sample2: List, sd: int = 1, tail = 1, pooled = False) -> float: +def d_cohen(sample1: Sequence[float], sample2:Sequence[float], sd: int = 1, tail = 1, pooled: bool = False) -> float: """ Cohen's d-Statistic @@ -214,7 +214,7 @@ def d_cohen(sample1: List, sample2: List, sd: int = 1, tail = 1, pooled = False) return (mean1 - mean2) / sd -def g_hedge(sample1: List, sample2: List): #TODO +def g_hedge(sample1: Sequence[float], sample2: Sequence[float]) -> float: """ Hedge's g-Statistic @@ -231,7 +231,7 @@ def g_hedge(sample1: List, sample2: List): #TODO return (mean1 - mean2) / pooled_sd(sample1, sample2, True) -def g_durlak_bias(g :float, n: float) -> float: #TODO +def g_durlak_bias(g: float, n: float) -> float: """ Application of Durlak's bias correction to the Hedge's g statistic. Formula from https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/hedgeg.htm @@ -275,7 +275,7 @@ def interpret_d(d_or_g: float) -> float: return "Large Effect" -def _contains_nan(a, nan_policy:str = 'propagate'): +def _contains_nan(a, nan_policy: str = 'propagate'): policies = ['propagate', 'raise', 'omit'] if nan_policy not in policies: raise ValueError("nan_policy must be one of {%s}" % ', '.join(f"'{s}'" for s in policies)) @@ -306,7 +306,7 @@ def _contains_nan(a, nan_policy:str = 'propagate'): return contains_nan, nan_policy -def median_absolute_deviation(x, axis: int = 0, center = numpy.median, scale: int = 1.4826, nan_policy: str = 'propagate'): #TODO +def median_absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, scale: int = 1.4826, nan_policy: str = 'propagate'): #TODO """ Compute the median absolute deviation of the data along the given axis. 
The median absolute deviation (MAD, [1]_) computes the median over the @@ -389,7 +389,7 @@ def median_absolute_deviation(x, axis: int = 0, center = numpy.median, scale: in return scale * mad -def absolute_deviation(x, axis: int = 0, center=numpy.median, nan_policy: str = 'propagate'): #TODO +def absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, nan_policy: str = 'propagate'): #TODO """ Compute the absolute deviations from the median of the data along the given axis. @@ -449,7 +449,7 @@ def absolute_deviation(x, axis: int = 0, center=numpy.median, nan_policy: str = return ad -def absolute_deviation_from_median(x, axis: int = 0, center = numpy.median, scale: int = 1.4826, nan_policy: str = 'propagate'): +def absolute_deviation_from_median(x, axis: int = 0, center: Callable = numpy.median, scale: int = 1.4826, nan_policy: str = 'propagate'): """ Compute the absolute deviation from the median of each point in the data along the given axis, given in terms of the MAD. diff --git a/mathematical/utils.py b/mathematical/utils.py index 0099bc9..90606d7 100644 --- a/mathematical/utils.py +++ b/mathematical/utils.py @@ -83,7 +83,7 @@ from operator import eq, ge, gt, le, lt, ne # 3rd party -from typing import List +from typing import List, Sequence, Any, Union, Optional import numpy @@ -149,7 +149,7 @@ def magnitude(x: float) -> int: # return int(math.floor(math.log10(abs(num)))) -def remove_zero(inputlist: List)-> List: +def remove_zero(inputlist: Sequence[float]) -> List[float]: """ Remove zero values from the given list Also removes False and None @@ -178,7 +178,7 @@ def isint(num: float) -> bool: # Only works with floating point numbers return num == int(num) -def RepresentsInt(s: bool): +def RepresentsInt(s: Any) -> bool: """ Checks whether a value can be converted to int @@ -192,12 +192,11 @@ def RepresentsInt(s: bool): except (ValueError, TypeError) as e: return False +from decimal import Decimal -class Decimal(object): - pass -def rounders(val_to_round: int, round_format: str) -> Decimal: +def rounders(val_to_round: Union[str, float, Decimal], round_format: str) -> Decimal: """ Round a value to the specified number format, e.g. 
"0.000" for three decimal places @@ -309,7 +308,7 @@ def gcd_array(array) -> float: return x -def gcd2(numbers: float) -> float: +def gcd2(numbers: Sequence[float]) -> float: """ Returns the GCD (HCF) of a list of numbers using Euclid's Algorithm @@ -324,7 +323,7 @@ def gcd2(numbers: float) -> float: return c -def lcm(numbers: float) -> float: +def lcm(numbers:Sequence[float]) -> float: """ Returns the LCM of a list of numbers using Euclid's Algorithm :param numbers: @@ -343,7 +342,7 @@ def lcm(numbers: float) -> float: return product -def hcf(a: float, b: float): +def hcf(a: float, b: float) -> float: """ :param a: @@ -366,7 +365,7 @@ def hcf2(numbers: float) -> float: gcd2(numbers) -def modInverse(a: float, m: float): +def modInverse(a: float, m: float) -> Optional[float]: """ Returns the modular inverse of a % m, which is the number x such that a*x % m = 1 :param a: From 1e50a3ec66c33136cdf5c525ca9af1b778a3ee03 Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 4 Jun 2020 22:03:49 +0100 Subject: [PATCH 04/11] Added type annotations --- mathematical/data_frames.py | 4 ++-- mathematical/linear_regression.py | 4 ++-- mathematical/outliers.py | 2 +- mathematical/stats.py | 4 ++-- mathematical/utils.py | 2 +- tests/test_data_frames.py | 4 ++-- tests/test_linear_regression.py | 4 ++-- tests/test_stats.py | 4 ++-- tests/test_utils.py | 4 ++-- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/mathematical/data_frames.py b/mathematical/data_frames.py index 47c72be..7d27cae 100644 --- a/mathematical/data_frames.py +++ b/mathematical/data_frames.py @@ -30,7 +30,7 @@ # Outlier Modes from typing import List -from pandas import Series +from pandas import Series # type: ignore MAD = 1 QUARTILES = 2 @@ -55,7 +55,7 @@ def df_mean(row: Series, column_label_list: List[str] = None) -> float: :rtype: float """ - from numpy import nanmean + from numpy import nanmean # type: ignore if column_label_list is None: column_label_list = list(row.index) diff --git a/mathematical/linear_regression.py b/mathematical/linear_regression.py index 442c115..60d4908 100644 --- a/mathematical/linear_regression.py +++ b/mathematical/linear_regression.py @@ -52,8 +52,8 @@ # 3rd party from typing import Tuple -import numpy -from domdf_python_tools.doctools import is_documented_by +import numpy # type: ignore +from domdf_python_tools.doctools import is_documented_by # type: ignore def linear_regression_vertical(x: numpy.ndarray, y: numpy.ndarray = None, a = None, b = None) -> Tuple[float, float, float, float]: diff --git a/mathematical/outliers.py b/mathematical/outliers.py index 19a397b..baf6f75 100644 --- a/mathematical/outliers.py +++ b/mathematical/outliers.py @@ -33,7 +33,7 @@ # from typing import Sequence, Tuple, List -import numpy +import numpy # type: ignore from . import utils from . import stats diff --git a/mathematical/stats.py b/mathematical/stats.py index 828824b..82707a3 100644 --- a/mathematical/stats.py +++ b/mathematical/stats.py @@ -43,12 +43,12 @@ # # stdlib -import warnings +import warnings # type: ignore # 3rd party from typing import List, Sequence, Callable -import numpy +import numpy # type: ignore # this package from . 
import utils diff --git a/mathematical/utils.py b/mathematical/utils.py index 90606d7..1203978 100644 --- a/mathematical/utils.py +++ b/mathematical/utils.py @@ -85,7 +85,7 @@ # 3rd party from typing import List, Sequence, Any, Union, Optional -import numpy +import numpy # type: ignore def intdiv(p: float, q: float) -> int: diff --git a/tests/test_data_frames.py b/tests/test_data_frames.py index 419edd2..e43c348 100644 --- a/tests/test_data_frames.py +++ b/tests/test_data_frames.py @@ -9,8 +9,8 @@ import copy -import pandas -import pytest +import pandas # type: ignore +import pytest # type: ignore from mathematical.data_frames import ( df_count, diff --git a/tests/test_linear_regression.py b/tests/test_linear_regression.py index e67031d..c0afc82 100644 --- a/tests/test_linear_regression.py +++ b/tests/test_linear_regression.py @@ -50,8 +50,8 @@ from itertools import count # 3rd party -import numpy -import pytest +import numpy # type: ignore +import pytest # type: ignore # this package from mathematical import linear_regression diff --git a/tests/test_stats.py b/tests/test_stats.py index 9bae0fc..0e0fd8e 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -6,7 +6,7 @@ Test functions in stats.py """ -import numpy +import numpy # type: ignore from mathematical import stats data = [1, 2, 3, 4, 5, 0, "abc", False, None, numpy.nan] @@ -43,7 +43,7 @@ def test_iqr_none(): def test_mad(): # Based on example from scipy.median_absolute_deviation docstring - import scipy.stats + import scipy.stats # type: ignore x = scipy.stats.norm.rvs(size=100, scale=1, random_state=123456) assert isinstance(stats.median_absolute_deviation(x), float) assert stats.median_absolute_deviation(x) == 1.2280762773108278 diff --git a/tests/test_utils.py b/tests/test_utils.py index da657c4..15e9ac0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -50,8 +50,8 @@ # | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -import decimal -import numpy +import decimal # type: ignore +import numpy # type: ignore from mathematical import utils From d96b0173cbdb76e0bce0a86ce93ff2bcedd2f836 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 6 Jun 2020 19:05:50 +0100 Subject: [PATCH 05/11] Added type annotations --- mathematical/data_frames.py | 2 +- mathematical/stats.py | 19 ++++++++++--------- mathematical/utils.py | 26 +++++++++++++------------- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/mathematical/data_frames.py b/mathematical/data_frames.py index 7d27cae..70835ca 100644 --- a/mathematical/data_frames.py +++ b/mathematical/data_frames.py @@ -250,7 +250,7 @@ def df_outliers(row: Series, column_label_list: List[str] = None, outlier_mode: elif outlier_mode == QUARTILES: x = outliers.quartile_outliers(data) elif outlier_mode == STDEV2: - x = outliers.stdev_outlier(data, 2) # outlier classed as more than 2 stdev away from mean + x = outliers.stdev_outlier(data, rng=2) # outlier classed as more than 2 stdev away from mean else: return None diff --git a/mathematical/stats.py b/mathematical/stats.py index 82707a3..4e15902 100644 --- a/mathematical/stats.py +++ b/mathematical/stats.py @@ -46,7 +46,7 @@ import warnings # type: ignore # 3rd party -from typing import List, Sequence, Callable +from typing import List, Sequence, Callable, Union, Optional import numpy # type: ignore @@ -54,7 +54,7 @@ from . 
import utils -def mean_none(dataset: Sequence[float]) -> float: +def mean_none(dataset: Sequence[Union[float, bool, None]]) -> float: """ Calculate the mean, excluding NaN, strings, boolean values, and zeros @@ -71,7 +71,7 @@ def mean_none(dataset: Sequence[float]) -> float: return float(numpy.nanmean(dataset)) -def std_none(dataset: Sequence[float], ddof: int = 1) -> float: +def std_none(dataset: Sequence[Union[float, bool, None]], ddof: int = 1) -> float: """ Calculate the standard deviation, excluding NaN, strings, boolean values, and zeros @@ -90,7 +90,7 @@ def std_none(dataset: Sequence[float], ddof: int = 1) -> float: return float(numpy.nanstd(dataset, ddof=ddof)) -def median_none(dataset: Sequence[float]): +def median_none(dataset: Sequence[Union[float, bool, None]]): """ Calculate the median, excluding NaN, strings, boolean values, and zeros @@ -107,7 +107,7 @@ def median_none(dataset: Sequence[float]): return numpy.nanmedian(dataset) -def iqr_none(dataset: Sequence[float]) -> float: +def iqr_none(dataset: Sequence[Union[float, bool, None]]) -> float: """ Calculate the interquartile range, excluding NaN, strings, boolean values, and zeros @@ -125,7 +125,7 @@ def iqr_none(dataset: Sequence[float]) -> float: return float(iq) -def percentile_none(dataset: Sequence[float], percentage: float) -> float: +def percentile_none(dataset: Sequence[Union[float, bool, None]], percentage: float) -> float: """ Calculate the given percentile, excluding NaN, strings, boolean values, and zeros @@ -251,7 +251,7 @@ def g_durlak_bias(g: float, n: float) -> float: return g * Durlak -def interpret_d(d_or_g: float) -> float: +def interpret_d(d_or_g: float) -> Optional[str]: """ Interpret Cohen's d or Hedge's g values using Table 1 from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3444174/ @@ -273,6 +273,7 @@ def interpret_d(d_or_g: float) -> float: return "Intermediate Effect" elif 0.8 <= d_or_g: return "Large Effect" + return None def _contains_nan(a, nan_policy: str = 'propagate'): @@ -306,7 +307,7 @@ def _contains_nan(a, nan_policy: str = 'propagate'): return contains_nan, nan_policy -def median_absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, scale: int = 1.4826, nan_policy: str = 'propagate'): #TODO +def median_absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate'): #TODO """ Compute the median absolute deviation of the data along the given axis. The median absolute deviation (MAD, [1]_) computes the median over the @@ -449,7 +450,7 @@ def absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, nan_po return ad -def absolute_deviation_from_median(x, axis: int = 0, center: Callable = numpy.median, scale: int = 1.4826, nan_policy: str = 'propagate'): +def absolute_deviation_from_median(x, axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate'): """ Compute the absolute deviation from the median of each point in the data along the given axis, given in terms of the MAD. 
diff --git a/mathematical/utils.py b/mathematical/utils.py index 1203978..c35793b 100644 --- a/mathematical/utils.py +++ b/mathematical/utils.py @@ -105,7 +105,7 @@ def intdiv(p: float, q: float) -> int: r = p // q if r < 0 and q * r != p: r += 1 - return r + return int(r) def roman(num: float) -> str: @@ -149,7 +149,7 @@ def magnitude(x: float) -> int: # return int(math.floor(math.log10(abs(num)))) -def remove_zero(inputlist: Sequence[float]) -> List[float]: +def remove_zero(inputlist: Sequence[Union[float, bool, None]]) -> List[float]: """ Remove zero values from the given list Also removes False and None @@ -212,7 +212,7 @@ def rounders(val_to_round: Union[str, float, Decimal], round_format: str) -> Dec return Decimal(Decimal(val_to_round).quantize(Decimal(str(round_format)), rounding=ROUND_HALF_UP)) -def strip_strings(ls: List) -> List: +def strip_strings(ls: Sequence[Any]) -> List: """ Remove strings from a list @@ -226,7 +226,7 @@ def strip_strings(ls: List) -> List: return [x for x in ls if not isinstance(x, str)] -def strip_booleans(ls: List) -> List: +def strip_booleans(ls: Sequence[Any]) -> List: """ Remove booleans from a list @@ -240,7 +240,7 @@ def strip_booleans(ls: List) -> List: return [x for x in ls if not isinstance(x, bool)] -def strip_nonetype(ls: List) -> List: +def strip_nonetype(ls: Sequence[Any]) -> List: """ Remove None from a list @@ -254,7 +254,7 @@ def strip_nonetype(ls: List) -> List: return [x for x in ls if x is not None] -def strip_none_bool_string(ls: List) -> List: +def strip_none_bool_string(ls: Sequence[Any]) -> List: """ Remove None, Boolean and strings from a list @@ -270,7 +270,7 @@ def strip_none_bool_string(ls: List) -> List: return ls -def gcd(a: float, b: float) -> float: +def gcd(a: int, b: int) -> int: """ Returns the GCD (HCF) of a and b using Euclid's Algorithm @@ -308,7 +308,7 @@ def gcd_array(array) -> float: return x -def gcd2(numbers: Sequence[float]) -> float: +def gcd2(numbers: int) -> int: """ Returns the GCD (HCF) of a list of numbers using Euclid's Algorithm @@ -342,30 +342,30 @@ def lcm(numbers:Sequence[float]) -> float: return product -def hcf(a: float, b: float) -> float: +def hcf(a: int, b: int) -> int: """ :param a: :param b: - :return:float + :return:int """ gcd(a, b) -def hcf2(numbers: float) -> float: +def hcf2(numbers: int) -> int: """ :param numbers: - :return:float + :return:int """ gcd2(numbers) -def modInverse(a: float, m: float) -> Optional[float]: +def modInverse(a: int, m: int) -> Optional[float]: """ Returns the modular inverse of a % m, which is the number x such that a*x % m = 1 :param a: From 493642e0c4fecff9286b40ff3c269b9a479710ba Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 6 Jun 2020 19:49:51 +0100 Subject: [PATCH 06/11] Added type annotations --- mathematical/__init__.py | 1 + mathematical/data_frames.py | 2 ++ mathematical/linear_regression.py | 8 ++++++-- mathematical/outliers.py | 9 ++++++--- mathematical/stats.py | 21 +++++++++++++++------ mathematical/utils.py | 13 +++++-------- tests/test_data_frames.py | 12 ++++-------- tests/test_stats.py | 3 +++ tests/test_utils.py | 4 ++++ 9 files changed, 46 insertions(+), 27 deletions(-) diff --git a/mathematical/__init__.py b/mathematical/__init__.py index a095050..bf46fba 100644 --- a/mathematical/__init__.py +++ b/mathematical/__init__.py @@ -32,4 +32,5 @@ __version__ = "0.1.11" __email__ = "dominic@davis-foster.co.uk" +# this package from . 
import data_frames, outliers, stats, utils diff --git a/mathematical/data_frames.py b/mathematical/data_frames.py index 70835ca..0696979 100644 --- a/mathematical/data_frames.py +++ b/mathematical/data_frames.py @@ -27,9 +27,11 @@ # # +# stdlib # Outlier Modes from typing import List +# 3rd party from pandas import Series # type: ignore MAD = 1 diff --git a/mathematical/linear_regression.py b/mathematical/linear_regression.py index 60d4908..e56bcc1 100644 --- a/mathematical/linear_regression.py +++ b/mathematical/linear_regression.py @@ -49,14 +49,18 @@ # | DOI: `10.1021/acs.jproteome.8b00717 `_ # -# 3rd party +# stdlib from typing import Tuple +# 3rd party import numpy # type: ignore from domdf_python_tools.doctools import is_documented_by # type: ignore -def linear_regression_vertical(x: numpy.ndarray, y: numpy.ndarray = None, a = None, b = None) -> Tuple[float, float, float, float]: +def linear_regression_vertical(x: numpy.ndarray, + y: numpy.ndarray = None, + a=None, + b=None) -> Tuple[float, float, float, float]: """ Calculate coefficients of a linear regression y = a * x + b. The fit minimizes *vertical* distances between the points and the line. diff --git a/mathematical/outliers.py b/mathematical/outliers.py index baf6f75..937e911 100644 --- a/mathematical/outliers.py +++ b/mathematical/outliers.py @@ -29,13 +29,16 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # MA 02110-1301, USA. +# stdlib # # -from typing import Sequence, Tuple, List +from typing import List, Sequence, Tuple +# 3rd party import numpy # type: ignore -from . import utils -from . import stats + +# this package +from . import stats, utils def mad_outliers( diff --git a/mathematical/stats.py b/mathematical/stats.py index 4e15902..b9e831b 100644 --- a/mathematical/stats.py +++ b/mathematical/stats.py @@ -44,10 +44,9 @@ # stdlib import warnings # type: ignore +from typing import Callable, List, Optional, Sequence, Union # 3rd party -from typing import List, Sequence, Callable, Union, Optional - import numpy # type: ignore # this package @@ -178,7 +177,9 @@ def pooled_sd(sample1: Sequence[float], sample2: Sequence[float], weighted: bool return numpy.sqrt(((sd1**2) + (sd2**2)) / 2) -def d_cohen(sample1: Sequence[float], sample2:Sequence[float], sd: int = 1, tail = 1, pooled: bool = False) -> float: +def d_cohen( + sample1: Sequence[float], sample2: Sequence[float], sd: int = 1, tail=1, pooled: bool = False + ) -> float: """ Cohen's d-Statistic @@ -307,7 +308,13 @@ def _contains_nan(a, nan_policy: str = 'propagate'): return contains_nan, nan_policy -def median_absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate'): #TODO +def median_absolute_deviation( + x, + axis: int = 0, + center: Callable = numpy.median, + scale: float = 1.4826, + nan_policy: str = 'propagate' + ): #TODO """ Compute the median absolute deviation of the data along the given axis. The median absolute deviation (MAD, [1]_) computes the median over the @@ -390,7 +397,7 @@ def median_absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, return scale * mad -def absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, nan_policy: str = 'propagate'): #TODO +def absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, nan_policy: str = 'propagate'): #TODO """ Compute the absolute deviations from the median of the data along the given axis. 
@@ -450,7 +457,9 @@ def absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, nan_po return ad -def absolute_deviation_from_median(x, axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate'): +def absolute_deviation_from_median( + x, axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate' + ): """ Compute the absolute deviation from the median of each point in the data along the given axis, given in terms of the MAD. diff --git a/mathematical/utils.py b/mathematical/utils.py index c35793b..9c475ff 100644 --- a/mathematical/utils.py +++ b/mathematical/utils.py @@ -80,11 +80,11 @@ # stdlib import decimal import math +from decimal import Decimal from operator import eq, ge, gt, le, lt, ne +from typing import Any, List, Optional, Sequence, Union # 3rd party -from typing import List, Sequence, Any, Union, Optional - import numpy # type: ignore @@ -192,9 +192,6 @@ def RepresentsInt(s: Any) -> bool: except (ValueError, TypeError) as e: return False -from decimal import Decimal - - def rounders(val_to_round: Union[str, float, Decimal], round_format: str) -> Decimal: """ @@ -323,7 +320,7 @@ def gcd2(numbers: int) -> int: return c -def lcm(numbers:Sequence[float]) -> float: +def lcm(numbers: Sequence[float]) -> float: """ Returns the LCM of a list of numbers using Euclid's Algorithm :param numbers: @@ -391,7 +388,7 @@ def modInverse(a: int, m: int) -> Optional[float]: _precalc_fact = numpy.log([math.factorial(n) for n in range(20)]) -def log_factorial(x: float)-> float: +def log_factorial(x: float) -> float: x = numpy.array(x) pf = _precalc_fact m = (x >= pf.size) @@ -419,7 +416,7 @@ def _expectation(d: float, T: float, p: float = 0.5): return ((m * pi).cumsum() / pi.cumsum())[T] -def _confidence_value(conf: float, d: float, T: float, p: float = 0.5) : +def _confidence_value(conf: float, d: float, T: float, p: float = 0.5): if T is not None: T = numpy.array(T, dtype=int) m = numpy.arange(T.max() + 1, dtype=int) diff --git a/tests/test_data_frames.py b/tests/test_data_frames.py index e43c348..a9a8249 100644 --- a/tests/test_data_frames.py +++ b/tests/test_data_frames.py @@ -7,20 +7,16 @@ """ +# stdlib import copy +# 3rd party import pandas # type: ignore import pytest # type: ignore +# this package from mathematical.data_frames import ( - df_count, - df_data_points, - df_log, - df_log_stdev, - df_mean, - df_median, - df_percentage, - df_stdev, + df_count, df_data_points, df_log, df_log_stdev, df_mean, df_median, df_percentage, df_stdev ) diff --git a/tests/test_stats.py b/tests/test_stats.py index 0e0fd8e..d6b8fa6 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -6,7 +6,10 @@ Test functions in stats.py """ +# 3rd party import numpy # type: ignore + +# this package from mathematical import stats data = [1, 2, 3, 4, 5, 0, "abc", False, None, numpy.nan] diff --git a/tests/test_utils.py b/tests/test_utils.py index 15e9ac0..a80220a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -50,9 +50,13 @@ # | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# +# stdlib import decimal # type: ignore + +# 3rd party import numpy # type: ignore +# this package from mathematical import utils data = [1, 2, 3, 4, 5, 0, "abc", False, None, numpy.nan] From ee1671ea60e5ebfdf61bbaab9f8854ea95178995 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 6 Jun 2020 22:30:09 +0100 Subject: [PATCH 07/11] Added type annotations --- mathematical/data_frames.py | 17 +++++++++-------- mathematical/linear_regression.py | 10 ++++++---- mathematical/outliers.py | 3 ++- mathematical/stats.py | 17 +++++++++++++---- tests/test_utils.py | 2 +- tox.ini | 4 ++-- 6 files changed, 33 insertions(+), 20 deletions(-) diff --git a/mathematical/data_frames.py b/mathematical/data_frames.py index 0696979..9d3ed36 100644 --- a/mathematical/data_frames.py +++ b/mathematical/data_frames.py @@ -28,18 +28,19 @@ # # stdlib -# Outlier Modes -from typing import List +from typing import List, Optional, Sequence # 3rd party from pandas import Series # type: ignore +# Outlier Modes + MAD = 1 QUARTILES = 2 STDEV2 = 3 -def df_mean(row: Series, column_label_list: List[str] = None) -> float: +def df_mean(row: Series, column_label_list: Optional[Sequence[str]] = None) -> float: """ Calculate the mean of each row for the specified columns of a data frame @@ -65,7 +66,7 @@ def df_mean(row: Series, column_label_list: List[str] = None) -> float: return float(nanmean(row[column_label_list])) -def df_median(row: Series, column_label_list: List[str] = None) -> float: +def df_median(row: Series, column_label_list: Optional[Sequence[list]] = None) -> float: """ Calculate the median of each row for the specified columns of a data frame @@ -91,7 +92,7 @@ def df_median(row: Series, column_label_list: List[str] = None) -> float: return float(nanmedian(row[column_label_list])) -def df_stdev(row: Series, column_label_list: List[str] = None) -> float: +def df_stdev(row: Series, column_label_list: Optional[Sequence[list]] = None) -> float: """ Calculate the standard deviation of each row for the specified columns of a data frame @@ -117,7 +118,7 @@ def df_stdev(row: Series, column_label_list: List[str] = None) -> float: return float(nanstd(row[column_label_list])) -def df_log_stdev(row: Series, column_label_list: List[str] = None) -> float: +def df_log_stdev(row: Series, column_label_list: Optional[Sequence[list]] = None) -> float: """ Calculate the standard deviation of the log10 values in each row for the specified columns of a data frame @@ -196,7 +197,7 @@ def df_log(row: Series, column_label_list: List[str], base: float = 10) -> float return 0 -def df_data_points(row: Series, column_label_list: List[str]) -> List: +def df_data_points(row: Series, column_label_list: Optional[Sequence[list]]) -> List: """ Compile the values for the specified columns in each row into a list @@ -259,7 +260,7 @@ def df_outliers(row: Series, column_label_list: List[str] = None, outlier_mode: return Series(list(x)) -def df_count(row: Series, column_label_list: List[str] = None) -> int: +def df_count(row: Series, column_label_list: Optional[Sequence[list]] = None) -> int: """ Count the number of occurrences of a non-NaN value in the specified columns of a data frame diff --git a/mathematical/linear_regression.py b/mathematical/linear_regression.py index e56bcc1..619a9cf 100644 --- a/mathematical/linear_regression.py +++ b/mathematical/linear_regression.py @@ -57,10 +57,12 @@ from domdf_python_tools.doctools import is_documented_by # type: ignore -def linear_regression_vertical(x: numpy.ndarray, - y: numpy.ndarray = None, - a=None, - 
b=None) -> Tuple[float, float, float, float]: +def linear_regression_vertical( + x: numpy.ndarray, + y: numpy.ndarray = None, + a=None, + b=None, + ) -> Tuple[float, float, float, float]: """ Calculate coefficients of a linear regression y = a * x + b. The fit minimizes *vertical* distances between the points and the line. diff --git a/mathematical/outliers.py b/mathematical/outliers.py index 937e911..6933c02 100644 --- a/mathematical/outliers.py +++ b/mathematical/outliers.py @@ -29,6 +29,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # MA 02110-1301, USA. + # stdlib # # @@ -104,7 +105,7 @@ def two_stdev(dataset: Sequence[float], strip_zero: bool = True): return stdev_outlier(dataset, strip_zero=strip_zero) -def stdev_outlier(dataset: Sequence[float], strip_zero: bool = True, rng=int(2)): +def stdev_outlier(dataset: Sequence[float], strip_zero: bool = True, rng: int = 2): """ Outliers are greater than rng*stdev from mean diff --git a/mathematical/stats.py b/mathematical/stats.py index b9e831b..841edfe 100644 --- a/mathematical/stats.py +++ b/mathematical/stats.py @@ -178,7 +178,11 @@ def pooled_sd(sample1: Sequence[float], sample2: Sequence[float], weighted: bool def d_cohen( - sample1: Sequence[float], sample2: Sequence[float], sd: int = 1, tail=1, pooled: bool = False + sample1: Sequence[float], + sample2: Sequence[float], + sd: int = 1, + tail=1, + pooled: bool = False, ) -> float: """ Cohen's d-Statistic @@ -313,7 +317,7 @@ def median_absolute_deviation( axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, - nan_policy: str = 'propagate' + nan_policy: str = 'propagate,' ): #TODO """ Compute the median absolute deviation of the data along the given axis. @@ -397,7 +401,12 @@ def median_absolute_deviation( return scale * mad -def absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, nan_policy: str = 'propagate'): #TODO +def absolute_deviation( + x, + axis: int = 0, + center: Callable = numpy.median, + nan_policy: str = 'propagate', + ): #TODO """ Compute the absolute deviations from the median of the data along the given axis. 
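As a concrete reference for the two helpers re-wrapped just above, a small sketch of ``median_absolute_deviation`` on made-up data. Illustrative only; it assumes ``mathematical.stats`` as it stands once the whole series is applied:

    import numpy
    from mathematical.stats import median_absolute_deviation

    x = numpy.array([1.0, 2.0, 3.0, 4.0, 100.0])
    # With the default scale of 1.4826 the result is a robust stand-in for the
    # standard deviation of normally distributed data, insensitive to the 100.0.
    print(median_absolute_deviation(x))
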
@@ -458,7 +467,7 @@ def absolute_deviation(x, axis: int = 0, center: Callable = numpy.median, nan_po def absolute_deviation_from_median( - x, axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate' + x, axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate,' ): """ Compute the absolute deviation from the median of each point in the data diff --git a/tests/test_utils.py b/tests/test_utils.py index a80220a..8081da3 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -51,7 +51,7 @@ # # stdlib -import decimal # type: ignore +import decimal # 3rd party import numpy # type: ignore diff --git a/tox.ini b/tox.ini index 4179b34..0045943 100644 --- a/tox.ini +++ b/tox.ini @@ -66,7 +66,7 @@ commands = flake8 mathematical tests [testenv:yapf] -basepython = python3.7 +basepython = python3.8 changedir={toxinidir} skip_install = true ignore_errors=true @@ -75,7 +75,7 @@ commands = yapf -i --recursive mathematical tests [testenv:isort] -basepython = python3.6 +basepython = python3.8 skip_install = true ignore_errors=true changedir={toxinidir} From fc30b42c75bbeae60770cce5cf6242ef23516f04 Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 7 Jun 2020 18:39:18 +0100 Subject: [PATCH 08/11] added type annotations --- mathematical/stats.py | 31 ++++++++++++++--------------- mathematical/utils.py | 46 ++++++++++++------------------------------- 2 files changed, 28 insertions(+), 49 deletions(-) diff --git a/mathematical/stats.py b/mathematical/stats.py index 841edfe..5d0525e 100644 --- a/mathematical/stats.py +++ b/mathematical/stats.py @@ -180,7 +180,7 @@ def pooled_sd(sample1: Sequence[float], sample2: Sequence[float], weighted: bool def d_cohen( sample1: Sequence[float], sample2: Sequence[float], - sd: int = 1, + which: int = 1, tail=1, pooled: bool = False, ) -> float: @@ -193,8 +193,8 @@ def d_cohen( :type sample1: list :param sample2: datapoints for second sample :type sample2: list - :param sd: Use the standard deviation of the first sample (1) or the second sample (2) - :type sd: int + :param which: Use the standard deviation of the first sample (1) or the second sample (2) + :type which: int :param tail: :param pooled: @@ -205,18 +205,18 @@ def d_cohen( mean1 = numpy.mean(sample1) mean2 = numpy.mean(sample2) - if sd == 1: - sd = numpy.std(sample1) + if which == 1: + stdev = numpy.std(sample1) else: - sd = numpy.std(sample2) + stdev = numpy.std(sample2) if pooled: - sd = pooled_sd(sample1, sample2) + stdev = pooled_sd(sample1, sample2) if tail == 2: - return numpy.abs(mean1 - mean2) / sd + return numpy.abs(mean1 - mean2) / stdev - return (mean1 - mean2) / sd + return (mean1 - mean2) / stdev def g_hedge(sample1: Sequence[float], sample2: Sequence[float]) -> float: @@ -256,7 +256,7 @@ def g_durlak_bias(g: float, n: float) -> float: return g * Durlak -def interpret_d(d_or_g: float) -> Optional[str]: +def interpret_d(d_or_g: float) -> str: """ Interpret Cohen's d or Hedge's g values using Table 1 from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3444174/ @@ -268,9 +268,7 @@ def interpret_d(d_or_g: float) -> Optional[str]: :rtype: """ - if d_or_g < 0: - return f"{interpret_d(numpy.abs(d_or_g)).split(' ')[0]} Adverse Effect" - elif 0.0 <= d_or_g < 0.2: + if 0.0 <= d_or_g < 0.2: return "No Effect" elif 0.2 <= d_or_g < 0.5: return "Small Effect" @@ -278,7 +276,8 @@ def interpret_d(d_or_g: float) -> Optional[str]: return "Intermediate Effect" elif 0.8 <= d_or_g: return "Large Effect" - return None + else: # d_or_g < 
0 + return f"{interpret_d(numpy.abs(d_or_g)).split(' ')[0]} Adverse Effect" def _contains_nan(a, nan_policy: str = 'propagate'): @@ -317,7 +316,7 @@ def median_absolute_deviation( axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, - nan_policy: str = 'propagate,' + nan_policy: str = 'propagate' ): #TODO """ Compute the median absolute deviation of the data along the given axis. @@ -467,7 +466,7 @@ def absolute_deviation( def absolute_deviation_from_median( - x, axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate,' + x, axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate', ): """ Compute the absolute deviation from the median of each point in the data diff --git a/mathematical/utils.py b/mathematical/utils.py index 9c475ff..d494c5f 100644 --- a/mathematical/utils.py +++ b/mathematical/utils.py @@ -122,7 +122,7 @@ def roman(num: float) -> str: values = 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1 result = '' for t, v in zip(tokens, values): - cnt = num // v + cnt = int(num // v) result += t * cnt num -= v * cnt return result @@ -305,7 +305,7 @@ def gcd_array(array) -> float: return x -def gcd2(numbers: int) -> int: +def gcd2(numbers: Sequence[int]) -> int: """ Returns the GCD (HCF) of a list of numbers using Euclid's Algorithm @@ -320,7 +320,7 @@ def gcd2(numbers: int) -> int: return c -def lcm(numbers: Sequence[float]) -> float: +def lcm(numbers: Sequence[int]) -> float: """ Returns the LCM of a list of numbers using Euclid's Algorithm :param numbers: @@ -348,10 +348,10 @@ def hcf(a: int, b: int) -> int: :return:int """ - gcd(a, b) + return gcd(a, b) -def hcf2(numbers: int) -> int: +def hcf2(numbers: Sequence[int]) -> int: """ :param numbers: @@ -359,7 +359,7 @@ def hcf2(numbers: int) -> int: :return:int """ - gcd2(numbers) + return gcd2(numbers) def modInverse(a: int, m: int) -> Optional[float]: @@ -389,14 +389,14 @@ def modInverse(a: int, m: int) -> Optional[float]: def log_factorial(x: float) -> float: - x = numpy.array(x) + arr = numpy.array(x) pf = _precalc_fact - m = (x >= pf.size) - out = numpy.empty(x.shape) - out[~m] = pf[x[~m].astype(int)] - x = x[m] - out[m] = x * numpy.log(x) - x + 0.5 * numpy.log(2 * numpy.pi * x) - return out + m: bool = (arr >= pf.size) + out = numpy.empty(arr.shape) + out[~m] = pf[arr[~m].astype(int)] + arr = arr[m] + out[m] = arr * numpy.log(arr) - arr + 0.5 * numpy.log(2 * numpy.pi * arr) + return float(out) def _log_pi_r(d: float, k: float, p: float = 0.5) -> float: @@ -405,23 +405,3 @@ def _log_pi_r(d: float, k: float, p: float = 0.5) -> float: def _log_pi(d: float, k: float, p: float = 0.5) -> float: return _log_pi_r(d, k, p) + (d + 1) * math.log(1 - p) - - -def _expectation(d: float, T: float, p: float = 0.5): - if T is None: - return d + 1 - T = numpy.array(T, dtype=int) - m = numpy.arange(T.max() + 1, dtype=int) - pi = numpy.exp(_log_pi(d, m, p)) - return ((m * pi).cumsum() / pi.cumsum())[T] - - -def _confidence_value(conf: float, d: float, T: float, p: float = 0.5): - if T is not None: - T = numpy.array(T, dtype=int) - m = numpy.arange(T.max() + 1, dtype=int) - else: - m = numpy.arange(max(50 * d, 10000)) - log_pi = _log_pi(d, m, p) - pics = numpy.exp(log_pi).cumsum() - return numpy.searchsorted(pics, conf * (pics[T] if T is not None else 1)) From d88633e5fecfa9c248f5d1a3f359476e30043fbc Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 7 Jun 2020 21:48:55 +0100 Subject: [PATCH 09/11] Added type annotations --- 
mathematical/data_frames.py | 2 +- mathematical/stats.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/mathematical/data_frames.py b/mathematical/data_frames.py index 9d3ed36..d85407a 100644 --- a/mathematical/data_frames.py +++ b/mathematical/data_frames.py @@ -197,7 +197,7 @@ def df_log(row: Series, column_label_list: List[str], base: float = 10) -> float return 0 -def df_data_points(row: Series, column_label_list: Optional[Sequence[list]]) -> List: +def df_data_points(row: Series, column_label_list: Sequence[str]) -> List: """ Compile the values for the specified columns in each row into a list diff --git a/mathematical/stats.py b/mathematical/stats.py index 5d0525e..a4a0a61 100644 --- a/mathematical/stats.py +++ b/mathematical/stats.py @@ -466,8 +466,12 @@ def absolute_deviation( def absolute_deviation_from_median( - x, axis: int = 0, center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate', - ): + x, + axis: int = 0, + center: Callable = numpy.median, + scale: float = 1.4826, + nan_policy: str = 'propagate', + ) -> numpy.ndarray: """ Compute the absolute deviation from the median of each point in the data along the given axis, given in terms of the MAD. From 3c1b0d3a3cc7a65df2a7dd50de35dd22334a8e95 Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 8 Jun 2020 12:20:04 +0100 Subject: [PATCH 10/11] Added type annotations --- mathematical/data_frames.py | 21 ++++-------- mathematical/linear_regression.py | 29 +++++++++------- mathematical/outliers.py | 41 ++++++++++++++++------- mathematical/stats.py | 55 ++++++++++++++++--------------- mathematical/utils.py | 17 ++++++---- 5 files changed, 91 insertions(+), 72 deletions(-) diff --git a/mathematical/data_frames.py b/mathematical/data_frames.py index d85407a..0c0f005 100644 --- a/mathematical/data_frames.py +++ b/mathematical/data_frames.py @@ -52,7 +52,6 @@ def df_mean(row: Series, column_label_list: Optional[Sequence[str]] = None) -> f :param row: row of the data frame :type row: pandas.core.series.Series :param column_label_list: list of column labels to calculate mean for - :type column_label_list: list :return: Mean :rtype: float @@ -66,7 +65,7 @@ def df_mean(row: Series, column_label_list: Optional[Sequence[str]] = None) -> f return float(nanmean(row[column_label_list])) -def df_median(row: Series, column_label_list: Optional[Sequence[list]] = None) -> float: +def df_median(row: Series, column_label_list: Optional[Sequence[str]] = None) -> float: """ Calculate the median of each row for the specified columns of a data frame @@ -78,7 +77,6 @@ def df_median(row: Series, column_label_list: Optional[Sequence[list]] = None) - :param row: row of the data frame :type row: pandas.core.series.Series :param column_label_list: list of column labels to calculate median for - :type column_label_list: list :return: Median :rtype: float @@ -92,7 +90,7 @@ def df_median(row: Series, column_label_list: Optional[Sequence[list]] = None) - return float(nanmedian(row[column_label_list])) -def df_stdev(row: Series, column_label_list: Optional[Sequence[list]] = None) -> float: +def df_stdev(row: Series, column_label_list: Optional[Sequence[str]] = None) -> float: """ Calculate the standard deviation of each row for the specified columns of a data frame @@ -104,7 +102,6 @@ def df_stdev(row: Series, column_label_list: Optional[Sequence[list]] = None) -> :param row: row of the data frame :type row: pandas.core.series.Series :param column_label_list: list of column labels to calculate standard deviation 
for - :type column_label_list: list :return: Standard deviation :rtype: float @@ -118,7 +115,7 @@ def df_stdev(row: Series, column_label_list: Optional[Sequence[list]] = None) -> return float(nanstd(row[column_label_list])) -def df_log_stdev(row: Series, column_label_list: Optional[Sequence[list]] = None) -> float: +def df_log_stdev(row: Series, column_label_list: Optional[Sequence[str]] = None) -> float: """ Calculate the standard deviation of the log10 values in each row for the specified columns of a data frame @@ -130,7 +127,6 @@ def df_log_stdev(row: Series, column_label_list: Optional[Sequence[list]] = None :param row: row of the data frame :type row: pandas.core.series.Series :param column_label_list: list of column labels to calculate standard deviation for - :type column_label_list: list :return: Standard deviation :rtype: float @@ -169,7 +165,7 @@ def df_percentage(row: Series, column_label: str, total: float) -> float: return (row[column_label] / float(total)) * 100.0 -def df_log(row: Series, column_label_list: List[str], base: float = 10) -> float: +def df_log(row: Series, column_label_list: Sequence[str], base: float = 10) -> float: """ Calculate the logarithm of the values in each row for the specified columns of a data frame @@ -181,7 +177,6 @@ def df_log(row: Series, column_label_list: List[str], base: float = 10) -> float :param row: row of the data frame :type row: pandas.core.series.Series :param column_label_list: list of column labels to calculate log for - :type column_label_list: list :param base: logarithmic base :type base: float @@ -209,7 +204,6 @@ def df_data_points(row: Series, column_label_list: Sequence[str]) -> List: :param row: row of the data frame :type row: pandas.core.series.Series :param column_label_list: list of column labels to calculate standard deviation for - :type column_label_list: list :return: data points :rtype: list @@ -218,7 +212,7 @@ def df_data_points(row: Series, column_label_list: Sequence[str]) -> List: return [row[column_label] for column_label in column_label_list] -def df_outliers(row: Series, column_label_list: List[str] = None, outlier_mode: int = MAD) -> Series: +def df_outliers(row: Series, column_label_list: Sequence[str] = None, outlier_mode: int = MAD) -> Series: """ Identify outliers in each row @@ -230,7 +224,6 @@ def df_outliers(row: Series, column_label_list: List[str] = None, outlier_mode: :param row: row of the data frame :type row: pandas.core.series.Series :param column_label_list: list of column labels to determine outliers for - :type column_label_list: list :param outlier_mode: outlier detection method to use :type outlier_mode: int @@ -255,12 +248,12 @@ def df_outliers(row: Series, column_label_list: List[str] = None, outlier_mode: elif outlier_mode == STDEV2: x = outliers.stdev_outlier(data, rng=2) # outlier classed as more than 2 stdev away from mean else: - return None + raise ValueError("Unknown outlier mode.") return Series(list(x)) -def df_count(row: Series, column_label_list: Optional[Sequence[list]] = None) -> int: +def df_count(row: Series, column_label_list: Optional[Sequence[str]] = None) -> int: """ Count the number of occurrences of a non-NaN value in the specified columns of a data frame diff --git a/mathematical/linear_regression.py b/mathematical/linear_regression.py index 619a9cf..ca10a85 100644 --- a/mathematical/linear_regression.py +++ b/mathematical/linear_regression.py @@ -50,18 +50,20 @@ # # stdlib -from typing import Tuple +from typing import Optional, Sequence, Tuple, Union # 3rd party 
import numpy # type: ignore from domdf_python_tools.doctools import is_documented_by # type: ignore +ArrayLike_Float = Union[Sequence[float], numpy.ndarray] + def linear_regression_vertical( - x: numpy.ndarray, - y: numpy.ndarray = None, - a=None, - b=None, + x: ArrayLike_Float, + y: Optional[ArrayLike_Float] = None, + a: Optional[float] = None, + b: Optional[float] = None, ) -> Tuple[float, float, float, float]: """ Calculate coefficients of a linear regression y = a * x + b. @@ -81,7 +83,7 @@ def linear_regression_vertical( :return: (a, b, r, stderr), where a -- slope coefficient, b -- free term, - r -- Peason correlation coefficient, + r -- Pearson correlation coefficient, stderr -- standard deviation. :rtype: tuple """ @@ -91,7 +93,7 @@ def linear_regression_vertical( y = numpy.array(y, copy=False) else: if len(x.shape) != 2 or x.shape[-1] != 2: - raise TypeError('If `y` is not given, x.shape should be (N, 2), given: {}'.format(x.shape)) + raise TypeError(f'If `y` is not given, x.shape should be (N, 2), given: {x.shape}') y = x[:, 1] x = x[:, 0] if a is not None and b is None: @@ -104,15 +106,16 @@ def linear_regression_vertical( r = numpy.corrcoef(x, y)[0, 1] stderr = (y - a * x - b).std() - return a, b, r, stderr + return a, b, r, stderr # type: ignore # TODO -@is_documented_by(linear_regression_vertical) -def linear_regression(x, y=None, a=None, b=None): - return linear_regression_vertical(x, y, a, b) +linear_regression = linear_regression_vertical -def linear_regression_perpendicular(x, y=None): +def linear_regression_perpendicular( + x: ArrayLike_Float, + y: Optional[ArrayLike_Float] = None, + ) -> Tuple[float, float, float, float]: """ Calculate coefficients of a linear regression y = a * x + b. The fit minimizes *perpendicular* distances between the points and the line. @@ -135,6 +138,7 @@ def linear_regression_perpendicular(x, y=None): """ x = numpy.array(x, copy=False) + if y is not None: y = numpy.array(y, copy=False) data = numpy.hstack((x.reshape((-1, 1)), y.reshape((-1, 1)))) @@ -142,6 +146,7 @@ def linear_regression_perpendicular(x, y=None): if len(x.shape) != 2 or x.shape[-1] != 2: raise TypeError('If `y` is not given, x.shape should be (N, 2), given: {}'.format(x.shape)) data = x + mu = data.mean(axis=0) eigenvectors, eigenvalues, V = numpy.linalg.svd((data - mu).T, full_matrices=False) a = eigenvectors[0][1] / eigenvectors[0][0] diff --git a/mathematical/outliers.py b/mathematical/outliers.py index 6933c02..6b2de72 100644 --- a/mathematical/outliers.py +++ b/mathematical/outliers.py @@ -29,10 +29,10 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # MA 02110-1301, USA. 
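A brief usage sketch of the regression entry point refactored earlier in this patch (``linear_regression`` is now a plain alias of ``linear_regression_vertical``, and the annotations allow any sequence of floats). The data points are made up:

    from mathematical.linear_regression import linear_regression_vertical

    x = [1.0, 2.0, 3.0, 4.0, 5.0]
    y = [2.1, 3.9, 6.2, 7.8, 10.1]

    # slope, intercept, Pearson r, and the standard deviation of the residuals
    a, b, r, stderr = linear_regression_vertical(x, y)
    print(f"y = {a:.2f}*x + {b:.2f}  (r={r:.3f}, stderr={stderr:.3f})")
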
- -# stdlib # # + +# stdlib from typing import List, Sequence, Tuple # 3rd party @@ -51,7 +51,8 @@ def mad_outliers( Using the Median Absolute Deviation to Find Outliers :param dataset: - :type dataset: list + :param strip_zero: + :type strip_zero: bool :param threshold: The multiple of MAD above which values are considered to be outliers Leys et al (2013) make the following recommendations: 1 In univariate statistics, the Median Absolute Deviation is the most robust @@ -68,7 +69,7 @@ def mad_outliers( See https://dipot.ulb.ac.be/dspace/bitstream/2013/139499/1/Leys_MAD_final-libre.pdf :type threshold: int - :return: #TODO + :return: """ dataset = utils.strip_none_bool_string(dataset) @@ -93,11 +94,13 @@ def mad_outliers( return outliers, data_exc_outliers -def two_stdev(dataset: Sequence[float], strip_zero: bool = True): +def two_stdev(dataset: Sequence[float], strip_zero: bool = True) -> Tuple[List[float], List[float]]: """ Outliers are greater than 2x stdev from mean :param dataset: + :param strip_zero: + :type strip_zero: bool :return: # TODO """ @@ -105,12 +108,17 @@ def two_stdev(dataset: Sequence[float], strip_zero: bool = True): return stdev_outlier(dataset, strip_zero=strip_zero) -def stdev_outlier(dataset: Sequence[float], strip_zero: bool = True, rng: int = 2): +def stdev_outlier(dataset: Sequence[float], + strip_zero: bool = True, + rng: int = 2) -> Tuple[List[float], List[float]]: """ Outliers are greater than rng*stdev from mean :param dataset: + :param strip_zero: + :type strip_zero: bool :param rng: + :type rng: :return: 'TODO """ @@ -138,13 +146,15 @@ def stdev_outlier(dataset: Sequence[float], strip_zero: bool = True, rng: int = return outliers, data_exc_outliers -def quartile_outliers(dataset: Sequence[float], strip_zero: bool = True): +def quartile_outliers(dataset: Sequence[float], strip_zero: bool = True) -> Tuple[List[float], List[float]]: """ outliers are more than 3x inter-quartile range from upper or lower quartile - :param dataset: # + :param dataset: + :param strip_zero: + :type strip_zero: bool - :return: #TODO + :return: """ dataset = utils.strip_none_bool_string(dataset) @@ -176,16 +186,22 @@ def quartile_outliers(dataset: Sequence[float], strip_zero: bool = True): return outliers, data_exc_outliers -def spss_outliers(dataset: Sequence[float], strip_zero: bool = True, mode: str = "all"): +def spss_outliers( + dataset: Sequence[float], + strip_zero: bool = True, + mode: str = "all", + ): # TODO: -> Tuple[List[float], List[float], List[float]] """ - Based on IBM SPSS method for detecting outliers + Based on IBM SPSS method for detecting outliers. 
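For reference, a minimal calling sketch for the outlier helpers annotated in this hunk. The data are made up; each helper returns two lists, the flagged outliers followed by the remaining data, matching the ``Tuple[List[float], List[float]]`` annotations above:

    from mathematical.outliers import mad_outliers, quartile_outliers

    data = [1.0, 1.2, 0.9, 1.1, 1.3, 9.5]

    # 9.5 sits many MADs (and several IQRs) away from the rest of the data
    print(mad_outliers(data, threshold=3))
    print(quartile_outliers(data))
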
+ Outliers more than 1.5*IQR from Q1 or Q3 + "Extreme values" more than 3*IQR from Q1 or Q3 :param dataset: :param mode: str - :return: # TODO + :return: """ if len(dataset) < 2: @@ -199,6 +215,7 @@ def spss_outliers(dataset: Sequence[float], strip_zero: bool = True, mode: str = for val in dataset: if val in ['', 0.0, 0]: dataset.remove(val) + if len(dataset) == 0: return float('nan') elif dataset == [None]: diff --git a/mathematical/stats.py b/mathematical/stats.py index a4a0a61..6a577cb 100644 --- a/mathematical/stats.py +++ b/mathematical/stats.py @@ -58,7 +58,6 @@ def mean_none(dataset: Sequence[Union[float, bool, None]]) -> float: Calculate the mean, excluding NaN, strings, boolean values, and zeros :param dataset: list to calculate mean from - :type dataset: list :return: mean :rtype float @@ -75,8 +74,8 @@ def std_none(dataset: Sequence[Union[float, bool, None]], ddof: int = 1) -> floa Calculate the standard deviation, excluding NaN, strings, boolean values, and zeros :param dataset: list to calculate mean from - :type dataset: list - :param ddof: Means Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. By default ddof is 1. + :param ddof: Means Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. By default ddof is 1. :type ddof: int :return: standard deviation :rtype float @@ -89,12 +88,11 @@ def std_none(dataset: Sequence[Union[float, bool, None]], ddof: int = 1) -> floa return float(numpy.nanstd(dataset, ddof=ddof)) -def median_none(dataset: Sequence[Union[float, bool, None]]): +def median_none(dataset: Sequence[Union[float, bool, None]]) -> float: """ Calculate the median, excluding NaN, strings, boolean values, and zeros :param dataset: list to calculate median from - :type dataset: list :return: standard deviation :rtype float @@ -103,7 +101,7 @@ def median_none(dataset: Sequence[Union[float, bool, None]]): dataset = utils.strip_none_bool_string(dataset) dataset = utils.remove_zero(dataset) - return numpy.nanmedian(dataset) + return float(numpy.nanmedian(dataset)) def iqr_none(dataset: Sequence[Union[float, bool, None]]) -> float: @@ -111,8 +109,6 @@ def iqr_none(dataset: Sequence[Union[float, bool, None]]) -> float: Calculate the interquartile range, excluding NaN, strings, boolean values, and zeros :param dataset: list to calculate iqr from - :type dataset: list - :return: interquartile range :rtype float """ @@ -130,8 +126,6 @@ def percentile_none(dataset: Sequence[Union[float, bool, None]], percentage: flo Calculate the given percentile, excluding NaN, strings, boolean values, and zeros :param dataset: list to calculate percentile from - :type dataset: list - :param percentage: :type percentage: float @@ -157,9 +151,7 @@ def pooled_sd(sample1: Sequence[float], sample2: Sequence[float], weighted: bool Formula from https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/hedgeg.htm :param sample1: datapoints for first sample - :type sample1: list :param sample2: datapoints for second sample - :type sample2: list :param weighted: True for weighted pooled SD :return: Pooled Standard Deviation @@ -181,7 +173,7 @@ def d_cohen( sample1: Sequence[float], sample2: Sequence[float], which: int = 1, - tail=1, + tail: int = 1, pooled: bool = False, ) -> float: """ @@ -190,9 +182,7 @@ def d_cohen( Cohen, J. (1988). Statistical power analysis for the behavioral sciences (2nd Edition). 
Hillsdale, NJ: Lawrence Erlbaum Associates :param sample1: datapoints for first sample - :type sample1: list :param sample2: datapoints for second sample - :type sample2: list :param which: Use the standard deviation of the first sample (1) or the second sample (2) :type which: int :param tail: @@ -226,8 +216,8 @@ def g_hedge(sample1: Sequence[float], sample2: Sequence[float]) -> float: Formula from https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/hedgeg.htm :param sample1: datapoints for first sample - :type sample1: list :param sample2: datapoints for second sample + :return: """ @@ -244,12 +234,12 @@ def g_durlak_bias(g: float, n: float) -> float: n = n1+n2 :param g: - :type g: + :type g: float :param n: - :type n: + :type n: float :return: - :rtype: + :rtype: float """ Durlak = ((n - 3) / (n - 2.25)) * numpy.sqrt((n - 2) / n) @@ -265,7 +255,7 @@ def interpret_d(d_or_g: float) -> str: :type d_or_g: :return: - :rtype: + :rtype: str """ if 0.0 <= d_or_g < 0.2: @@ -317,14 +307,14 @@ def median_absolute_deviation( center: Callable = numpy.median, scale: float = 1.4826, nan_policy: str = 'propagate' - ): #TODO + ) -> numpy.ndarray: """ Compute the median absolute deviation of the data along the given axis. The median absolute deviation (MAD, [1]_) computes the median over the absolute deviations from the median. It is a measure of dispersion similar to the standard deviation, but is more robust to outliers [2]_. The MAD of an empty array is ``numpy.nan``. - .. versionadded:: 1.3.0 + Parameters ---------- x : array_like @@ -405,7 +395,7 @@ def absolute_deviation( axis: int = 0, center: Callable = numpy.median, nan_policy: str = 'propagate', - ): #TODO + ) -> numpy.ndarray: """ Compute the absolute deviations from the median of the data along the given axis. @@ -511,6 +501,7 @@ def absolute_deviation_from_median( will calculate the MAD around the mean - it will not calculate the *mean* absolute deviation. """ + ad = absolute_deviation(x, axis=axis, center=center, nan_policy=nan_policy) if axis is None: @@ -518,13 +509,23 @@ def absolute_deviation_from_median( else: mad = numpy.median(ad, axis=axis) - ad_from_median = ad / mad - - return ad_from_median + return ad / mad def within1min(value1: float, value2: float) -> bool: - if value1 not in [0, None, ''] and value2 not in [0, None, '']: + """ + Returns whether ``value2`` is within one minute of ``value1``. 
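An illustrative check of the helper documented here, assuming its arguments are plain numbers of minutes, as the one-unit window in the implementation that follows suggests:

    from mathematical.stats import within1min

    print(within1min(60.0, 60.4))  # True: the two values differ by less than one
    print(within1min(60.0, 61.5))  # False: more than one apart
    print(within1min(60.0, 0))     # False: falsy inputs are rejected outright
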
+ + :param value1: A time + :type value1: + :param value2: another time + :type value2: + + :return: + :rtype: + """ + + if value1 and value2: return (float(value1) - 1) < (float(value2)) < (float(value1) + 1) else: return False diff --git a/mathematical/utils.py b/mathematical/utils.py index d494c5f..dd506b6 100644 --- a/mathematical/utils.py +++ b/mathematical/utils.py @@ -92,16 +92,15 @@ def intdiv(p: float, q: float) -> int: """ Integer divsions which rounds toward zero - Examples - -------- + **Examples** >>> intdiv(3, 2) 1 >>> intdiv(-3, 2) -1 >>> -3 // 2 -2 - """ + r = p // q if r < 0 and q * r != p: r += 1 @@ -110,8 +109,7 @@ def intdiv(p: float, q: float) -> int: def roman(num: float) -> str: """ - Examples - -------- + **Examples** >>> roman(4) 'IV' >>> roman(17) @@ -165,7 +163,7 @@ def remove_zero(inputlist: Sequence[Union[float, bool, None]]) -> List[float]: return list(inputlist[numpy.nonzero(inputlist)]) -def isint(num: float) -> bool: # Only works with floating point numbers +def isint(num: float) -> bool: # Only works with floating-point numbers """ Checks whether a float is an integer value @@ -178,7 +176,7 @@ def isint(num: float) -> bool: # Only works with floating point numbers return num == int(num) -def RepresentsInt(s: Any) -> bool: +def represents_int(s: Any) -> bool: """ Checks whether a value can be converted to int @@ -193,6 +191,9 @@ def RepresentsInt(s: Any) -> bool: return False +RepresentsInt = represents_int + + def rounders(val_to_round: Union[str, float, Decimal], round_format: str) -> Decimal: """ Round a value to the specified number format, e.g. "0.000" for three decimal places @@ -377,9 +378,11 @@ def modInverse(a: int, m: int) -> Optional[float]: # Calculation using the Extended Euclidean Algorithm u1, u2, u3 = 1, 0, a v1, v2, v3 = 0, 1, m + while v3 != 0: q = u3 // v3 # // forces integer division in Python 3 v1, v2, v3, u1, u2, u3 = (u1 - q * v1), (u2 - q * v2), (u3 - q * v3), v1, v2, v3 + return u1 % m From f4613228142a223223eb4d2cac8c02b711ec84ce Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 8 Jun 2020 12:35:27 +0100 Subject: [PATCH 11/11] Added type annotations --- mathematical/outliers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mathematical/outliers.py b/mathematical/outliers.py index 6b2de72..eeefb74 100644 --- a/mathematical/outliers.py +++ b/mathematical/outliers.py @@ -108,9 +108,11 @@ def two_stdev(dataset: Sequence[float], strip_zero: bool = True) -> Tuple[List[f return stdev_outlier(dataset, strip_zero=strip_zero) -def stdev_outlier(dataset: Sequence[float], - strip_zero: bool = True, - rng: int = 2) -> Tuple[List[float], List[float]]: +def stdev_outlier( + dataset: Sequence[float], + strip_zero: bool = True, + rng: int = 2, + ) -> Tuple[List[float], List[float]]: """ Outliers are greater than rng*stdev from mean