In [8]:
import random
from typing import TypeVar, List, Tuple

In [9]:
X = TypeVar("X")
Y = TypeVar("Y")

In [None]:
def split_data(data: List[X], prob: float) -> Tuple[List[X], List[X]]:
    """Randomly split ``data`` into two lists in proportions [prob, 1 - prob].

    The caller's list is left untouched: a shallow copy is shuffled with the
    module-level ``random`` generator, so the split is only reproducible if
    the caller seeds that generator first (``random.seed(...)``).

    Args:
        data: Items to split.
        prob: Fraction of the items to place in the first list; must lie
            in the closed interval [0, 1].

    Returns:
        A ``(first, second)`` tuple where ``first`` holds
        ``int(len(data) * prob)`` items (the product is truncated) and
        ``second`` holds the remainder.

    Raises:
        ValueError: If ``prob`` is outside [0, 1].
    """
    # A negative prob would turn into a negative slice index below and
    # silently put almost everything in the first list, so reject
    # out-of-range fractions up front.
    if not 0 <= prob <= 1:
        raise ValueError(f"prob must be between 0 and 1, got {prob!r}")

    data = data[:]  # shallow copy so the caller's ordering is preserved
    random.shuffle(data)
    cut = int(len(data) * prob)
    return data[:cut], data[cut:]


# Demo: split the integers 0..999 into a 75% / 25% partition.
# The sizes are fixed by the fraction; the sampled values vary between runs
# unless the `random` module was seeded beforehand.
data = list(range(1000))
train, test = split_data(data, 0.75)
print(len(train), len(test))
print(train[:10])

750 250
[617, 878, 583, 969, 118, 256, 314, 950, 41, 379]


In [None]:
def train_test_split(xs: List[X], ys: List[Y], test_pct: float) -> Tuple[List[X], List[X], List[Y], List[Y]]:
    """Split paired inputs ``xs`` and outputs ``ys`` into train and test sets.

    A single random partition of the index range is generated with
    ``split_data`` and applied to both lists, so ``xs[i]`` and ``ys[i]``
    always land in the same subset and keep the same relative position.

    Args:
        xs: Input values.
        ys: Output values; ``ys[i]`` is paired with ``xs[i]``.
        test_pct: Fraction of the pairs to place in the test sets; the
            remaining ``1 - test_pct`` goes to the train sets.

    Returns:
        ``(x_train, x_test, y_train, y_test)``.

    Raises:
        ValueError: If ``xs`` and ``ys`` differ in length.
    """
    # Without this check a longer `ys` is silently truncated and a shorter
    # one fails with an IndexError that depends on the random shuffle.
    if len(xs) != len(ys):
        raise ValueError(
            f"xs and ys must have the same length, got {len(xs)} and {len(ys)}"
        )

    idxs = list(range(len(xs)))
    train_idxs, test_idxs = split_data(idxs, 1 - test_pct)

    return (
        [xs[i] for i in train_idxs],
        [xs[i] for i in test_idxs],
        [ys[i] for i in train_idxs],
        [ys[i] for i in test_idxs],
    )


# Demo: ys is a fixed function of xs (y = 2x), so comparing the printed
# slices position by position shows that the pairs stay aligned after
# the split.
xs = list(range(1000))
ys = [2 * x for x in xs]
x_train, x_test, y_train, y_test = train_test_split(xs, ys, 0.2)

print(len(x_train), len(y_train))
print(x_train[:5])
print(y_train[:5])

800 800
[706, 51, 858, 792, 565]
[1412, 102, 1716, 1584, 1130]


In [None]:
def accuracy(tp: int, fp: int, fn: int, tn: int) -> float:
    """Return the fraction of all predictions that were correct.

    Computed as ``(tp + tn) / (tp + fp + fn + tn)`` from the four
    confusion-matrix counts.

    Raises:
        ZeroDivisionError: If all four counts sum to zero.
    """
    return (tp + tn) / (tp + fp + fn + tn)


# Worked example: accuracy for one set of confusion-matrix counts.
print(accuracy(tp=70, fp=4930, fn=13930, tn=981070))

0.98114


In [None]:
def precision(tp: int, fp: int, fn: int, tn: int) -> float:
    """Return the fraction of positive predictions that were correct.

    Computed as ``tp / (tp + fp)``. ``fn`` and ``tn`` are accepted but not
    used, which keeps the signature parallel to the other metric functions.

    Raises:
        ZeroDivisionError: If ``tp + fp`` is zero (no positive predictions).
    """
    predicted_positive = tp + fp
    return tp / predicted_positive


# Worked example: precision for one set of confusion-matrix counts.
print(precision(tp=70, fp=4930, fn=13930, tn=981070))

0.014


In [None]:
def recall(tp: int, fp: int, fn: int, tn: int) -> float:
    """Return the fraction of actual positives that were identified.

    Computed as ``tp / (tp + fn)``. ``fp`` and ``tn`` are accepted but not
    used, which keeps the signature parallel to the other metric functions.

    Raises:
        ZeroDivisionError: If ``tp + fn`` is zero (no actual positives).
    """
    actual_positive = tp + fn
    return tp / actual_positive


# Worked example: recall for one set of confusion-matrix counts.
print(recall(tp=70, fp=4930, fn=13930, tn=981070))

0.005


In [None]:
def f1_score(tp: int, fp: int, fn: int, tn: int) -> float:
    """Return the F1 score: the harmonic mean of precision and recall.

    Computed as ``2 * p * c / (p + c)``, where ``p`` and ``c`` come from
    this file's ``precision`` and ``recall`` called with the same four
    counts. When both are zero the result is 0.0.

    Raises:
        ZeroDivisionError: Propagated from ``precision`` or ``recall`` when
            their own denominators (``tp + fp`` or ``tp + fn``) are zero.
    """
    p = precision(tp, fp, fn, tn)
    c = recall(tp, fp, fn, tn)

    # With no true positives both precision and recall are 0, and the
    # harmonic mean below would be 0 / 0. F1 is 0 in that case, matching
    # the equivalent form 2*tp / (2*tp + fp + fn).
    if p + c == 0:
        return 0.0

    return 2 * p * c / (p + c)


# Worked example: F1 score for one set of confusion-matrix counts.
print(f1_score(tp=70, fp=4930, fn=13930, tn=981070))

0.00736842105263158
