### If you want to use these functions in your code, save this file in the same folder as your homework file. Then put `%run MLequations_v3.ipynb` at the top of your current notebook to import this package.

# Imports

In [3]:
import math as m
import pandas as pd
import numpy as np
import statistics as stat
import matplotlib.pyplot as plt

# Information Based Learning 

### Shannon's Entropy

In [56]:
def shannon_entropy(_D):
    """Return the Shannon entropy (in bits) of the target feature of ``_D``.

    The target feature is the last column of the dataframe.

    Args:
        _D: pandas DataFrame (or a partition of one) whose last column holds
            the target feature.

    Returns:
        ``-sum(p * log2(p))`` over the levels of the target feature, where
        ``p`` is a level's count divided by the number of rows. ``0`` when no
        level has a positive count.
    """
    target = _D[_D.columns[-1]]
    counts = dict(target.value_counts())
    n = _D.iloc[:, -1].size
    # A categorical target reports its unused categories with a count of 0.
    # They contribute nothing to the entropy and log2(0) would raise a
    # ValueError, so they are skipped.
    return sum(
        [-(count / n) * m.log2(count / n) for count in counts.values() if count > 0]
    )

### Remainder

In [63]:
def remainder(d, _D):
    """Return the entropy remaining after partitioning ``_D`` on feature ``d``.

    Args:
        d: descriptive feature column (pandas Series).
        _D: dataframe whose last column is the target feature.

    Returns:
        The sum, over the levels of ``d``, of the share of rows at that level
        times the Shannon entropy of the target within that level.
    """
    level_counts = dict(d.value_counts())
    total = d.size
    # Two-column frame: shannon_entropy reads the last column ('t').
    pairs = pd.DataFrame({'d': d, 't': _D.iloc[:, -1]})
    weighted = 0
    for level, count in level_counts.items():
        partition = pairs[pairs['d'] == level]
        weighted += (count / total) * shannon_entropy(partition)
    return weighted

### Information Gain

In [67]:
def information_gain(d, _D):
    """Return the information gain of descriptive feature ``d`` on ``_D``.

    Args:
        d: descriptive feature column.
        _D: dataframe whose last column is the target feature.

    Returns:
        Entropy of the whole dataframe minus the remainder after splitting
        on ``d``.
    """
    entropy_before = shannon_entropy(_D)
    entropy_after = remainder(d, _D)
    return entropy_before - entropy_after

### Information Gain Ratio

In [61]:
def gain_ratio(d, _D):
    """Return the information gain ratio of descriptive feature ``d``.

    The gain ratio divides the information gain by the entropy of the
    descriptive feature's own level distribution (the "split information"),
    not by the entropy of the target feature.

    Args:
        d: descriptive feature column (pandas Series).
        _D: dataframe whose last column is the target feature.

    Returns:
        ``information_gain(d, _D)`` divided by the entropy of ``d``; ``0.0``
        when ``d`` has zero entropy (a single level), in which case the split
        cannot separate anything.
    """
    # shannon_entropy reads the last column of the frame it is given, so a
    # one-column frame built from d yields the entropy of d's levels.
    split_info = shannon_entropy(pd.DataFrame({'d': d}))
    if split_info == 0:
        return 0.0
    return information_gain(d, _D) / split_info

### Gini Index

In [57]:
def gini_index(_D):
    """Return the Gini index of the target feature (last column) of ``_D``.

    Args:
        _D: pandas DataFrame whose last column is the target feature.

    Returns:
        One minus the sum of squared level proportions of the target.
    """
    target_name = _D.columns[-1]
    level_counts = dict(_D[target_name].value_counts())
    total = _D.iloc[:, -1].size
    squared_shares = [(count / total) ** 2 for count in level_counts.values()]
    return 1 - sum(squared_shares)

### Remainder by Gini Index

In [66]:
def gini_remainder(d, _D):
    """Return the Gini index remaining after partitioning ``_D`` on ``d``.

    Args:
        d: descriptive feature column (pandas Series).
        _D: dataframe whose last column is the target feature.

    Returns:
        The sum, over the levels of ``d``, of the share of rows at that level
        times the Gini index of the target within that level.
    """
    level_counts = dict(d.value_counts())
    total = d.size
    # Two-column frame: gini_index reads the last column ('t').
    pairs = pd.DataFrame({'d': d, 't': _D.iloc[:, -1]})
    weighted = 0
    for level, count in level_counts.items():
        partition = pairs[pairs['d'] == level]
        weighted += (count / total) * gini_index(partition)
    return weighted

### Information Gain using Gini Index

In [69]:
def gini_info_gain(d, _D):
    """Return the information gain of ``d`` measured with the Gini index.

    Args:
        d: descriptive feature column.
        _D: dataframe whose last column is the target feature.

    Returns:
        Gini index of the whole dataframe minus the Gini remainder after
        splitting on ``d``.
    """
    impurity_before = gini_index(_D)
    impurity_after = gini_remainder(d, _D)
    return impurity_before - impurity_after

# Similarity Based Learning

### Euclidean Distance

In [13]:
def e_dist(query, df, k = 1):
    """Return the ``k`` rows of ``df`` closest to ``query`` (Euclidean).

    The first and last columns of ``df`` are ignored; every column in between
    is treated as a numeric descriptive feature.

    Args:
        query: mapping from feature column name to value.
        df: pandas DataFrame.
        k: number of neighbors to return.

    Returns:
        A list of ``(index label, distance)`` tuples, nearest first.
    """
    features = df.iloc[:, 1:-1]
    names = features.columns
    distances = [
        (label, m.sqrt(sum(((query[name] - row[name]) ** 2) for name in names)))
        for label, row in features.iterrows()
    ]
    return sorted(distances, key=lambda pair: pair[1])[:k]

### Weighted K-NN Boolean

In [14]:
def w_knn(query, df, k = 1):
    """Predict a boolean target with distance-weighted k nearest neighbors.

    Each neighbor found by ``e_dist`` votes with weight ``1 / distance**2``.
    The weights of neighbors whose target (last column of ``df``) equals
    ``True`` are summed separately from the rest, and both totals are printed.

    Args:
        query: mapping from feature column name to value.
        df: pandas DataFrame; the last column is the target feature.
        k: number of neighbors to consult.

    Returns:
        ``True`` if the total "True" weight exceeds the total "False" weight,
        ``False`` otherwise (including ties).
    """
    target = df.iloc[:, -1]
    t, f = 0, 0
    # Iterate over what e_dist actually returned: it yields fewer than k
    # neighbors when df has fewer than k rows.
    for label, dist in e_dist(query, df, k):
        # An exact match (distance 0) gets infinite weight instead of
        # raising ZeroDivisionError.
        weight = 1 / dist ** 2 if dist else m.inf
        # e_dist reports index labels, so the neighbor is looked up by label
        # rather than by loop position.
        if target.loc[label] == True:  # noqa: E712 - also matches numpy bools
            t += weight
        else:
            f += weight
    print('True Weight:', t, '\nFalse Weight:' ,f)
    return t > f

### Weighted K-NN Float

In [15]:
def w_knn_f(query, df, k = 1):
    """Predict a numeric target with distance-weighted k nearest neighbors.

    Each neighbor found by ``e_dist`` is weighted by ``1 / distance**2``.

    Args:
        query: mapping from feature column name to value.
        df: pandas DataFrame; the last column is the numeric target feature.
        k: number of neighbors to consult.

    Returns:
        The weighted average of the neighbors' target values. If one or more
        neighbors coincide with the query (distance 0), the plain average of
        their targets is returned instead, since their weight is unbounded.

    Raises:
        ZeroDivisionError: if ``e_dist`` returns no neighbors.
    """
    target = df.iloc[:, -1]
    neighbors = e_dist(query, df, k)

    # e_dist reports index labels, so targets are looked up by label.
    exact = [target.loc[label] for label, dist in neighbors if dist == 0]
    if exact:
        return sum(exact) / len(exact)

    sum_w, sum_w_tf = 0, 0
    for label, dist in neighbors:
        weight = 1 / dist ** 2
        sum_w += weight
        sum_w_tf += weight * target.loc[label]
    return sum_w_tf / sum_w

### Manhattan Distance

In [16]:
def man_d(query, df, k = 1):
    """Return the ``k`` rows of ``df`` closest to ``query`` (Manhattan).

    The first and last columns of ``df`` are ignored; every column in between
    is treated as a numeric descriptive feature.

    Args:
        query: mapping from feature column name to value.
        df: pandas DataFrame.
        k: number of neighbors to return.

    Returns:
        A list of ``(index label, distance)`` tuples, nearest first; the
        labels are those of the original ``df``.
    """
    features = df.iloc[:, 1:-1]
    names = features.columns
    ranked = []
    for label, row in features.iterrows():
        gaps = (abs(float(query[name] - row[name])) for name in names)
        ranked.append((label, sum(gaps)))
    return sorted(ranked, key=lambda pair: pair[1])[:k]

### Cosine Similarity

In [17]:
def cos_sim(query, df, k = 1):
    """Return the ``k`` rows of ``df`` most cosine-similar to ``query``.

    The first and last columns of ``df`` are ignored; every column in between
    is treated as a numeric descriptive feature. Only those feature columns
    are read from ``query``, so extra keys in ``query`` (an id, a target)
    do not distort its magnitude.

    Args:
        query: mapping from feature column name to value.
        df: pandas DataFrame.
        k: number of neighbors to return.

    Returns:
        A list of ``(index label, similarity)`` tuples sorted by ascending
        similarity, so the most similar row is last. Empty when ``k <= 0``.
    """
    # n[-0:] would return every row, so handle "no neighbors" explicitly.
    if k <= 0:
        return []

    features = df.iloc[:, 1:-1]
    names = features.columns
    # Magnitude of the query over the same columns used for the dot product.
    a_vec = m.sqrt(sum(query[name] ** 2 for name in names))

    n = []
    for label, row in features.iterrows():
        b_vec = m.sqrt(sum(row[name] ** 2 for name in names))
        norm_product = a_vec * b_vec
        sim = float(sum((query[name] * row[name]) / norm_product for name in names))
        n.append((label, sim))
    n.sort(key=lambda pair: pair[1])
    return n[-k:]

### Majority Vote Boolean

In [18]:
def maj_vote(n, df):
    """Return the majority boolean target among the given neighbors.

    Args:
        n: list of ``(index label, value)`` tuples, as returned by the
            distance functions in this file.
        df: pandas DataFrame the neighbors were drawn from; the last column
            is the target feature.

    Returns:
        The string ``'True'`` if at least as many neighbors have a target
        equal to ``True`` as not (ties and an empty ``n`` give ``'True'``),
        otherwise the string ``'False'``.
    """
    target = df.iloc[:, -1]
    t, f = 0, 0
    for neighbor in n:
        # Neighbor tuples carry index labels, so look the row up by label;
        # a positional lookup is only right for a default 0..n-1 index.
        if target.loc[neighbor[0]] == True:  # noqa: E712 - also matches numpy bools
            t += 1
        else:
            f += 1
    return 'True' if t >= f else 'False'

### Majority Vote Float

In [19]:
def maj_vote_f(n, df):
    """Return the mean numeric target of the given neighbors.

    Args:
        n: list of ``(index label, value)`` tuples, as returned by the
            distance functions in this file.
        df: pandas DataFrame the neighbors were drawn from; the last column
            is the numeric target feature.

    Returns:
        The arithmetic mean of the neighbors' target values.

    Raises:
        ZeroDivisionError: if ``n`` is empty.
    """
    target = df.iloc[:, -1]
    # Neighbor tuples carry index labels, so look the rows up by label;
    # a positional lookup is only right for a default 0..n-1 index.
    num = sum(target.loc[neighbor[0]] for neighbor in n)
    return num / len(n)

### Formatted Output

In [20]:
def form(model, n, k = 1, vote = None, unit = ""):
    """Print a formatted summary of a k-NN result.

    Args:
        model: name of the model / distance measure, used in the header.
        n: iterable of neighbors; each one is printed on its own line.
        k: number of neighbors, used in the header and the prediction line.
        vote: predicted target value; the prediction line is printed only
            when this is not ``None``.
        unit: text appended after the predicted value.
    """
    print('This {}-NN {} model returns: \n\n(INDEX, VALUE)'.format(k, model))
    for x in n:
        print(x)
    # Identity test: `vote != None` is evaluated element-wise by array-like
    # votes (numpy arrays, pandas Series) and cannot be used as a condition.
    if vote is not None:
        print('\nThe {} nearest neighbors predict a target feature of: {} {}'.format(k, vote, unit))

### Convert Binary Target Feature to 1 / -1

In [21]:
def format_frame(df, tf):
    """Return a copy of ``df`` with the last column recoded to 1 / -1.

    Args:
        df: pandas DataFrame; the last column is the target feature.
        tf: the target value that should become ``1``.

    Returns:
        A copy of ``df`` in which each last-column entry equal to ``tf`` is
        replaced by ``1`` and every other entry by ``-1``. ``df`` itself is
        not modified.
    """
    df_c = df.copy()
    for row_pos in range(len(df)):
        is_match = df_c.iat[row_pos, -1] == tf
        df_c.iat[row_pos, -1] = 1 if is_match else -1
    return df_c

### Range Normalization

In [22]:
def norm(df):
    """Range-normalize the descriptive features of ``df`` in place.

    Every column except the first and the last is passed through
    ``normalize`` with its default bounds.

    Args:
        df: pandas DataFrame; modified in place.

    Returns:
        The same ``df`` object, after modification.
    """
    features = df.iloc[:, 1:-1]
    df.iloc[:, 1:-1] = features.apply(normalize)
    return df

def normalize(v, h = 1, l = 0):
    """Range-normalize the values of ``v`` into the interval ``[l, h]``.

    Args:
        v: column of numeric values exposing ``min()`` and ``max()``
            (e.g. a pandas Series).
        h: upper bound of the output range.
        l: lower bound of the output range.

    Returns:
        A list with one rescaled value per element of ``v``.
    """
    v_min = v.min()
    v_max = v.max()
    span = v_max - v_min
    width = h - l
    return [((x - v_min) / span) * width + l for x in v]

### Russell-Rao

In [23]:
def rr(query, df):
    """Return the Russell-Rao similarity of two single-row dataframes.

    Both frames are printed, then their first rows are compared column by
    column.

    Args:
        query: DataFrame whose first row is the query instance.
        df: DataFrame whose first row is the data instance.

    Returns:
        The number of columns where both values are equal and the query
        value is truthy, divided by the number of columns of ``df``.
    """
    print('\nQUERY\n', query)
    print('\nDATA INSTANCE\n', df)
    width = df.shape[1]
    cp = sum(
        1
        for col in range(width)
        if query.iat[0, col] == df.iat[0, col] and query.iat[0, col]
    )
    return cp / width

# Trees

### K-D Tree

In [24]:
class Node:
    """A single node of a k-d tree.

    Stores, exactly as passed, the item held at this node, its left and
    right children, and the splitting axis.
    """

    def __init__(self, item, left = None, right = None, axis = None):
        self.item, self.left = item, left
        self.right, self.axis = right, axis

def build_treehouse(tree_branch, depth = 0):
    """Recursively build a k-d tree from a list of points.

    At each level the points are ordered along the splitting axis
    ``depth % k`` (``k`` being the length of the first point), the median
    point becomes the node, and the points before / after it form the left /
    right subtrees. This is the same axis rule ``k_neighbor`` uses when it
    descends the tree.

    Args:
        tree_branch: list of equally sized, indexable points. The list is not
            modified.
        depth: depth of the node being built; selects the splitting axis.

    Returns:
        The root ``Node`` of the (sub)tree, or ``None`` for an empty list.
    """
    if not tree_branch:
        return None

    k = len(tree_branch[0])
    axis = depth % k
    # Order along the current splitting axis; sorting on fixed coordinates
    # regardless of depth breaks the left/right invariant the search needs.
    ordered = sorted(tree_branch, key=lambda point: point[axis])

    median_index = len(ordered) // 2

    return Node(
        item = ordered[median_index],
        left = build_treehouse(ordered[:median_index], depth + 1),
        right = build_treehouse(ordered[median_index + 1:], depth + 1),
        axis = axis
    )

### K-D Tree Nearest Neighbor

In [25]:
def distance(target, root):
    """Return the squared Euclidean distance between two points.

    No square root is taken. Coordinates are paired with ``zip``, so any
    extra coordinates of the longer point are ignored.

    Args:
        target: iterable of coordinates.
        root: iterable of coordinates.

    Returns:
        The sum of squared coordinate differences.
    """
    total = 0
    for a, b in zip(target, root):
        total += (a - b) ** 2
    return total

def k_neighbor(tree_root, target, depth = 0, best = None):
    """Return the node of a k-d tree whose item is nearest to ``target``.

    The search descends towards ``target`` first, then visits the other
    branch only if the splitting plane is closer than the best match found
    so far.

    Args:
        tree_root: root ``Node`` of the (sub)tree to search, or ``None``.
        target: query point; its length sets the number of axes.
        depth: depth of ``tree_root``; selects the splitting axis as
            ``depth % len(target)``.
        best: closest node found so far, or ``None``.

    Returns:
        The closest node found, or ``best`` unchanged when ``tree_root`` is
        ``None``.
    """
    if tree_root is None:
        return best

    axis = depth % len(target)

    # Keep the previous best unless this node is strictly closer.
    if best is None or distance(target, tree_root.item) < distance(target, best.item):
        best = tree_root

    offset = target[axis] - tree_root.item[axis]
    if offset < 0:
        near_branch, far_branch = tree_root.left, tree_root.right
    else:
        near_branch, far_branch = tree_root.right, tree_root.left

    best = k_neighbor(near_branch, target, depth + 1, best)

    # distance() is a squared distance, so compare it with the squared offset
    # to the splitting plane; a closer point can only lie on the far side if
    # the plane is nearer than the current best.
    if offset ** 2 < distance(target, best.item):
        best = k_neighbor(far_branch, target, depth + 1, best)

    return best

# Probability Based Learning

### Combinations

In [28]:
def combinations(n, r):
    """Return "n choose r", the number of ways to pick r items out of n.

    Args:
        n: total number of trials (non-negative integer).
        r: number of occurrences of a specific outcome (``0 <= r <= n``).

    Returns:
        The binomial coefficient as an exact ``int``.

    Raises:
        ValueError: if ``n``, ``r`` or ``n - r`` is negative (raised by
            ``math.factorial``).
    """
    # The quotient is always a whole number, so floor division is exact and
    # avoids the rounding and overflow of float division on large factorials.
    return m.factorial(n) // (m.factorial(r) * m.factorial(n - r))

### Binomial Distribution

In [26]:
def bin_dist(n, r, p):
    """Return the binomial probability of exactly ``r`` successes.

    Args:
        n: total number of trials.
        r: number of occurrences of a specific outcome.
        p: probability of success on a single trial.

    Returns:
        ``C(n, r) * p**r * (1 - p)**(n - r)``.
    """
    ways = combinations(n, r)
    failures = n - r
    return (ways * p**r * (1-p)**failures)

### Binomial Distribution (At Least)

In [27]:
def bin_dist_least(n, r, p):
    """Return the binomial probability of at least ``r`` successes.

    Args:
        n: total number of trials.
        r: minimum number of occurrences of a specific outcome.
        p: probability of success on a single trial.

    Returns:
        The sum of ``bin_dist(n, i, p)`` for ``i`` from ``r`` to ``n``;
        ``0.0`` when ``r > n``, since more successes than trials is
        impossible.
    """
    total = 0.0
    # Iterating instead of recursing removes the recursion-depth limit on
    # n - r. Terms are added from i = n down to i = r, the same order in
    # which the recursive formulation combined them.
    for successes in range(n, r - 1, -1):
        total = bin_dist(n, successes, p) + total
    return total

### Z Score

In [30]:
def z_score(x, mu, sigma):
    """Return how many ``sigma`` the value ``x`` lies from ``mu``.

    Args:
        x: observed value.
        mu: mean.
        sigma: standard deviation.

    Returns:
        ``(x - mu) / sigma``.
    """
    deviation = x - mu
    return deviation / sigma

### Probability Density Function

In [2]:
def prob_density_function(x, data):
    """Return the normal probability density of ``x`` under ``data``.

    ``data`` may expose its mean and standard deviation either as plain
    attributes ``mean`` / ``stdev`` (e.g. ``statistics.NormalDist``) or as
    the methods ``mean()`` / ``std()`` of a numeric column such as a pandas
    Series, in which case the sample standard deviation (``ddof=1``) is used.
    The parameters are printed before the density is returned.

    Args:
        x: value at which to evaluate the density.
        data: distribution object or numeric column, as described above.

    Returns:
        ``exp(-z**2 / 2) / sqrt(2 * pi * sigma**2)`` with
        ``z = (x - mu) / sigma``.
    """
    mean_attr = data.mean
    mu = mean_attr() if callable(mean_attr) else mean_attr

    if hasattr(data, 'stdev'):
        stdev_attr = data.stdev
        sigma = stdev_attr() if callable(stdev_attr) else stdev_attr
    else:
        sigma = data.std(ddof=1)

    z = z_score(x, mu, sigma)
    print("  N ( {}, {:.4f}, {:.4f} )".format(x, mu, sigma))
    # exp(-z^2 / 2) underflows to 0.0 for distant x, whereas raising e to
    # the power z^2 in the denominator overflows.
    return m.exp(-(z ** 2) / 2) / m.sqrt(2 * m.pi * (sigma ** 2))

### Naive Bayes Product

In [31]:
def bayes_product(data, target, x_values, bayes_prod = 1):
    """Multiply the densities of the query values under each feature.

    Every column of ``data`` except the first and the last is treated as a
    continuous descriptive feature. The i-th value of ``x_values`` is scored
    against the i-th such column with ``prob_density_function``, and a
    ``feature | target`` header is printed for each.

    Args:
        data: dataframe subset for one target level.
        target: target feature value; used only in the printed header.
        x_values: query values, one per descriptive feature, in column
            order. The list is not modified.
        bayes_prod: starting value of the product.

    Returns:
        ``bayes_prod`` multiplied by the density of every query value.

    Raises:
        IndexError: if ``x_values`` has fewer entries than there are
            descriptive features.
    """
    features = data.columns[1:-1]
    # Work on a copy so the caller's list is left intact and reusable.
    values = list(x_values)
    if len(values) < len(features):
        raise IndexError("x_values has fewer entries than descriptive features")

    for feature, x in zip(features, values):
        print(f"\n{feature} | {target}")
        bayes_prod *= prob_density_function(x, data[feature])

    return bayes_prod

### Naive Bayes Query

In [32]:
def bayes_predict(data, features, values):
    """Return the chained probability of a sequence of feature values.

    For each ``(feature, value)`` pair in turn, the share of rows in the
    current partition whose feature equals the value is computed, and the
    partition is then narrowed to those rows. The result is the product of
    those shares.

    Args:
        data: pandas DataFrame.
        features: list of column names to query, in order. Not modified.
        values: list of values, one per feature. Not modified.

    Returns:
        The product of the conditional shares; ``1`` when ``features`` is
        empty; zero as soon as a value does not occur in the current
        partition.

    Raises:
        IndexError: if ``values`` has fewer entries than ``features``.
        KeyError: if a feature is not a column of ``data``.
    """
    if len(values) < len(features):
        raise IndexError("values has fewer entries than features")

    shares = []
    impossible = False
    subset = data
    for col, val in zip(features, values):
        column = subset[col]
        count = column.value_counts().to_dict().get(val, 0)
        if count == 0:
            # The value no longer occurs in this partition: the query is
            # impossible, so the whole product is zero.
            impossible = True
            break
        shares.append(count / len(column))
        subset = subset[column == val]

    # Fold from the last share back to the first, the same association
    # order as the recursive formulation.
    result = 0 if impossible else 1
    for share in reversed(shares):
        result = share * result
    return result

### Bayes Prediction Query Singular

In [33]:
def bayes_predict_sin(data, features, values, n, k = 0, domain = 0):
    """Return the product of ``value / n`` terms, optionally smoothed.

    One feature and one value are popped from the front of the given lists
    per step (both lists are consumed). Each step contributes ``val / n``,
    or ``(val + k) / (n + k * domain)`` when ``k`` is non-zero, and ``data``
    is narrowed to the rows where the feature equals the value before the
    next step.
    NOTE(review): the values look like counts rather than feature levels,
    since they are divided by ``n`` - confirm against callers.

    Args:
        data: pandas DataFrame.
        features: list of column names; emptied by the call.
        values: list of numeric values; consumed by the call.
        n: denominator used for every term.
        k: smoothing constant; ``0`` disables smoothing.
        domain: multiplier of ``k`` in the smoothed denominator.

    Returns:
        The product of all terms; ``1`` when ``features`` is empty.
    """
    if len(features) == 0:
        return 1

    col, val = features.pop(0), values.pop(0)

    if k:
        prob = (val + k) / (n + (k * domain))
        smoothing = (k, domain)
    else:
        prob = val / n
        smoothing = ()

    return prob * bayes_predict_sin(data[data[col] == val], features, values, n, *smoothing)

### Naive Bayes Network

In [34]:
def bayes_network(data, y = None, given_y = None, index = 0):
    """Print the chained probabilities of every value path through ``data``.

    Starting at column ``index``, each distinct value of the column is
    visited in ``value_counts`` order. Its share of the current rows is
    printed (multiplied by ``given_y`` when one was passed down), and the
    function recurses into the next column with the rows narrowed to that
    value.

    Args:
        data: pandas DataFrame (or a partition of one).
        y: value chosen in the previous column; used only in the message.
        given_y: probability accumulated so far; a falsy value means this
            is the starting column.
        index: position of the column to process.

    Returns:
        ``1`` when ``index`` is past the last column, otherwise ``None``.
    """
    # base case: every column has been consumed
    if index == len(data.columns):
        return 1

    column = data.iloc[:, index]
    n_rows = len(column)
    column_name = data.columns.to_list()[index]

    # unique values of the column mapped to their number of occurrences
    for x_key, count in column.value_counts().to_dict().items():
        prob_x = count / n_rows
        data_next = data[data[column_name] == x_key]
        if given_y:
            # one arrow per level of depth, then end the line
            print("----> " * index)
            prob_x_y = prob_x * given_y
            print(f"In the next column '{column_name}', given the probability of '{y}' as {given_y:.2f}%, the probability of '{x_key}' is {prob_x_y:.2f}%\n")
            bayes_network(data_next, x_key, prob_x_y, index + 1)
        else:
            print(f"Starting in the '{column_name}' column, the initial probability of '{x_key}' is {prob_x:.2f}%\n")
            bayes_network(data_next, x_key, prob_x, index + 1)

# Error Based Learning

### Multivariate Regression

In [35]:
def multi_reg(w, d, d_0 = [1]):
    """Return the multivariate linear regression prediction ``w . [1] + d``.

    Args:
        w: list of weights, bias weight first.
        d: list of descriptive feature values.
        d_0: list prepended to ``d`` (the dummy feature); never modified.

    Returns:
        The sum of pairwise products of ``w`` and ``d_0 + d``; pairing stops
        at the shorter of the two.
    """
    extended = d_0 + d  # prepend '1' to the list of feature values
    total = 0
    for weight, value in zip(w, extended):
        total += weight * value
    return total

### Error Sum

In [36]:
def error_sum(t, w, data, start = 1, end = None, error_sum = 0):
    """Return the sum of prediction errors over the dataset.

    Args:
        t: list of target values, indexed by row number.
        w: list of weights passed to ``multi_reg``.
        data: dataset as a dataframe.
        start: first column index of the descriptive features.
        end: column index just past the last descriptive feature
            (``None`` for "through the last column").
        error_sum: starting value of the accumulator.

    Returns:
        ``error_sum`` plus ``t[i] - multi_reg(w, row_i)`` for every row.
    """
    for i in range(len(t)):
        row_values = data.iloc[i, start:end].tolist()
        error_sum += t[i] - multi_reg(w, row_values)

    return error_sum

### Squared Error Sum

In [37]:
def squared_error_sum(t, w, data, start = 1, end = None, squared_error_sum = 0):
    """Return half the sum of squared prediction errors over the dataset.

    Args:
        t: list of target values, indexed by row number.
        w: list of weights passed to ``multi_reg``.
        data: dataset as a dataframe.
        start: first column index of the descriptive features.
        end: column index just past the last descriptive feature
            (``None`` for "through the last column").
        squared_error_sum: starting value of the accumulator.

    Returns:
        Half of ``squared_error_sum`` plus ``(t[i] - multi_reg(w, row_i))**2``
        summed over every row.
    """
    for i in range(len(t)):
        row_values = data.iloc[i, start:end].tolist()
        residual = t[i] - multi_reg(w, row_values)
        squared_error_sum += residual**2

    return squared_error_sum / 2

### Error Delta

In [38]:
def error_delta(t, w, col, data, start = 1, end = None, delta = 0):
    """Return the error delta of one feature column over the dataset.

    Args:
        t: list of target values, indexed by row number.
        w: list of weights passed to ``multi_reg``.
        col: feature values of the column being updated, indexed by row
            number.
        data: dataset as a dataframe.
        start: first column index of the descriptive features.
        end: column index just past the last descriptive feature
            (``None`` for "through the last column").
        delta: starting value of the accumulator.

    Returns:
        ``delta`` plus ``col[i] * (t[i] - multi_reg(w, row_i))`` for every
        row.
    """
    for i in range(len(t)):
        row_values = data.iloc[i, start:end].tolist()
        delta += col[i] * (t[i] - multi_reg(w, row_values))

    return delta

### Logistic Regression

In [39]:
def logistic(w, i, data):
    """Return the logistic regression output for row ``i`` of ``data``.

    The first column of ``data`` is skipped. The next two values ``x0`` and
    ``x1`` are expanded into the basis
    ``[1, x0, x1, x0**2, x1**2, x0**3, x1**3, x0*x1]``, which is combined
    with the weights and passed through the logistic function.

    Args:
        w: list of weights, paired in order with the basis above.
        i: row position in ``data``.
        data: dataframe with an identifier column followed by at least two
            numeric feature columns.

    Returns:
        ``1 / (1 + e**-z)`` where ``z`` is the weighted sum of the basis.
    """
    x = data.iloc[i, 1:].tolist()
    x0, x1 = x[0], x[1]
    degree = [1, x0, x1, (x0**2), (x1**2), (x0**3), (x1**3), (x0*x1)]

    log_reg = sum(a * b for a, b in zip(w, degree))

    # Evaluate the sigmoid so the exponent is never positive: exp() of a
    # large positive number raises OverflowError, while exp() of a large
    # negative number simply underflows to 0.0.
    if log_reg >= 0:
        return 1 / (1 + m.exp(-log_reg))
    exp_z = m.exp(log_reg)
    return exp_z / (1 + exp_z)

### Support Vector Machines

In [40]:
def svm(input_vector, data, alpha, w0, d_dot_q = 0, vector = 0):
    """Return the support vector machine output for ``input_vector``.

    Each row of ``data`` is one support vector whose last value is its
    target level. Per row, the dot product of the row with ``input_vector``
    (paired up to the shorter of the two) is multiplied by the row's target
    and its ``alpha``, and ``w0`` is added; note that ``w0`` is therefore
    added once per support vector.

    Args:
        input_vector: query feature values.
        data: dataframe of support vectors, target in the last column.
        alpha: per-row multipliers, indexed by row number.
        w0: bias term added for every row.
        d_dot_q: ignored on entry; overwritten for every row.
        vector: starting value of the accumulator.

    Returns:
        The accumulated sum over all rows.
    """
    for i in range(len(data)):
        support_vector = data.iloc[i, 0:].tolist()

        d_dot_q = 0
        for d, q in zip(support_vector, input_vector):
            d_dot_q += d * q

        target_level = support_vector[-1]
        vector += (target_level * alpha[i] * d_dot_q + w0)

    return vector

### Kernel Trick

In [41]:
def svm_kernel(t, input_vector, data, alpha, w0, start = 0, end = None, svm = 0, degree = 2):
    """Return the SVM output for ``input_vector`` using a polynomial kernel.

    Per support vector, the kernel ``(d . q + 1) ** degree`` is multiplied by
    the row's target and its ``alpha``, and ``w0`` is added; note that ``w0``
    is therefore added once per support vector.

    Args:
        t: target levels of the support vectors, indexed by row number.
        input_vector: query feature values.
        data: dataframe of support vectors.
        alpha: per-row multipliers, indexed by row number.
        w0: bias term added for every row.
        start: first column index of the descriptive features.
        end: column index just past the last descriptive feature
            (``None`` for "through the last column").
        svm: starting value of the accumulator.
        degree: exponent of the polynomial kernel.

    Returns:
        The accumulated sum over all rows.
    """
    for i in range(len(data)):
        support_vector = data.iloc[i, start:end].tolist()

        # The polynomial kernel raises the whole dot product plus one to the
        # power; raising each component product separately is only
        # equivalent for one-dimensional inputs.
        d_dot_q = sum(d * q for d, q in zip(support_vector, input_vector))
        kernel = (d_dot_q + 1) ** degree
        svm += (t[i] * alpha[i] * kernel + w0)

    return svm

### Basis Functions With SVM

In [42]:
def _svm_basis_expand(v):
    """Map a 2-D point to [v0^2, v1^2, sqrt(2)v0v1, sqrt(2)v0, sqrt(2)v1, 1]."""
    root2 = m.sqrt(2)
    return [(v[0]**2), (v[1]**2), (root2 * v[0] * v[1]), (root2 * v[0]), (root2 * v[1]), 1]


def svm_basis(t, q, data, alpha, w0, start = 0, end = None, basis = 0):
    """Return the SVM output for ``q`` using explicit basis functions.

    The query and every support vector are expanded with the same 6-term
    basis built from their first two values. Per support vector, the dot
    product of the two expansions is multiplied by the row's target and its
    ``alpha``, and ``w0`` is added; note that ``w0`` is therefore added once
    per support vector.

    Args:
        t: target levels of the support vectors, indexed by row number. When
            ``None``, the last value of each sliced row is used as its target.
        q: query point; only the first two values are used.
        data: dataframe of support vectors.
        alpha: per-row multipliers, indexed by row number.
        w0: bias term added for every row.
        start: first column index of the slice taken from each row.
        end: column index just past the slice (``None`` for "through the
            last column").
        basis: starting value of the accumulator.

    Returns:
        The accumulated sum over all rows.
    """
    input_vector = _svm_basis_expand(q)

    for i in range(len(data)):
        d = data.iloc[i, start:end].tolist()
        support_vector = _svm_basis_expand(d)

        d_dot_q = sum(a * b for a, b in zip(support_vector, input_vector))
        # Use the supplied targets: the last value of the slice is only the
        # target when the slice happens to include the target column.
        target_level = d[-1] if t is None else t[i]
        basis += (target_level * alpha[i] * d_dot_q + w0)

    return basis

# Evaluation Based Learning

### Misclassification Rate

In [57]:
def mis_rate(target_col, predict_col):
    """Return the misclassification rate of a set of predictions.

    Args:
        target_col: expected target values.
        predict_col: predicted target values.

    Returns:
        The number of positions where the two differ, divided by the length
        of ``predict_col``.
    """
    miss = sum(
        1
        for expected, predicted in zip(target_col, predict_col)
        if expected != predicted
    )
    return miss / len(predict_col)

### Target and Prediction Values

In [13]:
def t_p(target_col, predict_col):
    """Count the four outcomes of a binary prediction.

    A truthy expected value is treated as the positive class.

    Args:
        target_col: expected target values.
        predict_col: predicted target values.

    Returns:
        The tuple ``(TP, FN, FP, TN)``.
    """
    tally = {'TP': 0, 'FN': 0, 'FP': 0, 'TN': 0}

    for expected, predicted in zip(target_col, predict_col):
        if expected:
            outcome = 'TP' if expected == predicted else 'FN'
        else:
            outcome = 'TN' if expected == predicted else 'FP'
        tally[outcome] += 1

    return tally['TP'], tally['FN'], tally['FP'], tally['TN']

### Confusion Matrix

In [11]:
def con_matrix(target_col, predict_col):
    """Return the confusion matrix of a binary prediction as a dataframe.

    Args:
        target_col: expected target values.
        predict_col: predicted target values.

    Returns:
        A 2x2 DataFrame with rows and columns labelled 'Positive' and
        'Negative'; the 'Positive' column holds ``[TP, FP]`` and the
        'Negative' column holds ``[FN, TN]``.
    """
    TP, FN, FP, TN = t_p(target_col, predict_col)

    return pd.DataFrame(
        {'Positive': [TP, FP], 'Negative': [FN, TN]},
        index = ['Positive', 'Negative'],
    )

### Recall

In [3]:
def recall(x, y):
    """Return ``x`` as a fraction of ``x + y``.

    Args:
        x: count in the numerator (e.g. true positives).
        y: the complementary count (e.g. false negatives).

    Returns:
        ``x / (x + y)``.
    """
    total = x + y
    return x / total

### Class Average (Harmonic Mean)

In [1]:
def harmonic_mean(target_col, predict_col):
    """Return the harmonic mean of the two per-class recalls.

    Args:
        target_col: expected target values.
        predict_col: predicted target values.

    Returns:
        The harmonic mean of the recall of the positive class and the recall
        of the negative class.
    """
    TP, FN, FP, TN = t_p(target_col, predict_col)

    inverse_sum = 1 / recall(TP, FN) + 1 / recall(TN, FP)
    return 1 / ((1 / 2) * inverse_sum)

### Average Class Accuracy

In [None]:
def average_class_accuracy(target_col, predict_col):
    """Return the arithmetic mean of the two per-class recalls.

    Args:
        target_col: expected target values.
        predict_col: predicted target values.

    Returns:
        The average of the recall of the positive class and the recall of
        the negative class.
    """
    TP, FN, FP, TN = t_p(target_col, predict_col)

    recall_sum = recall(TP, FN) + recall(TN, FP)
    return recall_sum / 2

### Simple Accuracy

In [3]:
def simple_accuracy(target_col, predict_col):
    """Return the fraction of predictions that are correct.

    Args:
        target_col: expected target values.
        predict_col: predicted target values.

    Returns:
        ``(TP + TN)`` divided by the total number of counted predictions.
    """
    TP, FN, FP, TN = t_p(target_col, predict_col)

    correct = TP + TN
    counted = TP + FN + FP + TN
    return correct / counted

### F1 Measure

In [21]:
def f1(target_col, predict_col):
    """Return the F1 measure of a binary prediction.

    Args:
        target_col: expected target values.
        predict_col: predicted target values.

    Returns:
        Twice the product of precision and recall divided by their sum.
    """
    TP, FN, FP, TN = t_p(target_col, predict_col)

    # recall(a, b) is simply a / (a + b), so it also yields the precision
    # when given the true and false positives.
    precision = recall(TP, FP)
    rec = recall(TP, FN)

    product = precision * rec
    return 2 * (product / (precision + rec))

### Squared Error

In [53]:
def squared_error(target_col, predict_col):
    """Return half the sum of squared prediction errors.

    Args:
        target_col: expected target values.
        predict_col: predicted target values.

    Returns:
        ``sum((expected - predicted)**2) / 2`` over the paired values.
    """
    total = 0
    for expected, predicted in zip(target_col, predict_col):
        total += (expected - predicted)**2
    return total / 2

### Sum of Squares

In [77]:
def squares_sum_eval(target_col, predict_col):
    """Return half the total sum of squares of the target values.

    This is the denominator of the R2 measure: the squared error of a model
    that always predicts the mean target value.

    Args:
        target_col: expected target values.
        predict_col: predicted target values; accepted for a signature
            parallel to ``squared_error`` but not used.

    Returns:
        ``sum((expected - mean(target_col))**2) / 2``.

    Raises:
        ZeroDivisionError: if ``target_col`` is empty.
    """
    targets = list(target_col)
    # The mean is taken over the targets. It is computed with sum/len
    # because the math module has no mean function.
    mean_bar = sum(targets) / len(targets)
    return sum([(expected - mean_bar)**2 for expected in targets]) / 2

### R2 Measure

In [70]:
def r2(target_col, predict_col):
    """Return the R2 measure of a set of predictions.

    Args:
        target_col: expected target values.
        predict_col: predicted target values.

    Returns:
        One minus the ratio of ``squared_error`` to ``squares_sum_eval``.
    """
    model_error = squared_error(target_col, predict_col)
    baseline_error = squares_sum_eval(target_col, predict_col)
    return 1 - (model_error / baseline_error)