In [None]:
from db_loader import load_csv_to_db
from db.connection_factory import SqliteFactory
from analytics import sum_deviations_squared, list_to_dataframe, max_deviation
from db.dao.test import TestDAO, TestIdealDAO
from db.dao.train import TrainDAO
from db.dao.ideal import IdealDAO
import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd

In [None]:
# (Re)load the DB via load_csv_to_db.
# NOTE: the call below is active, so it runs every time this cell is executed;
# comment it out to skip reloading.

load_csv_to_db(SqliteFactory)

In [None]:
# Data-access objects for the training and ideal functions; both are built
# from the same SqliteFactory.
train_dao = TrainDAO(SqliteFactory)
ideal_dao = IdealDAO(SqliteFactory)

In [None]:
# Fetch every record from each DAO and convert the result with
# list_to_dataframe. Later cells index these by 'x' and 'y<N>' columns.
train_data = list_to_dataframe(train_dao.get_all())
ideal_data = list_to_dataframe(ideal_dao.get_all())

In [None]:
%matplotlib inline
import sys

ideal_no = {i: -1 for i in range(1, 5)}

for j in range(1, 5):
    train_k = "y%d" % j
    deviation = float('inf')
    
    for i in range(1, 51):
        ideal_k = 'y%d' % i
        tmp = sum_deviations_squared(ideal_data[ideal_k], train_data[train_k])
        
        if tmp < deviation:
            ideal_no[j] = i
            deviation = tmp
    
    print("Training function %d" % j)
    fig, axes = plt.subplots(1, 1, figsize=(15, 12))
    axes.scatter(train_data['x'], train_data[train_k], label='Training', marker='o')
    axes.plot(ideal_data['x'], ideal_data["y%d" % ideal_no[j]], "red", label='Ideal', linewidth=2, alpha=1)
    axes.legend()
    plt.show()

In [None]:
print("Training to ideal function number mapping")

# One-row table: columns are the training function numbers, the single row
# (index 0) holds the selected ideal function numbers.
pd.DataFrame(ideal_no, index=[0])

In [None]:
# Max deviation between each training function and its selected ideal
# function, keyed by the ideal function number. If two training functions
# share an ideal function, the later one overwrites the earlier entry.
max_deviations = {
    ideal_idx: max_deviation(
        train_data["y%d" % train_idx],
        ideal_data['y%d' % ideal_idx],
    )
    for train_idx, ideal_idx in ideal_no.items()
}

In [None]:
print("Max deviation of train functions")

%matplotlib inline
plt.barh([i for i in range(1, 5)], max_deviations.values())

for index, value in enumerate(max_deviations.values()):
    plt.text(value, index + 1, str(value))

plt.show()

In [None]:
# Load every test record and convert it in one step; the bare last
# expression displays the resulting frame.
test_dao = TestDAO(SqliteFactory)
test_data = list_to_dataframe(test_dao.get_all())
test_data

In [None]:
# Map every test point to the selected ideal function that fits it best.
#
# For each test (x, y), the ideal_data row with the same x is looked up and the
# absolute deviation to each selected ideal function (the keys of
# max_deviations) is computed. A function qualifies only if that deviation is
# at most sqrt(2) times its stored max deviation; among the qualifying
# functions the one with the smallest deviation wins. Points with no
# qualifying function, or with no matching x in ideal_data, get None in all
# three result columns.
assigned_ideal = []
delta_ys = []
ideal_ys = []

# Loop-invariant factor applied to each function's max deviation.
tolerance_factor = math.sqrt(2)

for x, y in zip(test_data['x'].values, test_data['y'].values):
    # Exact-equality match on x; only the first matching row is read below.
    ideal_row = ideal_data[ideal_data['x'] == x]

    if ideal_row.empty:
        # Append a placeholder to all three lists so they stay aligned with
        # the rows of test_data when assigned as columns below.
        assigned_ideal.append(None)
        delta_ys.append(None)
        ideal_ys.append(None)
        continue

    min_delta_y = float('inf')
    ideal_func = None

    for k, v in max_deviations.items():
        delta_y = abs(y - ideal_row["y%d" % k].values[0])

        # Must be within tolerance AND strictly better than the best so far.
        if v * tolerance_factor >= delta_y and min_delta_y > delta_y:
            min_delta_y = delta_y
            ideal_func = k

    assigned_ideal.append(ideal_func)
    delta_ys.append(min_delta_y if min_delta_y < float('inf') else None)
    ideal_ys.append(ideal_row["y%d" % ideal_func].values[0] if ideal_func is not None else None)

# Attach the results as new columns (overwritten if this cell is re-run).
test_data['ideal_func'] = assigned_ideal
test_data['delta_y'] = delta_ys
test_data['ideal_y'] = ideal_ys
print(test_data.to_string())

In [None]:

# Scatter of all test points, overlaid with the ideal-function value of each
# point, drawn as one series per selected ideal function.
fig, axes = plt.subplots(figsize=(10, 10))
axes.scatter(test_data['x'], test_data['y'], label="Test", marker='o')

for k in max_deviations:
    # Blank out the ideal_y of every point not assigned to function k so that
    # only this function's points are drawn in this series.
    not_assigned_to_k = test_data['ideal_func'] != k
    ideal_y_for_k = test_data['ideal_y'].copy()
    ideal_y_for_k.loc[not_assigned_to_k] = None
    axes.scatter(
        test_data['x'],
        ideal_y_for_k,
        label="Ideal function %d" % k,
        alpha=0.7,
    )
axes.legend()
plt.show()

In [None]:
# DAO used by the following cells to save the test-point assignments and to
# read them back.
test_ideal_dao = TestIdealDAO(SqliteFactory)

In [None]:
# Persist one record per test point. Ids are 1-based row positions so that
# each saved record gets a distinct id.
rows = zip(
    test_data['x'].values,
    test_data['y'].values,
    test_data['delta_y'].values,
    test_data['ideal_func'].values,
)

for record_id, (x_val, y_val, delta_val, func_no) in enumerate(rows, start=1):
    test_ideal_dao.save({
        'id': record_id,
        'x': x_val,
        'y': y_val,
        'delta_y': delta_val,
        'ideal_no': func_no,
    })

In [None]:
# Read the saved records back through the DAO and display them as a frame.
list_to_dataframe(test_ideal_dao.get_all())