In [2]:
import unittest
from dataclasses import dataclass

import numpy as np
import pandas as pd
from bokeh.plotting import figure, output_notebook, show

from task_modules.database_manager import DatabaseManager
from task_modules.least_squares_selector import LeastSquaresSelector
from task_modules.test_data_mapper import TestDataMapper
from task_modules.visualize import visualize_data

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Running unit tests

In [5]:
# Execute the test script inside the notebook's namespace; the TestCase classes it
# defines then live under __main__, where the verbose run in the next cell finds them.
%run unit_tests.py

.....
----------------------------------------------------------------------
Ran 5 tests in 0.066s

OK


In [6]:
# Re-run every TestCase currently defined in __main__ with per-test output.
# argv=[""] stops unittest from parsing the kernel's own command line,
# exit=False keeps it from calling sys.exit(), and the trailing semicolon
# suppresses the TestProgram repr that would otherwise be echoed as cell output.
unittest.main(argv=[""], verbosity=2, exit=False);

test_create_and_load_table (__main__.TestDatabaseManager.test_create_and_load_table) ... 

ok
test_create_table_with_empty_df (__main__.TestDatabaseManager.test_create_table_with_empty_df) ... ok
test_least_squares_mismatched_lengths (__main__.TestLeastSquaresSelector.test_least_squares_mismatched_lengths) ... ok
test_select_best_fit (__main__.TestLeastSquaresSelector.test_select_best_fit) ... ok
test_map_test_data (__main__.TestTestDataMapper.test_map_test_data) ... ok

----------------------------------------------------------------------
Ran 5 tests in 0.052s

OK


<unittest.main.TestProgram at 0x75c009e28bc0>

In [4]:
# eq=False keeps identity-based ==/hash exactly as the hand-written class had;
# only __init__ and __repr__ are generated.
@dataclass(eq=False)
class Args:
    """Notebook stand-in for the parsed command-line arguments.

    Attributes:
        train: Path of the training-data CSV.
        ideal: Path of the ideal-functions CSV.
        test: Path of the test-data CSV.
        db: Database name handed to ``DatabaseManager(db_name=...)``.
        visualize: Visualization flag; defaults to ``False``.
            NOTE(review): no cell in this notebook reads it — confirm intent.
    """

    train: str
    ideal: str
    test: str
    db: str
    visualize: bool = False


# Inputs for this run; edit the paths here to point at another dataset.
args = Args(
    train="datasets1/train.csv",
    ideal="datasets1/ideal.csv",
    test="datasets1/test.csv",
    db="localDB.db",
    visualize=False,
)

In [4]:
# Read the three input CSVs named in args, in order: training set,
# ideal functions, test points.
df_train, df_ideal, df_test = (
    pd.read_csv(csv_path) for csv_path in (args.train, args.ideal, args.test)
)

In [5]:
# Step 1: create the DatabaseManager for args.db and save both input frames,
# training data first, then the ideal functions.
db_manager = DatabaseManager(db_name=args.db)
for table_name, frame in (("train_data", df_train), ("ideal_functions", df_ideal)):
    db_manager.create_table(table_name, frame)

In [6]:
# Step 2: Perform Least-Squares Selection
# select_best_fit() yields a dict keyed by training column whose value is the
# chosen ideal-function column (see the displayed result below the cell).
selector = LeastSquaresSelector(df_train, df_ideal)
selected_ideals = selector.select_best_fit()
selected_ideals  # bare last expression so the notebook displays the mapping

{'y1': 'y42', 'y2': 'y41', 'y3': 'y11', 'y4': 'y48'}

In [7]:
# Largest absolute gap between each training column and its selected ideal
# function. The first column of df_train is skipped (presumably x — confirm);
# rows are paired by index, so both frames are assumed to share the same row order.
max_devs = {
    y_col: (df_train[y_col] - df_ideal[selected_ideals[y_col]]).abs().max()
    for y_col in df_train.columns[1:]
}

In [8]:
# Step 3: Map test data to the ideal functions
# The mapper receives the test points, the ideal functions, the train->ideal
# selection and the per-training-column maximum deviations computed above.
# NOTE(review): how max_devs becomes an acceptance threshold is decided inside
# TestDataMapper and is not visible from this notebook.
mapper = TestDataMapper(df_test, df_ideal, selected_ideals, max_devs)
mapped_data = mapper.map_test_data()

In [9]:
# Display the mapping result. Rows shown with empty mapped_ideal / ideal_y /
# deviation appear to be test points that were not assigned to any ideal function.
mapped_data

Unnamed: 0,x,y_test,mapped_ideal,ideal_y,deviation
0,17.5,34.161040,y41,34.512188,-0.351148
1,0.3,1.215102,y41,0.747760,0.467342
2,-8.7,-16.843908,,,
3,-19.2,-37.170870,,,
4,-11.0,-20.263054,,,
...,...,...,...,...,...
95,-1.9,-4.036904,y41,-4.273150,0.236246
96,12.2,-0.010358,y48,-0.167227,0.156869
97,16.5,-33.964134,y42,-33.351200,-0.612934
98,5.3,-10.291622,y42,-10.322813,0.031191


In [10]:
# Save the mapped test data to the database under the name "test_results",
# through the same DatabaseManager instance used for the input tables.
db_manager.create_table("test_results", mapped_data)

In [11]:
# Configure Bokeh to render its output in notebook cells.
# (The bokeh import lives in the import cell at the top of the notebook.)
output_notebook()

# Hand the training, ideal and test frames plus the selection and mapping
# results to the project's plotting helper.
# NOTE(review): this runs unconditionally although args.visualize is False —
# confirm whether the flag is meant to gate this call.
visualize_data(df_train, df_ideal, df_test, selected_ideals, mapped_data)


