In [1]:
import sys
sys.path.append(".")

from pathlib import Path
from src.loaders import TrainingDataLoad, IdealDataLoad, TestDataLoad
from src.db_manager import DatabaseManager
from src.matcher import FunctionMatch
from src.mapper import Mapper
from src.visualizer import BokehViz

In [2]:
# Load the three CSV inputs through their dedicated loader classes.

base_path = Path("data")  # change if your CSVs are not in /data

# One loader per input file; each is constructed before any file is loaded.
train_loader = TrainingDataLoad(base_path.joinpath("train.csv"))
ideal_loader = IdealDataLoad(base_path.joinpath("ideal.csv"))
test_loader = TestDataLoad(base_path.joinpath("test.csv"))

# Load in the fixed order train -> ideal -> test.
train_df, ideal_df, test_df = [
    loader.load() for loader in (train_loader, ideal_loader, test_loader)
]

# Cell output: a preview of the training frame followed by the three shapes.
(train_df.head(), *(frame.shape for frame in (train_df, ideal_df, test_df)))


(      x         y1         y2         y3         y4
 0 -20.0 -45.292340 -15999.796  99.529580  899.82750
 1 -19.9 -44.364960 -15761.017  99.895670  893.42740
 2 -19.8 -44.565968 -15524.681  98.855780  887.16046
 3 -19.7 -44.762450 -15290.500  98.126100  881.44870
 4 -19.6 -44.188698 -15058.586  97.511475  875.37726,
 (400, 5),
 (400, 51),
 (100, 2))

In [3]:
# Persist the training and ideal frames as base tables via DatabaseManager.

db = DatabaseManager("sqlite:///assignment.db")

# Written in a fixed order: "train" first, then "ideal".
for table_name, frame in (("train", train_df), ("ideal", ideal_df)):
    db.write_table(table_name, frame)

print("Tables 'train' and 'ideal' written via DatabaseManager.")


Tables 'train' and 'ideal' written via DatabaseManager.


In [4]:
# Use FunctionMatch to pair each training function with an ideal function,
# then report the deviations and thresholds it derives.

matcher = FunctionMatch(train_df=train_df, ideal_df=ideal_df)

# best_matches maps a training column to an (ideal column, SSE) pair,
# as the unpacking in the loop below relies on.
best_matches = matcher.find_matches()

print("Best matches:")
for train_col, (ideal_col, sse) in best_matches.items():
    print(f"  {train_col} -> {ideal_col} with SSE = {sse:.4f}")

max_devs, thresholds, ideal_threshold = matcher.dev_and_thresh()

# The three reports share one layout: a heading line, then one
# "  name: value" line per entry, values shown to four decimals.
for heading, values in (
    ("\nMax deviations per training function:", max_devs),
    ("\nThresholds per training function (max_dev × √2):", thresholds),
    ("\nThresholds per ideal function:", ideal_threshold),
):
    print(heading)
    for name, value in values.items():
        print(f"  {name}: {value:.4f}")

Best matches:
  y1 -> y13 with SSE = 34.0807
  y2 -> y24 with SSE = 33.4518
  y3 -> y36 with SSE = 35.5727
  y4 -> y40 with SSE = 34.9989

Max deviations per training function:
  y1: 0.4992
  y2: 0.4990
  y3: 0.4989
  y4: 0.4998

Thresholds per training function (max_dev × √2):
  y1: 0.7060
  y2: 0.7057
  y3: 0.7056
  y4: 0.7068

Thresholds per ideal function:
  y13: 0.7060
  y24: 0.7057
  y36: 0.7056
  y40: 0.7068


In [5]:
# Map test points onto the chosen ideal functions, then persist the result.

mapper_inputs = {
    "test_df": test_df,
    "ideal_df": ideal_df,
    "best_matches": best_matches,
    "ideal_threshold": ideal_threshold,
}
mapper = Mapper(**mapper_inputs)

mapping_df = mapper.map_test()

# Quick look at the result: its shape and first rows.
print("Mapping shape:", mapping_df.shape)
display(mapping_df.head())

# Store the mapping in SQLite through the shared DatabaseManager instance.
mapper.write_to_db(db, table_name="mapping")
print("Table 'mapping' written via DatabaseManager.")


Mapping shape: (34, 4)


Unnamed: 0,x,y,delta_y,ideal_function
0,3.4,78.95702,0.34902,y24
1,17.6,57.98592,0.541664,y40
2,-0.7,-1.351244,0.665244,y24
3,-15.7,-7740.1426,0.3566,y24
4,-0.4,106.64051,0.59843,y40


Table 'mapping' written via DatabaseManager.


In [6]:
# Use BokehViz to plot the results

viz_inputs = {
    "train_dframe": train_df,
    "ideal_dframe": ideal_df,
    "best_matches": best_matches,
    "mapping_dframe": mapping_df,
}
viz = BokehViz(**viz_inputs)

# First figure: training vs ideal plots (2x2 grid)
viz.training_vs_ideal()

# Second figure: mapped test points scatter
viz.mapped_test_points()


In [7]:
# Sanity check: recompute the matches and print the mapping, its type and its size.
best_matches = matcher.find_matches()
for detail in (best_matches, type(best_matches), len(best_matches)):
    print(detail)



{'y1': ('y13', 34.08070758146571), 'y2': ('y24', 33.45176095311642), 'y3': ('y36', 35.57270039576923), 'y4': ('y40', 34.998874813202264)}
<class 'dict'>
4
