In [3]:
import sys
sys.path.append(".")

from pathlib import Path
from src.loaders import TrainingDataLoader, IdealDataLoader, TestDataLoader
from src.db_manager import DatabaseManager
from src.matcher import FunctionMatcher
from src.mapper import TestMapper
from src.visualizer import VisualizationManager

In [4]:
# Load the three CSV inputs through the project's loader classes.

base_path = Path("data")  # relative directory holding the CSVs; adjust if they live elsewhere

# Construct every loader first, then load in the same train -> ideal -> test order.
train_loader = TrainingDataLoader(base_path.joinpath("train.csv"))
ideal_loader = IdealDataLoader(base_path.joinpath("ideal.csv"))
test_loader = TestDataLoader(base_path.joinpath("test.csv"))

train_df, ideal_df, test_df = (
    loader.load() for loader in (train_loader, ideal_loader, test_loader)
)

# Cell output: a preview of the training frame followed by the shape of each frame.
(
    train_df.head(),
    train_df.shape,
    ideal_df.shape,
    test_df.shape,
)


(      x         y1         y2         y3         y4
 0 -20.0 -45.292340 -15999.796  99.529580  899.82750
 1 -19.9 -44.364960 -15761.017  99.895670  893.42740
 2 -19.8 -44.565968 -15524.681  98.855780  887.16046
 3 -19.7 -44.762450 -15290.500  98.126100  881.44870
 4 -19.6 -44.188698 -15058.586  97.511475  875.37726,
 (400, 5),
 (400, 51),
 (100, 2))

In [5]:
# Persist the training and ideal frames as base tables through DatabaseManager.

db = DatabaseManager("sqlite:///assignment.db")

# Same two writes as before, in the same order: "train" first, then "ideal".
for table, frame in (("train", train_df), ("ideal", ideal_df)):
    db.write_table(table, frame)

print("Tables 'train' and 'ideal' written via DatabaseManager.")


Tables 'train' and 'ideal' written via DatabaseManager.


In [6]:
# Run FunctionMatcher on the loaded frames - this should reproduce the earlier results.

matcher = FunctionMatcher(train_df=train_df, ideal_df=ideal_df)
best_matches = matcher.find_best_matches()

# Each entry pairs a training column with its (ideal column, SSE) match.
print("Best matches:")
for train_col, match in best_matches.items():
    ideal_col, sse = match
    print(f"  {train_col} -> {ideal_col} with SSE = {sse:.4f}")

# The matcher returns three values: per-column maxima, their overall maximum,
# and the threshold that later cells use.
deviation_summary = matcher.compute_deviations_and_threshold()
max_devs, global_max_dev, threshold = deviation_summary

print("\nMax deviations:", max_devs)
print("Global max deviation:", global_max_dev)
print("Threshold:", threshold)


Best matches:
  y1 -> y13 with SSE = 34.0807
  y2 -> y24 with SSE = 33.4518
  y3 -> y36 with SSE = 35.5727
  y4 -> y40 with SSE = 34.9989

Max deviations: {'y1': 0.4992209999999999, 'y2': 0.49900000000002365, 'y3': 0.49894299999999703, 'y4': 0.49977900000000375}
Global max deviation: 0.49977900000000375
Threshold: 0.7067942399892684


In [7]:
# Map the test points with TestMapper, preview the result, then store it.

mapper = TestMapper(
    test_df=test_df, ideal_df=ideal_df,
    best_matches=best_matches, threshold=threshold,
)
mapping_df = mapper.map_test_points()

# Report the size of the mapping and show its first rows.
print("Mapping shape:", mapping_df.shape)
display(mapping_df.head())

# Store the mapping in the SQLite database opened earlier as `db`.
mapper.write_to_db(db, table_name="mapping")
print("Table 'mapping' written via DatabaseManager.")


Mapping shape: (34, 4)


Unnamed: 0,x,y,delta_y,ideal_function
0,3.4,78.95702,0.34902,y24
1,17.6,57.98592,0.541664,y40
2,-0.7,-1.351244,0.665244,y24
3,-15.7,-7740.1426,0.3566,y24
4,-0.4,106.64051,0.59843,y40


Table 'mapping' written via DatabaseManager.


In [8]:
# Plot the results with VisualizationManager.

viz = VisualizationManager(
    train_df=train_df, ideal_df=ideal_df,
    best_matches=best_matches, mapping_df=mapping_df,
)

# First figure: training data against the matched ideal functions (2x2 grid).
viz.plot_training_vs_ideal()

# Second figure: scatter of the mapped test points.
viz.plot_mapped_test_points()


In [None]:
# Sanity checks / simple unit tests
#
# Every assertion carries a message so that a failure reports which invariant
# broke and the offending values, rather than a bare AssertionError.

# One best match is expected per training function, i.e. per column of
# train_df other than "x" (4 for the data loaded above).
training_columns = [col for col in train_df.columns if col != "x"]
assert len(best_matches) == len(training_columns), (
    f"expected {len(training_columns)} best matches, got {len(best_matches)}"
)

assert threshold > 0, f"threshold must be positive, got {threshold}"
assert not mapping_df.empty, "mapping_df is empty: no test point was mapped"

# No mapped row may have a delta_y above the threshold.
within_threshold = mapping_df["delta_y"] <= threshold
assert within_threshold.all(), (
    f"{int((~within_threshold).sum())} mapped rows have delta_y above the threshold {threshold}"
)

print("All sanity checks passed.")
