In [1]:
#Importing libraries
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sklearn.linear_model import LinearRegression
from bokeh.plotting import figure, show, ColumnDataSource
from bokeh.io import output_notebook
from bokeh.transform import linear_cmap
from bokeh.models import ColorBar
from bokeh.palettes import Category10
import math

#For displaying the visualization in the same Jupyter notebook
output_notebook()

In [2]:
# Base class for database handling
class DataProcessor:
    """Base class that owns the database engine shared by the subclasses in this notebook."""

    def __init__(self, db_name='function_mapping.db'):
        """Create a SQLAlchemy engine for the SQLite file ``db_name``.

        Args:
            db_name: File name of the SQLite database; interpolated into a
                ``sqlite:///`` URL.
        """
        db_url = f'sqlite:///{db_name}'
        self.engine = create_engine(db_url, echo=False)

    def load_csv_to_db(self, csv_path, table_name):
        """Read a CSV file and store it as a database table.

        An existing table with the same name is replaced and the DataFrame
        index is not written. The call is best-effort: any exception is
        reported on stdout and swallowed, so the method returns ``None``
        whether or not the load succeeded.

        Args:
            csv_path: Path of the CSV file handed to ``pandas.read_csv``.
            table_name: Name of the table to (re)create.
        """
        try:
            frame = pd.read_csv(csv_path)
            frame.to_sql(name=table_name, con=self.engine, if_exists='replace', index=False)
            print(f"{table_name} loaded successfully.")
        except Exception as err:
            # Deliberately non-fatal: report the problem and let the caller carry on.
            print(f"Error loading {table_name}: {err}")


In [3]:
# Data mapping class with inheritance
class DataMapper(DataProcessor): #Inheriting the base class: DataProcessor
    """Pick the best-fitting ideal function per training column and map test points.

    All data is read through ``self.engine`` from the tables
    'training_data', 'ideal_functions' and 'test_data'; mapping results are
    written back to 'mapping_results'.
    """

    def select_best_functions(self):
        """Choose, for every training column, the ideal column with the least squared error.

        The first column of each table is skipped (presumably the shared
        ``x`` column — confirm against the CSV layout). The subtraction is
        index-aligned by pandas, so the training and ideal tables are assumed
        to list the same x values in the same row order — TODO confirm.

        Returns:
            dict: ``{training column name: best matching ideal column name}``.
        """
        train_df = pd.read_sql('training_data', con=self.engine)
        ideal_df = pd.read_sql('ideal_functions', con=self.engine)
        ideal_cols = ideal_df.columns[1:]
        selections = {}
        for train_col in train_df.columns[1:]:
            # Sum of squared differences against every candidate ideal function.
            errors = {
                ideal_col: ((train_df[train_col] - ideal_df[ideal_col]) ** 2).sum()
                for ideal_col in ideal_cols
            }
            selections[train_col] = min(errors, key=errors.get)
        return selections

    def map_test_points(self, selections):
        """Assign each test point to one of the selected ideal functions.

        A test point qualifies for an ideal function when its absolute
        deviation from that function (at the matching x) does not exceed the
        largest training-vs-ideal deviation multiplied by sqrt(2). When
        several functions qualify, the one with the smallest deviation is
        chosen (ties keep the earlier entry of ``selections``). Points without
        a matching x row or without a qualifying function are stored with
        ``None`` for 'Delta Y' and 'Ideal Function'.

        The result replaces the 'mapping_results' table and a summary line is
        printed.

        Args:
            selections: Mapping ``{training column: ideal column}`` as
                returned by ``select_best_functions``.
        """
        test_df = pd.read_sql('test_data', con=self.engine)
        train_df = pd.read_sql('training_data', con=self.engine)
        ideal_df = pd.read_sql('ideal_functions', con=self.engine)

        # Acceptance threshold per training column: max |train - ideal| * sqrt(2).
        thresholds = {
            train_col: np.max(np.abs(train_df[train_col] - ideal_df[ideal_col])) * math.sqrt(2)
            for train_col, ideal_col in selections.items()
        }

        results = []
        for x_val, y_val in zip(test_df['x'], test_df['y']):
            # The x lookup does not depend on the candidate function, so do it once per point.
            ideal_row = ideal_df.loc[np.isclose(ideal_df['x'], x_val)]
            best = None  # (deviation, ideal column) of the closest qualifying function
            if not ideal_row.empty:
                for train_col, ideal_col in selections.items():
                    deviation = abs(y_val - ideal_row[ideal_col].values[0])
                    within_limit = deviation <= thresholds[train_col]
                    if within_limit and (best is None or deviation < best[0]):
                        best = (deviation, ideal_col)
            if best is None:
                results.append({'x': x_val, 'y': y_val, 'Delta Y': None, 'Ideal Function': None})
            else:
                results.append({'x': x_val, 'y': y_val, 'Delta Y': best[0], 'Ideal Function': best[1]})

        # Explicit columns keep the frame well-formed even when there are no test rows.
        results_df = pd.DataFrame(results, columns=['x', 'y', 'Delta Y', 'Ideal Function'])
        results_df.to_sql('mapping_results', con=self.engine, if_exists='replace', index=False)
        print(f"Mapped {results_df['Ideal Function'].notnull().sum()} out of {len(test_df)} test points.")

In [4]:
# Visualizer class for all figures
class Visualizer(DataProcessor):
    """Bokeh figures built from the tables reachable through ``self.engine``."""

    def plot_training_vs_ideal(self, train_col, ideal_col):
        """Overlay one training column and one ideal column as line plots.

        Args:
            train_col: Column of the 'training_data' table to draw (solid blue).
            ideal_col: Column of the 'ideal_functions' table to draw (dashed green).
        """
        train_df = pd.read_sql('training_data', con=self.engine)
        ideal_df = pd.read_sql('ideal_functions', con=self.engine)
        p = figure(title=f"{train_col} vs {ideal_col}", x_axis_label='x', y_axis_label='y')
        p.line(train_df['x'], train_df[train_col], legend_label="Training", color="blue", line_width=2)
        p.line(ideal_df['x'], ideal_df[ideal_col], legend_label="Ideal", color="green", line_width=2, line_dash="dashed")
        p.legend.location = "top_left"
        show(p)

    def plot_mapped_test_points(self):
        """Scatter the mapped test points, coloured by their assigned ideal function.

        Rows of 'mapping_results' without an 'Ideal Function' are dropped.
        Colours come from the 10-entry Category10 palette and repeat when
        there are more than ten distinct functions.
        """
        mapped_df = pd.read_sql('mapping_results', con=self.engine).dropna(subset=['Ideal Function']).copy()
        mapped_df['color'] = mapped_df['Ideal Function'].astype('category').cat.codes.map(lambda x: Category10[10][x % 10])
        source = ColumnDataSource(mapped_df)
        p = figure(title="Mapped Test Points", x_axis_label="x", y_axis_label="y")
        # scatter() is the screen-size marker glyph; circle(size=...) is deprecated in Bokeh 3.4+.
        p.scatter(x='x', y='y', source=source, size=8, color='color', legend_field='Ideal Function')
        p.legend.location = "top_left"
        show(p)

    def plot_deviation_heatmap(self):
        """Scatter 'Delta Y' against x, colouring each point by its deviation.

        Rows of 'mapping_results' without a 'Delta Y' are dropped. The colour
        scale spans the minimum to maximum 'Delta Y' of the remaining rows and
        is shown as a colour bar on the right.
        """
        df = pd.read_sql('mapping_results', con=self.engine).dropna(subset=['Delta Y']).copy()
        mapper = linear_cmap(field_name='Delta Y', palette='Viridis256',
                             low=df['Delta Y'].min(), high=df['Delta Y'].max())
        source = ColumnDataSource(df)
        p = figure(title="Deviation Heatmap", x_axis_label="x", y_axis_label="Delta Y")
        # scatter() replaces the deprecated circle(size=...) call; the default marker is a circle.
        p.scatter('x', 'Delta Y', size=8, source=source, color=mapper)
        color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0, 0))
        p.add_layout(color_bar, 'right')
        show(p)

In [5]:
# Regression approximation
class RegressionApproximator(DataProcessor):
    """Fits a straight line to an ideal function and plots it against the data."""

    def regression(self, train_col, ideal_col):
        """Fit ``ideal_col`` as a linear function of x, print the fit and plot it.

        The x values come from the 'training_data' table and the y values from
        the 'ideal_functions' table; both are used row by row, so the two
        tables are assumed to share the same x grid — TODO confirm.

        Args:
            train_col: Accepted for call-site symmetry but not read by this
                method.
            ideal_col: Column of the 'ideal_functions' table to regress on x.
        """
        # Double brackets keep both arrays two-dimensional, as the regressor expects.
        x = pd.read_sql('training_data', con=self.engine)[['x']].values
        y_ideal = pd.read_sql('ideal_functions', con=self.engine)[[ideal_col]].values

        model = LinearRegression().fit(x, y_ideal)
        y_pred = model.predict(x)

        slope = model.coef_[0][0]
        intercept = model.intercept_[0]
        print(f"Regression between x and {ideal_col}:")
        print(f"  Coefficient: {slope:.4f}")
        print(f"  Intercept  : {intercept:.4f}")

        xs = x.flatten()
        p = figure(title=f"Linear Regression Fit: x → {ideal_col}", x_axis_label="x", y_axis_label="y")
        p.line(xs, y_ideal.flatten(), legend_label="Actual", color="blue", line_width=2)
        p.line(xs, y_pred.flatten(), legend_label="Regression", color="red", line_dash="dashed", line_width=2)
        p.legend.location = "top_left"
        show(p)

In [6]:
# Run the full pipeline: load the CSVs, select ideal functions, map test points, plot one pair
if __name__ == "__main__":
    # Worker objects; each one gets its own engine on the default database file
    mapper, visualizer = DataMapper(), Visualizer()

    # Stage the three CSV files as database tables
    mapper.load_csv_to_db(csv_path="train.csv", table_name="training_data")
    mapper.load_csv_to_db(csv_path="ideal.csv", table_name="ideal_functions")
    mapper.load_csv_to_db(csv_path="test.csv", table_name="test_data")

    # Best ideal function per training column, then the test-point mapping
    selections = mapper.select_best_functions()
    mapper.map_test_points(selections)

    # Show the first selected (training, ideal) pair as an example plot
    train_y, ideal_y = list(selections.items())[0]
    visualizer.plot_training_vs_ideal(train_y, ideal_y)

training_data loaded successfully.
ideal_functions loaded successfully.
test_data loaded successfully.
Mapped 33 out of 100 test points.


In [7]:
    # Approximating regression: build the regression helper (bound to the default
    # 'function_mapping.db' engine) and fit/plot a straight line through ideal
    # function "y23". `reg` is reused by the following cells.
    # The first argument ("y1") is not read by regression() as defined in this notebook.
    # NOTE(review): "y23" is hard-coded; presumably it is the ideal function chosen
    # for "y1" by select_best_functions() — confirm against `selections`.
    # NOTE(review): these lines are indented without an enclosing block; consider dedenting.
    reg = RegressionApproximator()
    reg.regression("y1", "y23")

Regression between x and y23:
  Coefficient: -240.0040
  Intercept  : 7.9998


In [8]:
# Fit and plot the linear regression of ideal function 'y41' on x.
# The first argument ('y2') is not read by regression() as defined in this notebook.
reg.regression('y2', 'y41')

Regression between x and y41:
  Coefficient: 1.9986
  Intercept  : -0.0012


In [9]:
# Fit and plot the linear regression of ideal function 'y3' on x.
# The first argument (also 'y3', a training column name) is not read by regression().
reg.regression('y3', 'y3')

Regression between x and y3:
  Coefficient: -0.0027
  Intercept  : 9.9976


In [10]:
# Fit and plot the linear regression of ideal function 'y47' on x.
# The first argument ('y4') is not read by regression() as defined in this notebook.
reg.regression('y4', 'y47')

Regression between x and y47:
  Coefficient: 0.0004
  Intercept  : -4.2364


In [11]:
# Plotting the other visualizations
if __name__ == "__main__":
    # Both plots read the 'mapping_results' table written by
    # DataMapper.map_test_points(), so the mapping cell must have run first.
    visualizer = Visualizer()

    # Scatter plot of the mapped test points, coloured per assigned ideal function
    visualizer.plot_mapped_test_points()

    # Scatter of Delta Y against x, coloured by the size of the deviation
    visualizer.plot_deviation_heatmap()




