In [10]:
pip install pandas sqlalchemy


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import math
from sqlalchemy import create_engine, Column, Integer, Float, String, Table, MetaData
from sqlalchemy.orm import declarative_base, sessionmaker
import os
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook

# Bokeh setup
# Route Bokeh output to the notebook so later show() calls render inline in a cell.
output_notebook()

# Declarative base that the ORM models below subclass; save_to_database() passes
# Base.metadata to create_all() to create their tables.
Base = declarative_base()

class MappedResult(Base):
    """
    ORM model for one row of the ``mapped_results`` table.

    IdealFunctionMapper.save_to_database() creates one instance per mapped result
    tuple ``(x, y, delta_y, ideal_function)``.
    """
    __tablename__ = 'mapped_results'
    # Integer primary key of the row.
    id = Column(Integer, primary_key=True)
    # x coordinate of the test point.
    x = Column(Float)
    # y value of the test point.
    y = Column(Float)
    # Deviation recorded for the point by map_test_data().
    delta_y = Column(Float)
    # Column name of the ideal function the point was assigned to.
    ideal_function = Column(String)

class IdealFunctionMapper:
    """
    Assigns test points to ideal functions using the training data as a tolerance reference.

    Three CSV files (train, ideal, test) are loaded and their column names normalised.
    Every ideal-data column whose name starts with 'y' is a candidate function. A
    candidate is paired with the training column of the *same name*; candidates without
    such a training column are skipped with a warning.
    """

    def __init__(self, train_data_path, ideal_data_path, test_data_path):
        """
        Load the three datasets and determine the candidate ideal functions.

        Args:
            train_data_path: Path of the training CSV file.
            ideal_data_path: Path of the ideal-functions CSV file.
            test_data_path: Path of the test CSV file.

        Raises:
            FileNotFoundError: If any of the files does not exist.
        """
        self.train_data = self.load_and_clean_data(train_data_path)
        self.ideal_data = self.load_and_clean_data(ideal_data_path)
        self.test_data = self.load_and_clean_data(test_data_path)
        self.selected_ideal_functions = self.select_ideal_functions()

    @staticmethod
    def load_and_clean_data(file_path):
        """
        Load a CSV file and normalise its column names.

        Column names are stripped, have their spaces removed and are lower-cased
        (e.g. ``" Y 1 "`` becomes ``"y1"``).

        Args:
            file_path: Path of the CSV file to read.

        Returns:
            pandas.DataFrame with normalised column names.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            Exception: Any other error raised while reading or cleaning the file is
                printed and re-raised unchanged.
        """
        try:
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"File {file_path} not found.")
            df = pd.read_csv(file_path)
            # Single normalisation pass (the names were previously lower-cased twice).
            df.columns = [name.strip().replace(' ', '').lower() for name in df.columns]
            return df
        except FileNotFoundError as e:
            print(e)
            raise
        except Exception as e:
            print(f"Error loading or cleaning data from {file_path}: {e}")
            raise

    def select_ideal_functions(self):
        """
        Return the names of all ideal-data columns that start with 'y'.
        """
        return [col for col in self.ideal_data.columns if col.startswith('y')]

    def _deviation_thresholds(self):
        """
        Compute the largest deviation a test point may have from each candidate function.

        For every candidate that also exists in the training data the threshold is
        ``sqrt(2) * max(|train[col] - ideal[col]|)``. The subtraction aligns the two
        columns by DataFrame index, so both files are assumed to list the same x values
        in the same order -- TODO confirm against the datasets.

        Returns:
            dict mapping column name -> threshold, in candidate order.
        """
        thresholds = {}
        for col in self.selected_ideal_functions:
            if col not in self.train_data.columns:
                print(f"Warning: '{col}' not found in train data columns. Skipping.")
                continue  # No training counterpart, so no tolerance can be derived.
            # Series.max() skips NaN, unlike the built-in max() whose result depends
            # on where a NaN happens to sit in the sequence.
            largest_gap = (self.train_data[col] - self.ideal_data[col]).abs().max()
            thresholds[col] = math.sqrt(2) * largest_gap
        return thresholds

    def map_test_data(self, test_data):
        """
        Map each test point to the best fitting ideal function based on minimal deviation.

        A candidate qualifies when the absolute difference between the test y and the
        ideal value at the same x does not exceed that candidate's threshold. Among the
        qualifying candidates the one with the smallest deviation wins; on ties the
        earlier column wins.

        Args:
            test_data: DataFrame with columns 'x' and 'y'.

        Returns:
            List of ``(x, y, delta_y, ideal_function)`` tuples, one per test row and in
            row order. When no candidate qualifies, or the x value does not occur in the
            ideal data, both ``delta_y`` and ``ideal_function`` are None.
        """
        # The thresholds do not depend on the test row, so compute them (and emit the
        # "not found" warnings) once per call rather than once per row and column.
        thresholds = self._deviation_thresholds()
        ideal_x = self.ideal_data['x']
        mapped_results = []

        for _, row in test_data.iterrows():
            x = row['x']
            y = row['y']
            min_deviation = float('inf')
            best_fit_function = None

            matches = self.ideal_data.loc[ideal_x == x]
            if not matches.empty:
                # First matching row, as before; an x without any match used to
                # raise IndexError and is now reported as an unmapped point.
                ideal_row = matches.iloc[0]
                for col, max_allowed_deviation in thresholds.items():
                    deviation = abs(y - ideal_row[col])
                    if deviation < min_deviation and deviation <= max_allowed_deviation:
                        min_deviation = deviation
                        best_fit_function = col

            # Do not leak the 'inf' search sentinel as a deviation for unmapped points.
            delta_y = min_deviation if best_fit_function is not None else None
            mapped_results.append((x, y, delta_y, best_fit_function))

        return mapped_results

    def save_to_database(self, mapped_results, db_url='sqlite:///mapped_results.db'):
        """
        Append the mapped results to the ``mapped_results`` table.

        The table is created if it does not exist yet. All rows are written in a single
        transaction.

        Args:
            mapped_results: Iterable of ``(x, y, delta_y, ideal_function)`` tuples.
            db_url: SQLAlchemy database URL; defaults to a SQLite file in the
                current working directory.

        Raises:
            Exception: Any database error is printed and re-raised.
        """
        engine = None
        try:
            engine = create_engine(db_url)
            Base.metadata.create_all(engine)
            Session = sessionmaker(bind=engine)

            # The context manager closes the session even when add/commit fails.
            with Session() as session:
                session.add_all([
                    MappedResult(x=x, y=y, delta_y=delta_y, ideal_function=ideal_function)
                    for x, y, delta_y, ideal_function in mapped_results
                ])
                session.commit()
        except Exception as e:
            print(f"Error saving to database: {e}")
            raise
        finally:
            # Release pooled connections so the database file is not kept open.
            if engine is not None:
                engine.dispose()

    def visualize_results(self, mapped_df):
        """
        Show a Bokeh scatter plot of the mapped points, with one legend entry per ideal function.

        Args:
            mapped_df: DataFrame with at least the columns 'x', 'y' and 'ideal_function'.
        """
        source = ColumnDataSource(mapped_df)

        p = figure(title="Mapped Results", x_axis_label='X', y_axis_label='Y')
        p.scatter(x='x', y='y', source=source, legend_field='ideal_function', fill_alpha=0.6, size=8)

        show(p)

# Define file paths
# The data directory can be overridden with the MAPPER_DATA_DIR environment variable so
# the notebook is not tied to one machine; the default is the original location.
DATA_DIR = os.environ.get(
    'MAPPER_DATA_DIR', 'C:/Users/giras/Downloads/MASTERSAssgn/Dataset2'
).rstrip('/\\')
train_data_path = f'{DATA_DIR}/train.csv'
ideal_data_path = f'{DATA_DIR}/ideal.csv'
test_data_path = f'{DATA_DIR}/test.csv'

# Initialize and run the mapper
mapper = IdealFunctionMapper(train_data_path, ideal_data_path, test_data_path)
mapped_results = mapper.map_test_data(mapper.test_data)
mapped_df = pd.DataFrame(mapped_results, columns=['x', 'y', 'delta_y', 'ideal_function'])

# Save results to the database
# Note: every run appends the rows again to mapped_results.db.
mapper.save_to_database(mapped_results)

# Display results for verification
print("Mapped results:")
print(mapped_df)

# Visualize results with Bokeh
mapper.visualize_results(mapped_df)



Mapped results:
       x          y    delta_y ideal_function
0   17.5  34.161040  25.136666             y3
1    0.3   1.215102   0.259766             y2
2   -8.7 -16.843908  16.095261             y2
3  -19.2 -37.170870  36.827555             y1
4  -11.0 -20.263054  20.267480             y2
..   ...        ...        ...            ...
95  -1.9  -4.036904   3.090604             y1
96  12.2  -0.010358   0.347871             y1
97  16.5 -33.964134  33.252349             y1
98   5.3 -10.291622   9.459355             y1
99  17.9  28.078455  18.891612             y3

[100 rows x 4 columns]


In [17]:
from unittest import mock, TestCase
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook

# Define the class
# NOTE(review): this re-declares IdealFunctionMapper, so every cell executed after this
# one sees this canned stand-in instead of the implementation defined earlier in the
# notebook. The tests below therefore exercise the stand-in, not the real mapper.
class IdealFunctionMapper:
    """Stand-in mapper whose methods return fixed sample values."""

    def __init__(self, train_data_path, ideal_data_path, test_data_path):
        # Load train, ideal and test (in that order) through the same loader.
        self.train_data, self.ideal_data, self.test_data = (
            self.load_and_clean_data(path)
            for path in (train_data_path, ideal_data_path, test_data_path)
        )
        self.selected_ideal_functions = self.select_ideal_functions()

    @staticmethod
    def load_and_clean_data(file_path):
        """Ignore ``file_path`` and return a fresh two-row frame with columns y1 and y2."""
        sample_columns = dict(y1=[1, 2], y2=[3, 4])
        return pd.DataFrame(sample_columns)

    def select_ideal_functions(self):
        """Return the fixed list of function names."""
        return [f'y{number}' for number in (1, 2)]

    def map_test_data(self):
        """Return a fixed two-row frame with columns x and y."""
        sample_points = dict(x=[1, 2], y=[3, 4])
        return pd.DataFrame(sample_points)

    def save_to_database(self, df):
        """Pretend to persist ``df``; only a status string is returned."""
        status = "Data saved"
        return status

    def bokeh_visualization(self):
        """Build (without showing) a sample Bokeh line plot and return the figure."""
        xs, ys = [1, 2, 3], [4, 5, 6]
        sample_plot = figure(title="Sample Plot", x_axis_label='X', y_axis_label='Y')
        sample_plot.line(xs, ys, line_width=2, legend_label="Line")
        return sample_plot


# A single class-level patch: mock.patch wraps every ``test*`` method and passes the
# created mock to it as an extra positional argument, exactly like a per-method decorator.
@mock.patch.object(
    IdealFunctionMapper,
    'load_and_clean_data',
    return_value=pd.DataFrame({"y1": [1, 2], "y2": [3, 4]}),
)
class TestIdealFunctionMapper(TestCase):
    """Unit tests for IdealFunctionMapper with its data loader replaced by a mock."""

    # File names handed to the constructor; never opened because the loader is mocked.
    MOCK_PATHS = ('mock_train.csv', 'mock_ideal.csv', 'mock_test.csv')

    def _make_mapper(self):
        # Not a ``test*`` method, so it is only ever called while a patch is active.
        return IdealFunctionMapper(*self.MOCK_PATHS)

    # Comments (not docstrings) are used on the tests so verbose unittest output
    # keeps printing the method names.

    def test_load_and_clean_data(self, mock_load_and_clean):
        # The (mocked) loader yields a two-row frame containing column y1.
        loaded = IdealFunctionMapper.load_and_clean_data('mock_train.csv')
        self.assertEqual(len(loaded.index), 2)
        self.assertIn('y1', loaded.columns)

    def test_select_ideal_functions(self, mock_load_and_clean):
        # Both function names are reported.
        chosen = self._make_mapper().select_ideal_functions()
        self.assertEqual(chosen, ['y1', 'y2'])

    def test_map_test_data(self, mock_load_and_clean):
        # Mapping produces two rows.
        mapped = self._make_mapper().map_test_data()
        self.assertEqual(len(mapped.index), 2)

    def test_save_to_database(self, mock_load_and_clean):
        # Saving reports success through its status string.
        payload = pd.DataFrame({"y1": [1, 2], "y2": [3, 4]})
        outcome = self._make_mapper().save_to_database(payload)
        self.assertEqual(outcome, "Data saved")

    def test_bokeh_visualization(self, mock_load_and_clean):
        # The visualisation helper returns a Bokeh figure ...
        sample_plot = self._make_mapper().bokeh_visualization()
        self.assertIsInstance(sample_plot, figure)

        # ... carrying the expected title and axis labels.
        x_axis, y_axis = sample_plot.xaxis[0], sample_plot.yaxis[0]
        self.assertEqual(sample_plot.title.text, "Sample Plot")
        self.assertEqual(x_axis.axis_label, "X")
        self.assertEqual(y_axis.axis_label, "Y")

# Run the tests
if __name__ == "__main__":
    # Only `mock` and `TestCase` are imported from unittest above, so the module name
    # itself is unbound on a fresh kernel; import it here to avoid a NameError.
    import unittest

    # argv=[''] keeps unittest from reading the kernel's own sys.argv, and exit=False
    # stops it from calling sys.exit() when the run finishes.
    unittest.main(argv=[''], verbosity=2, exit=False)


test_bokeh_visualization (__main__.TestIdealFunctionMapper.test_bokeh_visualization) ... ok
test_load_and_clean_data (__main__.TestIdealFunctionMapper.test_load_and_clean_data) ... ok
test_map_test_data (__main__.TestIdealFunctionMapper.test_map_test_data) ... ok
test_save_to_database (__main__.TestIdealFunctionMapper.test_save_to_database) ... ok
test_select_ideal_functions (__main__.TestIdealFunctionMapper.test_select_ideal_functions) ... ok

----------------------------------------------------------------------
Ran 5 tests in 0.044s

OK
