In [1]:
import math
from functionalities import FunctionAdministrator
from linear_regressions import loss_minimisation, classification_search, calculate_mean_squarederror
from plots import plot_point, ideal_plots
from classification import sqlite_store_deviation_result
import pandas as pd
import numpy as np
from bokeh.io import show
from bokeh.plotting import figure

# ---------------------------------------------------------------------------
# Configuration: every input path is declared once and reused below, so the
# same literal is not repeated (and cannot drift) across the pipeline.
# ---------------------------------------------------------------------------
train_csv_path = "csv_src/train.csv"
ideal_csv_path = "csv_src/ideal.csv"
test_csv_path = "csv_src/test.csv"

# Tolerance assigned to every selected ideal function (`tolerance_factor`) and
# used as the symmetric cap when clipping deviations further down.
TOLERANCE_THRESHOLD = math.sqrt(2)

# Load training dataset and preview its first rows
train_data = pd.read_csv(train_csv_path)
print(train_data.head())

# Load ideal dataset and preview its first rows
ideal_data = pd.read_csv(ideal_csv_path)
print(ideal_data.head())

if __name__ == '__main__':
    # ---- 1. Pick one ideal function per training function -----------------
    ideal_function_manager = FunctionAdministrator(csv_source=ideal_csv_path)
    train_function_manager = FunctionAdministrator(csv_source=train_csv_path)

    # Persist both function sets through the project's `to_sql` helper.
    train_function_manager.to_sql(name_of_file="training", suffix=" (training)")
    ideal_function_manager.to_sql(name_of_file="ideal", suffix=" (ideal)")

    selected_ideal_functions = []

    for train_function in train_function_manager:
        # `loss_minimisation` is handed the training function, all ideal
        # candidates and the loss callable; its result is treated here as the
        # chosen ideal function (selection logic lives in linear_regressions).
        best_ideal_function = loss_minimisation(
            trn_func=train_function,
            competitor_functions_list=ideal_function_manager.functions,
            func_loss=calculate_mean_squarederror
        )

        best_ideal_function.tolerance_factor = TOLERANCE_THRESHOLD
        selected_ideal_functions.append(best_ideal_function)

        print("Largest deviation:", best_ideal_function.biggest_dev)
        print("Mean Squared Error:", best_ideal_function.error)
        print("Best fit: Train column", train_function.name, "with Ideal column", best_ideal_function.name)
        print("=" * 95)

    ideal_plots(selected_ideal_functions, "train_and_ideal")

    # ---- 2. Least-squares line through the test data ----------------------
    # `ideal_data` loaded above is reused; the file is not read a second time.
    test_data = pd.read_csv(test_csv_path)

    x_values = test_data["x"]
    y_values = test_data["y"]
    data_size = np.size(x_values)

    # With fewer than two distinct x values the slope denominator below is
    # zero, so fail loudly instead of propagating inf/NaN into the plot.
    if data_size < 2 or x_values.nunique() < 2:
        raise ValueError(
            "Linear regression needs at least two distinct x values in " + test_csv_path
        )

    mean_x = np.mean(x_values)
    mean_y = np.mean(y_values)

    # Closed-form simple linear regression:
    #   slope = (sum(x*y) - n*mean_x*mean_y) / (sum(x*x) - n*mean_x^2)
    slope_numerator = np.sum(x_values * y_values) - data_size * mean_x * mean_y
    slope_denominator = np.sum(x_values * x_values) - data_size * mean_x * mean_x

    regression_slope = slope_numerator / slope_denominator
    regression_intercept = mean_y - regression_slope * mean_x

    print('Slope:', regression_slope, 'Intercept:', regression_intercept)

    predicted_y_values = regression_slope * x_values + regression_intercept
    residual_error = y_values - predicted_y_values

    # ---- 3. Deviation of the fitted line from each selected ideal column --
    # NOTE(review): rows are paired by position (test row i with ideal row i),
    # exactly as before; this is only meaningful if both files share the same
    # x grid in the same order - confirm, or join on "x" instead.
    predicted_array = predicted_y_values.to_numpy()
    deviation_list = []

    for ideal_col in selected_ideal_functions:
        y_ideal = ideal_data[ideal_col.name]
        raw_deviation = predicted_array - y_ideal.to_numpy()[:data_size]
        # Cap the magnitude on BOTH sides. The previous check
        # (`deviation < TOLERANCE_THRESHOLD`) only limited positive values, so
        # large negative deviations passed through uncapped.
        deviation_per_point = np.clip(
            raw_deviation, -TOLERANCE_THRESHOLD, TOLERANCE_THRESHOLD
        ).tolist()
        deviation_list.append(deviation_per_point)

    # ---- 4. Mean Absolute Error (MAE) of the regression line --------------
    # This measures test y against the fitted line; no ideal function is
    # involved, hence the corrected label in the message below.
    mean_absolute_error = float(np.mean(np.abs(residual_error.to_numpy())))
    print("Mean absolute error of linear fit:", mean_absolute_error)

    # ---- 5. Plot results ---------------------------------------------------
    # The title is built from the computed values so it can never disagree
    # with the numbers printed above.
    plot_title = (
        f"Test Data with Slope: {regression_slope:.2f} & "
        f"Intercept: {regression_intercept:.2f} & "
        f"Error: {mean_absolute_error:.4f}"
    )
    plot_figure = figure(width=600, height=400, title=plot_title,
                         x_axis_label="x", y_axis_label="y")

    # `scatter` with a circle marker replaces `circle(size=...)`, which newer
    # Bokeh releases deprecate.
    plot_figure.scatter(x_values, y_values, size=10, marker="circle", line_color="navy",
                        legend_label="Test Data", fill_color="red")
    plot_figure.line(x_values, predicted_y_values, line_width=5, legend_label="Slope & Intercept")

    show(plot_figure)

    # ---- 6. Classification of test data points ----------------------------
    test_function_manager = FunctionAdministrator(csv_source=test_csv_path)
    test_function = test_function_manager.functions[0]

    classified_points = []
    for test_point in test_function:
        # `classification_search` yields a (function, delta_y) pair per point.
        best_match_function, delta_y = classification_search(point=test_point, idl_funcs=selected_ideal_functions)
        classification_result = {"point": test_point, "classification": best_match_function, "delta_y": delta_y}
        classified_points.append(classification_result)

    plot_point(classified_points, "point_and_ideal")
    sqlite_store_deviation_result(classified_points)

    print("Name: Ibrahim Ahmed Ali Al-Jaf, Matriculation Number: 10241869, Master of AI")
    print("Date: 15.03.2025")

      x         y1         y2         y3        y4
0 -20.0  39.778572 -40.078590 -20.214268 -0.324914
1 -19.9  39.604813 -39.784000 -20.070950 -0.058820
2 -19.8  40.099070 -40.018845 -19.906782 -0.451830
3 -19.7  40.151100 -39.518402 -19.389118 -0.612044
4 -19.6  39.795662 -39.360065 -19.815890 -0.306076
      x        y1        y2        y3        y4        y5        y6        y7  \
0 -20.0 -0.912945  0.408082  9.087055  5.408082 -9.087055  0.912945 -0.839071   
1 -19.9 -0.867644  0.497186  9.132356  5.497186 -9.132356  0.867644 -0.865213   
2 -19.8 -0.813674  0.581322  9.186326  5.581322 -9.186326  0.813674 -0.889191   
3 -19.7 -0.751573  0.659649  9.248426  5.659649 -9.248426  0.751573 -0.910947   
4 -19.6 -0.681964  0.731386  9.318036  5.731386 -9.318036  0.681964 -0.930426   

         y8        y9  ...        y41        y42       y43       y44  \
0 -0.850919  0.816164  ... -40.456474  40.204040  2.995732 -0.008333   
1  0.168518  0.994372  ... -40.233820  40.048590  2.990720 -0.0

In [None]:
import math
from functionalities import FunctionAdministrator
from linear_regressions import loss_minimisation, classification_search, calculate_mean_squarederror
from plots import plot_point, ideal_plots
from classification import sqlite_store_deviation_result
import pandas as pd
import numpy as np
from bokeh.io import show
from bokeh.plotting import figure

# NOTE(review): this cell runs the same pipeline as the notebook's first code
# cell and has no recorded execution; consider keeping only one of the two.

# ---------------------------------------------------------------------------
# Configuration: every input path is declared once and reused below, so the
# same literal is not repeated (and cannot drift) across the pipeline.
# ---------------------------------------------------------------------------
train_csv_path = "csv_src/train.csv"
ideal_csv_path = "csv_src/ideal.csv"
test_csv_path = "csv_src/test.csv"

# Tolerance assigned to every selected ideal function (`tolerance_factor`) and
# used as the symmetric cap when clipping deviations further down.
TOLERANCE_THRESHOLD = math.sqrt(2)

# Load training dataset and preview its first rows
train_data = pd.read_csv(train_csv_path)
print(train_data.head())

# Load ideal dataset and preview its first rows
ideal_data = pd.read_csv(ideal_csv_path)
print(ideal_data.head())

if __name__ == '__main__':
    # ---- 1. Pick one ideal function per training function -----------------
    ideal_function_manager = FunctionAdministrator(csv_source=ideal_csv_path)
    train_function_manager = FunctionAdministrator(csv_source=train_csv_path)

    # Persist both function sets through the project's `to_sql` helper.
    train_function_manager.to_sql(name_of_file="training", suffix=" (training)")
    ideal_function_manager.to_sql(name_of_file="ideal", suffix=" (ideal)")

    selected_ideal_functions = []

    for train_function in train_function_manager:
        # `loss_minimisation` is handed the training function, all ideal
        # candidates and the loss callable; its result is treated here as the
        # chosen ideal function (selection logic lives in linear_regressions).
        best_ideal_function = loss_minimisation(
            trn_func=train_function,
            competitor_functions_list=ideal_function_manager.functions,
            func_loss=calculate_mean_squarederror
        )

        best_ideal_function.tolerance_factor = TOLERANCE_THRESHOLD
        selected_ideal_functions.append(best_ideal_function)

        print("Largest deviation:", best_ideal_function.biggest_dev)
        print("Mean Squared Error:", best_ideal_function.error)
        print("Best fit: Train column", train_function.name, "with Ideal column", best_ideal_function.name)
        print("=" * 95)

    ideal_plots(selected_ideal_functions, "train_and_ideal")

    # ---- 2. Least-squares line through the test data ----------------------
    # `ideal_data` loaded above is reused; the file is not read a second time.
    test_data = pd.read_csv(test_csv_path)

    x_values = test_data["x"]
    y_values = test_data["y"]
    data_size = np.size(x_values)

    # With fewer than two distinct x values the slope denominator below is
    # zero, so fail loudly instead of propagating inf/NaN into the plot.
    if data_size < 2 or x_values.nunique() < 2:
        raise ValueError(
            "Linear regression needs at least two distinct x values in " + test_csv_path
        )

    mean_x = np.mean(x_values)
    mean_y = np.mean(y_values)

    # Closed-form simple linear regression:
    #   slope = (sum(x*y) - n*mean_x*mean_y) / (sum(x*x) - n*mean_x^2)
    slope_numerator = np.sum(x_values * y_values) - data_size * mean_x * mean_y
    slope_denominator = np.sum(x_values * x_values) - data_size * mean_x * mean_x

    regression_slope = slope_numerator / slope_denominator
    regression_intercept = mean_y - regression_slope * mean_x

    print('Slope:', regression_slope, 'Intercept:', regression_intercept)

    predicted_y_values = regression_slope * x_values + regression_intercept
    residual_error = y_values - predicted_y_values

    # ---- 3. Deviation of the fitted line from each selected ideal column --
    # NOTE(review): rows are paired by position (test row i with ideal row i),
    # exactly as before; this is only meaningful if both files share the same
    # x grid in the same order - confirm, or join on "x" instead.
    predicted_array = predicted_y_values.to_numpy()
    deviation_list = []

    for ideal_col in selected_ideal_functions:
        y_ideal = ideal_data[ideal_col.name]
        raw_deviation = predicted_array - y_ideal.to_numpy()[:data_size]
        # Cap the magnitude on BOTH sides. The previous check
        # (`deviation < TOLERANCE_THRESHOLD`) only limited positive values, so
        # large negative deviations passed through uncapped.
        deviation_per_point = np.clip(
            raw_deviation, -TOLERANCE_THRESHOLD, TOLERANCE_THRESHOLD
        ).tolist()
        deviation_list.append(deviation_per_point)

    # ---- 4. Mean Absolute Error (MAE) of the regression line --------------
    # This measures test y against the fitted line; no ideal function is
    # involved, hence the corrected label in the message below.
    mean_absolute_error = float(np.mean(np.abs(residual_error.to_numpy())))
    print("Mean absolute error of linear fit:", mean_absolute_error)

    # ---- 5. Plot results ---------------------------------------------------
    # The title is built from the computed values so it can never disagree
    # with the numbers printed above.
    plot_title = (
        f"Test Data with Slope: {regression_slope:.2f} & "
        f"Intercept: {regression_intercept:.2f} & "
        f"Error: {mean_absolute_error:.4f}"
    )
    plot_figure = figure(width=600, height=400, title=plot_title,
                         x_axis_label="x", y_axis_label="y")

    # `scatter` with a circle marker replaces `circle(size=...)`, which newer
    # Bokeh releases deprecate.
    plot_figure.scatter(x_values, y_values, size=10, marker="circle", line_color="navy",
                        legend_label="Test Data", fill_color="red")
    plot_figure.line(x_values, predicted_y_values, line_width=5, legend_label="Slope & Intercept")

    show(plot_figure)

    # ---- 6. Classification of test data points ----------------------------
    test_function_manager = FunctionAdministrator(csv_source=test_csv_path)
    test_function = test_function_manager.functions[0]

    classified_points = []
    for test_point in test_function:
        # `classification_search` yields a (function, delta_y) pair per point.
        best_match_function, delta_y = classification_search(point=test_point, idl_funcs=selected_ideal_functions)
        classification_result = {"point": test_point, "classification": best_match_function, "delta_y": delta_y}
        classified_points.append(classification_result)

    plot_point(classified_points, "point_and_ideal")
    sqlite_store_deviation_result(classified_points)

    print("Name: Ibrahim Ahmed Ali Al-Jaf, Matriculation Number: 10241869, Master of AI")
    print("Date: 15.03.2025")