In [32]:
import pandas as pd
from sqlalchemy import create_engine, Column, Integer, Float, MetaData, Table
from sqlalchemy.orm import sessionmaker
import numpy as np
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.layouts import gridplot
import unittest

<div style="color:#FBDC8A; font-family: Calibri; font-size: 16pt; text-align: center;">
     <strong>Building Database and DataFrame</strong> </div>
        <em>Let's create a database with three tables reflecting the data shared in the assignment and split the data into a training set and a testing set. We will train our model on the training set and then use the test set to evaluate the model.</em>

In [22]:
class DataManager:
    """Load the train/test/ideal CSV files and mirror them into a SQL database.

    The frames read by ``create_tables`` are kept on the instance so callers
    do not have to read the CSV files a second time.
    """

    def __init__(self, database_url):
        """Set up the engine, metadata and session factory.

        Args:
            database_url: Database URL handed to ``create_engine``,
                e.g. ``"sqlite:///data.db"``.
        """
        self.engine = create_engine(database_url)
        # MetaData(bind=...) and the argument-less create_all() were removed in
        # SQLAlchemy 2.0; passing the engine explicitly works on 1.x and 2.x.
        self.metadata = MetaData()
        self.Session = sessionmaker(bind=self.engine)
        self.metadata.create_all(self.engine)

        # Filled in by create_tables(); None until it has been called.
        self.train_df = None
        self.test_df = None
        self.ideal_df = None

    def create_tables(self, train_file, test_file, ideal_file):
        """Read the three CSV files and write each one to its own table.

        The frames are written to the tables 'train', 'test' and 'ideal'
        (``if_exists='replace'``, without the DataFrame index), so the method
        can be called repeatedly on the same instance.

        Args:
            train_file: Path of the training CSV.
            test_file: Path of the test CSV.
            ideal_file: Path of the ideal-functions CSV.

        Returns:
            Tuple ``(train_df, test_df, ideal_df)`` of the loaded DataFrames.
            The same objects are stored as ``self.train_df``,
            ``self.test_df`` and ``self.ideal_df``.

        Raises:
            Any exception raised by ``pd.read_csv`` or ``DataFrame.to_sql`` is
            propagated instead of being printed and ignored, so a failed load
            cannot go unnoticed by the cells that depend on it.
        """
        # Read everything first so a missing file fails before any table is replaced.
        frames = {
            'train': pd.read_csv(train_file),
            'test': pd.read_csv(test_file),
            'ideal': pd.read_csv(ideal_file),
        }

        # The frames are handed to to_sql as-is, so no Table objects are
        # declared here and no ORM session is needed around the writes.
        for table_name, frame in frames.items():
            frame.to_sql(table_name, con=self.engine, if_exists='replace', index=False)

        self.train_df = frames['train']
        self.test_df = frames['test']
        self.ideal_df = frames['ideal']
        return self.train_df, self.test_df, self.ideal_df

# Example usage:
# The CSV files are expected next to the notebook; adjust the paths if they live elsewhere.
train_file = 'train.csv'
test_file = 'test.csv'
ideal_file = 'ideal.csv'
database_url = "sqlite:///data.db"  # Change this to your desired database file path

data_manager = DataManager(database_url)
data_manager.create_tables(train_file, test_file, ideal_file)

# The cells below reference train_df / test_df / ideal_df, so define them at
# notebook scope here; without this a Restart & Run All hits a NameError.
train_df = pd.read_csv(train_file)
test_df = pd.read_csv(test_file)
ideal_df = pd.read_csv(ideal_file)



<div style="color:#FBDC8A; font-family: Calibri; font-size: 16pt; text-align: center;">
     <strong>Check If Database and DataFrames Were Created</strong> </div>
    <em>Check which tables and columns exist in the SQLite database and what types their values have, then inspect the first 5 rows of each DataFrame to confirm they were built for training our model.</em>

In [25]:
import sqlite3

def get_table_info(database_path):
    """Print every table of a SQLite database with its column names and types.

    Args:
        database_path: Path handed to ``sqlite3.connect``.

    Returns:
        None. The report is written to stdout.

    Raises:
        sqlite3.Error: If a query fails; the connection is closed regardless.
    """
    connection = sqlite3.connect(database_path)
    try:
        cursor = connection.cursor()

        # Get the list of tables in the database
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [row[0] for row in cursor.fetchall()]

        for table_name in table_names:
            print(f"\nTable: {table_name}")

            # The name is spliced into the statement, so quote it as an
            # identifier (doubling embedded quotes) to keep names containing
            # spaces or punctuation from breaking the PRAGMA.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            cursor.execute(f"PRAGMA table_info({quoted_name});")

            # Display column information
            print("Column Name\tData Type")
            print("-------------------------")
            # Index 1 is printed as the column name and index 2 as its type.
            for column in cursor.fetchall():
                print(f"{column[1]}\t\t{column[2]}")
    finally:
        # Runs on both success and failure so the database handle is never leaked.
        connection.close()

# SQLite file to inspect; point this at the database file you want to examine.
database_path = 'data.db'
get_table_info(database_path)


Table: train
Column Name	Data Type
-------------------------
x		FLOAT
y1		FLOAT
y2		FLOAT
y3		FLOAT
y4		FLOAT

Table: test
Column Name	Data Type
-------------------------
x		FLOAT
y		FLOAT

Table: ideal
Column Name	Data Type
-------------------------
x		FLOAT
y1		FLOAT
y2		FLOAT
y3		FLOAT
y4		FLOAT
y5		FLOAT
y6		FLOAT
y7		FLOAT
y8		FLOAT
y9		FLOAT
y10		FLOAT
y11		FLOAT
y12		FLOAT
y13		FLOAT
y14		FLOAT
y15		FLOAT
y16		FLOAT
y17		FLOAT
y18		FLOAT
y19		FLOAT
y20		FLOAT
y21		FLOAT
y22		FLOAT
y23		FLOAT
y24		FLOAT
y25		FLOAT
y26		FLOAT
y27		FLOAT
y28		FLOAT
y29		FLOAT
y30		FLOAT
y31		FLOAT
y32		FLOAT
y33		FLOAT
y34		FLOAT
y35		FLOAT
y36		FLOAT
y37		FLOAT
y38		FLOAT
y39		FLOAT
y40		FLOAT
y41		FLOAT
y42		FLOAT
y43		FLOAT
y44		FLOAT
y45		FLOAT
y46		FLOAT
y47		FLOAT
y48		FLOAT
y49		FLOAT
y50		FLOAT


In [26]:
# Preview the first five rows of the test set.
test_df.head(n=5)

Unnamed: 0,x,y
0,17.5,34.16104
1,0.3,1.215102
2,-8.7,-16.843908
3,-19.2,-37.17087
4,-11.0,-20.263054


In [27]:
# Preview the first five rows of the training set.
train_df.head(n=5)

Unnamed: 0,x,y1,y2,y3,y4
0,-20.0,39.778572,-40.07859,-20.214268,-0.324914
1,-19.9,39.604813,-39.784,-20.07095,-0.05882
2,-19.8,40.09907,-40.018845,-19.906782,-0.45183
3,-19.7,40.1511,-39.518402,-19.389118,-0.612044
4,-19.6,39.795662,-39.360065,-19.81589,-0.306076


In [28]:
# Preview the first five rows of the ideal functions.
ideal_df.head(n=5)

Unnamed: 0,x,y1,y2,y3,y4,y5,y6,y7,y8,y9,...,y41,y42,y43,y44,y45,y46,y47,y48,y49,y50
0,-20.0,-0.912945,0.408082,9.087055,5.408082,-9.087055,0.912945,-0.839071,-0.850919,0.816164,...,-40.456474,40.20404,2.995732,-0.008333,12.995732,5.298317,-5.298317,-0.186278,0.912945,0.39685
1,-19.9,-0.867644,0.497186,9.132356,5.497186,-9.132356,0.867644,-0.865213,0.168518,0.994372,...,-40.23382,40.04859,2.99072,-0.00834,12.99072,5.293305,-5.293305,-0.21569,0.867644,0.476954
2,-19.8,-0.813674,0.581322,9.186326,5.581322,-9.186326,0.813674,-0.889191,0.612391,1.162644,...,-40.006836,39.89066,2.985682,-0.008347,12.985682,5.288267,-5.288267,-0.236503,0.813674,0.549129
3,-19.7,-0.751573,0.659649,9.248426,5.659649,-9.248426,0.751573,-0.910947,-0.994669,1.319299,...,-39.775787,39.729824,2.980619,-0.008354,12.980619,5.283204,-5.283204,-0.247887,0.751573,0.61284
4,-19.6,-0.681964,0.731386,9.318036,5.731386,-9.318036,0.681964,-0.930426,0.774356,1.462772,...,-39.54098,39.565693,2.97553,-0.008361,12.97553,5.278115,-5.278115,-0.249389,0.681964,0.667902


<div style="color:#FBDC8A; font-family: Calibri; font-size: 16pt; text-align: center;">
     <strong>Exploratory Data Analysis for Data Sets</strong> </div>
    <em>Let's dig into the data sets and visualize them to gain better insight, so that we can select an optimal method to train our model.</em>

In [54]:
# Plot the training functions: one line chart per y-column, side by side.
figures = []

# columns[0] is 'x'; every remaining column is one training function.
for i, column in enumerate(train_df.columns[1:]):
    title = f"Function {i + 1}"
    p = figure(title=title, x_axis_label="x", y_axis_label=f"y{i + 1}")
    p.line(
        train_df['x'],
        train_df[column],
        line_width=1,
        line_color="blue",
        legend_label=title,
    )
    figures.append(p)

# gridplot takes a list of rows, so wrapping the list yields a single row.
grid = gridplot([figures])
show(grid)

<div style="color:#FBDC8A; font-family: Calibri; font-size: 16pt; text-align: center;">
     <strong>Plots created for training functions</strong> </div>


![Alt text](bokeh_plot.png)

In [60]:
# Create a Bokeh figure for ideal functions
# Plots per grid row; the number of rows follows from the number of functions,
# so the cell works for any column count instead of exactly 10 x 5 = 50.
num_cols = 5
ideal_columns = ideal_df.columns[1:]  # Skip the 'x' column
num_rows = -(-len(ideal_columns) // num_cols)  # ceiling division

# Create a Bokeh figure for each function
figures = []
for i, column in enumerate(ideal_columns):
    p = figure(title=f"Function {i + 1}", x_axis_label="x", y_axis_label=f"y{i + 1}")
    p.line(ideal_df['x'], ideal_df[column], line_width=4, line_color="blue", legend_label=f'Function {i + 1}')
    figures.append(p)

# With ncols, gridplot splits the flat list into rows itself, including a
# shorter last row, so no filler figures or manual index arithmetic are needed.
grid = gridplot(figures, ncols=num_cols)

# Show the grid
show(grid)

<div style="color:#FBDC8A; font-family: Calibri; font-size: 16pt; text-align: center;">
     <strong>Plots created for ideal functions</strong> </div>


![Alt text](bokeh_plot(1).png)