The following blocks of code show the SQLite database implementation in Python.

In [None]:
import sqlite3, csv, numpy as np, matplotlib.pyplot as plt, pandas as pd, scipy.stats as stats, sklearn

`sqlite3.connect(':memory:')`: creates a temporary in-memory database that is discarded when the connection is closed, so it starts out empty on each execution of the code

`sqlite3.connect('database.db')`: opens the database file `database.db` (creating it if it does not exist); all committed additions and modifications are saved in that file and persist through subsequent executions (i.e. it acts as persistent storage)

In [None]:
# Open an in-memory SQLite database. `conn` and the cursor `c` are module-level
# objects that the table helper functions in this file use directly.
conn = sqlite3.connect(':memory:')
c = conn.cursor()

In [None]:
def create_table(name,r):
    """Create table `name` with (time, frequency) columns and fill it from `r`.

    name: table name; it is concatenated directly into the SQL text, so it
          must be a valid, trusted SQLite identifier.
    r:    iterable of rows; item 0 of each row is stored as `time` and
          item 1 as `frequency`. Any further items are ignored.

    The statements run inside `with conn`, so the pending transaction is
    committed on success and rolled back if an exception is raised.
    """
    create_sql = "CREATE TABLE " + name + " (time REAL, frequency REAL)"
    insert_sql = "INSERT INTO " + name + " VALUES (:x, :y)"

    with conn:
        c.execute(create_sql)

        # one INSERT per input row, bound through named placeholders
        for record in r:
            time_value = record[0]
            frequency_value = record[1]
            c.execute(insert_sql, {'x': time_value, 'y': frequency_value})

def show_table(name):
    """Return all rows of table `name` as a list of row tuples."""
    select_all = "SELECT * FROM " + name
    # Cursor.execute returns the cursor itself, so the fetch can be chained.
    rows = c.execute(select_all).fetchall()
    return rows
            
def get_no_of_rows(name):
    """Return the number of rows currently stored in table `name`."""
    count_query = "SELECT COUNT(*) FROM " + name
    # COUNT(*) yields exactly one single-column row; unpack its only value.
    (row_count,) = c.execute(count_query).fetchone()
    return row_count

def delete_point(name, x, y):
    """Delete every row of table `name` whose time equals `x` and frequency equals `y`.

    The comparison is an exact SQL equality on both columns. The delete runs
    inside `with conn`, so it is committed on success and rolled back on error.
    """
    delete_sql = "DELETE FROM " + name + " WHERE time = :x AND frequency = :y"
    bindings = {'x': x, 'y': y}

    with conn:
        c.execute(delete_sql, bindings)

In [None]:
file_name = 'P7132038_32'

# Read the CSV inside a `with` block so the file handle is always closed
# (the previous bare open() was never closed). newline='' is what the csv
# module expects so that embedded newlines are parsed correctly.
with open(file_name + '.csv', newline='') as csv_file:
    reader = csv.reader(csv_file)

    next(reader, None) # skip over first-row labels (no error on an empty file)

    # Materialise the remaining data rows: `reader` stays a plain iterable of
    # rows that can still be consumed after the file has been closed.
    reader = list(reader)

In [None]:
# Create a table named after the CSV file and insert the rows from `reader` into it.
create_table(file_name,reader)

In [None]:
# Display every row of the loaded table. Uses `file_name` (like the other
# cells) instead of repeating the literal table name, so changing the input
# file only requires editing one place.
show_table(file_name)

In [None]:
# Number of rows stored in the table created from the CSV file.
get_no_of_rows(file_name)

The following blocks show an implementation of calculating a regression line using the least-squares method.

In [None]:
# Fetch both columns with a single query so that every x value is guaranteed
# to be paired with the y value from the same row. Two separate SELECTs
# without ORDER BY depend on an unspecified row order to stay aligned.
c.execute("SELECT time, frequency FROM " + file_name)
rows = c.fetchall()

x = np.array([row[0] for row in rows])  # time column
y = np.array([row[1] for row in rows])  # frequency column

In [None]:
def least_squares_coefficients(x, y):
    """Return the (y-intercept, slope) of the least-squares line through the data.

    x, y: equally long sequences or NumPy arrays of numeric observations.

    The slope is S_xy / S_xx, where S_xy is the sum of cross-deviations
    sum((x - mean_x) * (y - mean_y)) and S_xx is the sum of squared deviations
    sum((x - mean_x)**2). The intercept is mean_y - slope * mean_x.

    The result depends only on the arguments; no database lookup is made.
    If all x values are identical (or the input is empty), S_xx is zero and
    the returned values are not finite.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    mean_x = np.mean(x) # mean of all x values
    mean_y = np.mean(y) # mean of all y values

    # Subtract the means before summing. The previous form put the
    # n*mean_x*mean_y correction inside np.sum, so it was subtracted once per
    # element (n times in total) and produced a wrong slope.
    dev_x = x - mean_x
    dev_y = y - mean_y

    cross_deviation_xy = np.sum(dev_x * dev_y)    # sum of cross-deviations of y and x
    deviation_squared_xx = np.sum(dev_x ** 2)     # sum of the squared deviations of x

    slope = cross_deviation_xy / deviation_squared_xx
    y_int = mean_y - slope*mean_x

    return (y_int, slope)

def plot_ls_regression_line(x, y, c):
    """Draw a scatter plot of (x, y) with the regression line on top and show it.

    x, y: data values; `x` must support arithmetic with scalars
          (e.g. a NumPy array).
    c:    coefficients indexed as c[0] = y-intercept and c[1] = slope.
          Inside this function the parameter shadows the module-level cursor `c`.
    """
    # raw observations
    plt.scatter(x, y)

    # fitted line: y = intercept + slope * x
    intercept, gradient = c[0], c[1]
    fitted_y = intercept + gradient*x
    plt.plot(x, fitted_y, color="b")

    # axis captions
    plt.xlabel('time')
    plt.ylabel('frequency')

    plt.show()

In [None]:
# Fit the regression line, report its coefficients, then plot it over the data.
coefficients = least_squares_coefficients(x, y)

# One print call with a newline separator produces the same three output lines.
print(
    "y-intercept: %f" % coefficients[0],
    "slope: %f" % coefficients[1],
    "linear regression line: predicted y = %f + %fx" % (coefficients[0], coefficients[1]),
    sep="\n",
)

plot_ls_regression_line(x, y, coefficients)

In [None]:
# Close the database connection; the in-memory database opened above is discarded with it.
conn.close()