In [1]:
import sqlite3

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [2]:
# Open the SQLite database file "DB.db" (the path is relative to the current
# working directory) and create one cursor. The query helper functions defined
# in the following cells read this module-level `cursor` directly.
# NOTE(review): sqlite3.connect creates a new, empty database file when the
# path does not exist, so running the notebook from the wrong directory would
# only surface later as a "no such table" error - confirm DB.db is present.
con = sqlite3.connect("DB.db")
cursor = con.cursor()

In [3]:
def get_price_change(product_name : 'str'):
    """Return the price history of one product as two parallel lists.

    Selects every row of the ``prices`` table whose ``product`` column equals
    ``product_name``, ordered by ``date_start``, through the module-level
    ``cursor``.

    Returns a tuple ``(dates, prices)``. ``dates`` contains each ``date_start``
    value reduced to its calendar date and rendered as a string; ``prices``
    contains the corresponding ``price`` values in the same order.
    """
    rows = cursor.execute(
        'SELECT * FROM prices WHERE product = ? ORDER BY date_start',
        [product_name],
    ).fetchall()
    # Column names come from the cursor metadata of the query just executed.
    column_names = [column[0] for column in cursor.description]

    history = pd.DataFrame(rows, columns=column_names)
    # Parse to datetime, drop the time-of-day part, then convert to strings.
    date_strings = pd.to_datetime(history['date_start']).dt.date.astype('str')

    return date_strings.tolist(), history['price'].tolist()

In [5]:
def get_revenue_std(product_name : 'str'):
    """Return the standard deviation of a product's recorded revenue.

    Selects the ``revenue`` column of the ``revenue`` table for rows whose
    ``product`` equals ``product_name`` (through the module-level ``cursor``)
    and computes the population standard deviation (numpy's default,
    ``ddof=0``), rounded to two decimal places.

    Rows whose revenue is NULL are skipped. When no usable rows exist the
    function returns NaN.
    """
    query = 'SELECT revenue FROM revenue WHERE product = ?'

    rows = cursor.execute(query, [product_name]).fetchall()
    # fetchall() yields 1-tuples; unpack them and drop NULLs (None), which
    # np.std cannot handle.
    values = np.array([row[0] for row in rows if row[0] is not None], dtype=float)

    if values.size == 0:
        # Avoid numpy's "mean of empty slice" RuntimeWarning; NaN is what
        # np.std would produce for an empty input anyway.
        return np.float64('nan')

    return round(np.std(values), 2)

In [6]:
def lin_reg(product_name : 'str', start_date : 'str', end_date : 'str'):
    """Fit a straight line to the amount of a product sold over a date interval.

    Selects ``product``, ``date`` and ``amount`` from the ``revenue`` table for
    rows whose ``product`` equals ``product_name`` and whose ``date`` lies
    between ``start_date`` and ``end_date`` (SQL ``BETWEEN``, so both ends are
    inclusive), ordered by ``date``, through the module-level ``cursor``.

    The regressor is the row position in that ordered result (0, 1, 2, ...),
    not the calendar date itself. It therefore equals "day number" only if
    the table holds exactly one row per consecutive day - TODO confirm.

    The model is trained on a fixed 80% subset of the rows
    (``test_size=0.2``, ``random_state=42``); the held-out 20% is not used.

    Returns ``(k, b)``: the fitted slope ``coef_`` and intercept
    ``intercept_``. Because the target is passed as a column vector,
    scikit-learn returns both as arrays rather than plain scalars.

    NOTE(review): ``train_test_split`` is expected to raise ``ValueError``
    when the query returns too few rows to leave a non-empty training set.
    """
    query = """SELECT product, date, amount FROM revenue WHERE (product = ?)
    AND (date BETWEEN ? AND ?) ORDER BY date"""

    result = cursor.execute(query, [product_name, start_date, end_date]).fetchall()
    names = [x[0] for x in cursor.description]
    df = pd.DataFrame(result, columns=names)

    # Default RangeIndex -> ordinal position of each row in date order.
    x = df.index.values.reshape(-1, 1)
    y = df['amount'].values.reshape(-1, 1)
    # Keep the same split and seed so the fitted coefficients are unchanged;
    # the test portion is discarded because nothing evaluates it.
    x_train, _, y_train, _ = train_test_split(x, y, test_size = 0.2, random_state = 42)

    lrm = LinearRegression()
    lrm.fit(x_train, y_train)

    return lrm.coef_, lrm.intercept_

In [8]:
# Release the database resources: close the cursor first, then the connection.
# After this cell has run, `cursor` and `con` cannot be used for further
# queries until the connection cell is executed again.
cursor.close()
con.close()