In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import pandas as pd

In [3]:
def read_csv(stock):
    filepath = f"C:/Users/danie/Documents/Software/Python-Finance-QuantConnect/DATA/{stock}.csv"
    df = pd.read_csv(filepath)
    print("len(df): ", len(df))
    return df

In [33]:
def technical_indicators(df, n_ma, n_future):
    """
    calculate technical indicators of the stock
    :returns: updated dataframe
    """
    df['Daily Returns'] = df["Adj Close"].pct_change(1)
    df['Log Returns'] = np.log(1 + df['Daily Returns'])
    # TODO: drop these two lines (and check)
#     df.dropna(inplace=True) 
#     df.reset_index(drop=True, inplace=True)
    
    # Calculate Exponential Moving Average (EMA)
    df['EMA'] = df['Log Returns'].ewm(span=n_ma, adjust=False).mean()
    # Calculate the simple moving average (SMA) for a window of n days
    df['SMA'] = df['Log Returns'].rolling(window=n_ma).mean()
    # SMA used for dependent variable
    df['y_SMA'] = df['Log Returns'].rolling(window=n_future).mean()
    
    # Calculate the short-term EMA (12 periods)
    df['EMA_12'] = df['Adj Close'].ewm(span=12, adjust=False).mean()
    # Calculate the long-term EMA (26 periods)
    df['EMA_26'] = df['Adj Close'].ewm(span=26, adjust=False).mean()
    # Calculate the MACD line
    df['MACD'] = df['EMA_12'] - df['EMA_26']
    # Calculate the Signal line
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    # Calculate the rate of change for MACD
    df['MACD_Rate_of_Change'] = df['MACD'].pct_change()
    df['MACD_Rate_of_Change'].replace([np.inf, -np.inf], np.nan, inplace=True)
    # You can then fill the NaNs with a suitable value, like 0 or the mean of the column
#     df['MACD_Rate_of_Change'].fillna(0, inplace=True)

    # If you want to fill with the mean, ensure to calculate the mean without the inf/-inf values
    mean_value = df['MACD_Rate_of_Change'].mean()
    df['MACD_Rate_of_Change'].fillna(mean_value, inplace=True)

    # Encoding the crossover
    df['MACD_Crossover_Up'] = (df['MACD'] > df['Signal_Line']).astype(int)
    df['MACD_Crossover_Down'] = (df['MACD'] < df['Signal_Line']).astype(int)
    
    max_value_threshold = 1e9  # example threshold, adjust as necessary
    if df['MACD_Rate_of_Change'].abs().max() > max_value_threshold:
        print(f"Values too large found in 'MACD_Rate_of_Change' exceeding {max_value_threshold}")

    df = df.dropna(subset=['Log Returns', 'EMA'])
    df.reset_index(drop=True, inplace=True)
    return df

In [34]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

def LR_MACD(df, n_days, n_ma, n_future):  
    
    df = technical_indicators(df, n_ma, n_future)
#     print("df.head: ", df.head())
#     print("df.head: ", df.head())
    
    # Initialize X and Y
    X = []
    Y = []
    
    # Populate X and Y
    for i in range(n_days*n_ma, len(df)-(1+max(0,n_future-n_ma))):
#     for i in range(n_days*n_ma, n_days*n_ma + 55):
        emas = df[['EMA']].iloc[i-n_days*n_ma:i:n_ma].values.flatten()
        i_now = i-n_days*n_ma+n_ma
        macd_value = df['MACD'].iloc[i_now]
        signal_line = df['Signal_Line'].iloc[i_now]
        macd_rate_of_change = df['MACD_Rate_of_Change'].iloc[i_now]
        macd_crossover_up = df['MACD_Crossover_Up'].iloc[i_now]
        macd_crossover_down = df['MACD_Crossover_Down'].iloc[i_now]

        features = list(emas) + [macd_value, signal_line, macd_rate_of_change, macd_crossover_up, macd_crossover_down]
        X.append(features)
        y_SMA = df['y_SMA'].iloc[i - n_ma + n_future]
        Y.append(y_SMA)

    # split data
    test_size = int(len(X) * 0.2)

    # Training set
    X_train = X[:-test_size]
    y_train = Y[:-test_size]

    # Testing set
    X_test = X[-test_size:]
    y_test = Y[-test_size:]
    
    # Fit the model on the training data
    model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
    model.fit(X_train, y_train)
    # normalize = False (input data)
    # copy_x = True (overwrite input variables)
    # n_jobs = None (number of parallelism. -1 uses all available processors)

    r_sq = model.score(X_test, y_test)
#     print(f"coefficient of determination: {r_sq}")
#     print(f"intercept: {model.intercept_}")
#     print(f"slope: {model.coef_}")
    return r_sq

In [39]:
import time

def main():
    stocks = ['AAPL', 'DG', 'JPM', 'HSBC'] # ['AAPL','BAC','COST','C','DG','FB','HSBC','JPM']
    best = {}
    secondbest = {}

    # Start time for the whole calculation
    start_time_total = time.time()

    for stock in stocks:
        # Start time for the current stock calculation
        start_time_stock = time.time()

        df = read_csv(stock)
        best[stock] = {'r_sq': -1e9}
        secondbest[stock] = {'r_sq': -1e9}

         # Get the ranges for the current stock
        stock_ranges = ranges[stock]
        print(f"stock_ranges: {stock_ranges}")
        days_list = list(range(*stock_ranges['days']))
        n_mas = list(range(*stock_ranges['n_ma']))
        n_futures = list(range(*stock_ranges['n_future']))
#         days_list = n_mas = n_futures = [1,5]

        for days in days_list:
            for n_ma in n_mas:
                for n_future in n_futures:
#                     print(f"days: {days}, n_ma: {n_ma}, n_future: {n_future}")
                    r_sq = LR_MACD(df.copy(), days, n_ma, n_future)
                    if r_sq > best[stock]['r_sq']:
                        secondbest[stock] = best[stock]
                        best[stock] = {'r_sq': r_sq, 'days': days, 
                                       'n_ma': n_ma, 'n_future': n_future}
                        print("new best[stock]: ", best[stock])
                    elif r_sq > secondbest[stock]['r_sq']:
                        secondbest[stock] = {'r_sq': r_sq, 'days': days, 
                                             'n_ma': n_ma, 'n_future': n_future}
                        print("new secondbest")
            print(". ", end="")
    #         print(f"{days} days best: {best[stock]}")
        # End time for the current stock calculation
        end_time_stock = time.time()
        print(f"Time for {stock}: {(end_time_stock - start_time_stock)//60:.0f} minutes"
              f" and {(end_time_stock - start_time_stock)%60:.0f} seconds.")
        print(f"best[{stock}] is: {best[stock]}\n")

    end_time_total = time.time()
    print(f"Total time: {(end_time_total - start_time_total)//60:.0f} minutes"
          f" and {(end_time_total - start_time_total)%60:.0f} seconds")
#     print("best:\n", best)
    print_comparison3(best, secondbest)
    
main()

len(df):  1258
stock_ranges: {'days': (1, 21), 'n_ma': (1, 21), 'n_future': (1, 21)}
new best[stock]:  {'r_sq': -0.09048121695108957, 'days': 1, 'n_ma': 1, 'n_future': 1}
new best[stock]:  {'r_sq': -0.06070573991807482, 'days': 1, 'n_ma': 1, 'n_future': 2}
new secondbest
new best[stock]:  {'r_sq': -0.002825731763507422, 'days': 1, 'n_ma': 5, 'n_future': 1}
new secondbest
new best[stock]:  {'r_sq': 0.005267734963887105, 'days': 1, 'n_ma': 6, 'n_future': 1}
new best[stock]:  {'r_sq': 0.041487514923610136, 'days': 1, 'n_ma': 6, 'n_future': 2}
new best[stock]:  {'r_sq': 0.05360412206287779, 'days': 1, 'n_ma': 6, 'n_future': 3}
new best[stock]:  {'r_sq': 0.06610139035855067, 'days': 1, 'n_ma': 7, 'n_future': 1}
new best[stock]:  {'r_sq': 0.07824037863399147, 'days': 1, 'n_ma': 7, 'n_future': 2}
new best[stock]:  {'r_sq': 0.13746743350663748, 'days': 1, 'n_ma': 7, 'n_future': 3}
new best[stock]:  {'r_sq': 0.1653940079728441, 'days': 1, 'n_ma': 7, 'n_future': 4}
new secondbest
new best[stock]

new best[stock]:  {'r_sq': 0.8241129573768883, 'days': 1, 'n_ma': 12, 'n_future': 12}
new best[stock]:  {'r_sq': 0.8241151397777726, 'days': 1, 'n_ma': 19, 'n_future': 18}
new best[stock]:  {'r_sq': 0.827970701781777, 'days': 1, 'n_ma': 20, 'n_future': 19}
. . . . . . . . . . . . . . . . . . . . Time for HSBC: 45 minutes and 22 seconds.
best[HSBC] is: {'r_sq': 0.827970701781777, 'days': 1, 'n_ma': 20, 'n_future': 19}

Total time: 180 minutes and 13 seconds
Ticker        R^2                 Days        n_ma      n_future
----------------------------------------------------------------
AAPL      0.51156 (   0.51037)      1 (  1)     14 ( 14)     12 ( 11)
DG        0.55737 (   0.54199)      1 (  1)     20 ( 19)     20 ( 19)
JPM       0.83675 (   0.83207)      1 (  1)      9 (  8)      9 (  8)
HSBC      0.82797 (   0.82412)      1 (  1)     20 ( 19)     19 ( 18)

Performance Summary:
Metric        R^2                Days                n_ma               n_fut
Average   0.68341 ( 0.67714) 

In [38]:
ranges = {'AAPL': {'days': (1, 21), 'n_ma': (1, 21), 'n_future': (1, 21)},
         'BAC': {'days': (1, 11), 'n_ma': (8, 21), 'n_future': (6, 17)},
         'COST':  {'days': (8, 15), 'n_ma': (15, 25),'n_future': (18, 24)},
         'C':     {'days': (1, 6),  'n_ma': (1, 11), 'n_future': (1, 5)},
         'DG':    {'days': (1, 21), 'n_ma': (1, 21), 'n_future': (1, 21)},
         'FB':    {'days': (1, 9), 'n_ma': (8, 21), 'n_future': (4, 11)},
         'HSBC':  {'days': (1, 21), 'n_ma': (1, 21), 'n_future': (1, 21)},
         'JPM':   {'days': (1, 21), 'n_ma': (1, 21), 'n_future': (1, 21)}}

# Ticker        R^2                 Days       n_ma       n_future
# -----------------------------------------------------------------
# AAPL      0.02480 (   0.02455)      6 (  6)     17 ( 17)         8 (        6)
# BAC       0.00026 (  -0.00018)      4 (  3)     12 ( 12)         1 (        1)
# COST      0.06092 (   0.05711)      9 (  7)     16 ( 16)        10 (       10)
# C         0.00722 (   0.00099)      2 (  1)      1 ( 20)         1 (        2)
# DG        0.01292 (   0.00944)      7 (  5)      9 ( 18)         2 (        2)
# FB        0.06486 (   0.05884)     10 (  6)     10 ( 18)         5 (        8)
# HSBC      0.01981 (   0.01823)      7 (  3)      2 (  1)         1 (        1)
# JPM       0.00436 (   0.00224)      4 (  3)     12 ( 12)         2 (        2)

In [270]:
import cProfile
if __name__ == "__main__":
    cProfile.run('main()', 'profiling_results.out')

len(df):  1258
stock_ranges: {'days': (7, 12), 'n_ma': (2, 12), 'n_future': (1, 5)}
new best[stock]:  {'r_sq': 0.004367860912637411, 'days': 7, 'n_ma': 2, 'n_future': 1}
new best[stock]:  {'r_sq': 0.006212028567458505, 'days': 7, 'n_ma': 3, 'n_future': 3}
new best[stock]:  {'r_sq': 0.007486773938031432, 'days': 7, 'n_ma': 6, 'n_future': 1}
new best[stock]:  {'r_sq': 0.012391589435892136, 'days': 7, 'n_ma': 7, 'n_future': 1}
.new best[stock]:  {'r_sq': 0.017431723477691174, 'days': 8, 'n_ma': 3, 'n_future': 3}
..new best[stock]:  {'r_sq': 0.022319693618527814, 'days': 10, 'n_ma': 10, 'n_future': 1}
.new best[stock]:  {'r_sq': 0.023623854324563487, 'days': 11, 'n_ma': 10, 'n_future': 1}
new best[stock]:  {'r_sq': 0.025996372211829044, 'days': 11, 'n_ma': 10, 'n_future': 3}
.Time for AAPL: 2.0 minutes and 15.316807508468628 seconds.
best[AAPL] is: {'r_sq': 0.025996372211829044, 'days': 11, 'n_ma': 10, 'n_future': 3}

len(df):  1259
stock_ranges: {'days': (1, 5), 'n_ma': (1, 6), 'n_future'

In [None]:
import pstats

# Create a Stats object
p = pstats.Stats('profiling_results.out')

# Print the statistics
# p.strip_dirs().sort_stats(-1).print_stats()
p.sort_stats('cumulative').print_stats(40)

In [269]:
ranges = {
    'AAPL': {'days': (7, 12), 'n_ma': (7, 12), 'n_future': (1, 5)},
    'BAC': {'days': (1, 11), 'n_ma': (1, 15), 'n_future': (1, 11)},
    'COST': {'days': (8, 11), 'n_ma': (21, 23), 'n_future': (21, 24)},
    'C': {'days': (1, 4), 'n_ma': (1, 12), 'n_future': (1, 4)},
    'DG': {'days': (4, 9), 'n_ma': (8, 20), 'n_future': (1, 4)},
    'FB': {'days': (1, 3), 'n_ma': (7, 10), 'n_future': (5, 10)},
    'HSBC': {'days': (1, 5), 'n_ma': (1, 3), 'n_future': (1, 3)},
    'JPM': {'days': (2, 6), 'n_ma': (11, 14), 'n_future': (1, 4)}
}
# Ticker        R^2                 Days        n_ma      n_future
# ----------------------------------------------------------------
# AAPL      0.02232 (   0.01743)     10 (  8)     10 (  3)      1 (  3)
# BAC      -0.00068 (  -0.00268)      3 (  2)      2 (  4)      1 (  1)
# COST      0.18193 (   0.17626)      9 (  9)     21 ( 21)     22 ( 21)
# C         0.00722 (  -0.00139)      2 (  1)      1 ( 10)      1 (  2)
# DG        0.00890 (   0.00539)      7 (  5)      9 ( 18)      2 (  2)
# FB        0.04299 (   0.03557)      1 (  1)      8 (  8)      8 (  6)
# HSBC      0.01823 (   0.01249)      3 (  2)      1 (  1)      1 (  1)
# JPM       0.00169 (  -0.00328)      4 (  3)     12 ( 12)      2 (  1)

In [9]:
import statistics
import math

def print_comparison3(best, secondbest):
    # Print the header
    header = f"{'Ticker':<6} {'R^2':>10} {'':>13} {'Days':>6} {'':>4} {'n_ma':>6} {'':>4} {'n_future':>6}"
    print(header)
    print('-' * len(header))

    # Initialize lists to hold the values for calculating the summary statistics
    r2_values = []
    r2_values_second = []
    days_values = []
    days_values_second = []
    n_ma_values = []
    n_ma_values_second = []
    n_future_values = []
    n_future_values_second = []

    # Print each item and collect values for the summary
    for ticker in best:
        best_metrics = best[ticker]
        secondbest_metrics = secondbest.get(ticker, {})
        
        # Append values for best and second best performance
        r2_values.append(best_metrics['r_sq'])
        r2_values_second.append(secondbest_metrics.get('r_sq', float('nan')))
        days_values.append(best_metrics['days'])
        days_values_second.append(secondbest_metrics.get('days', float('nan')))
        n_ma_values.append(best_metrics['n_ma'])
        n_ma_values_second.append(secondbest_metrics.get('n_ma', float('nan')))
        n_future_values.append(best_metrics['n_future'])
        n_future_values_second.append(secondbest_metrics.get('n_future', float('nan')))
        
        # Format and print the line for each ticker
        line = f"{ticker:<6} {best_metrics['r_sq']:>10.5f} ({secondbest_metrics.get('r_sq', 'n/a'):>10.5f}) "
        line += f"{best_metrics['days']:>6} ({secondbest_metrics.get('days', 'n/a'):>3}) "
        line += f"{best_metrics['n_ma']:>6} ({secondbest_metrics.get('n_ma', 'n/a'):>3})"
        line += f" {best_metrics['n_future']:>6} ({secondbest_metrics.get('n_future', 'n/a'):>3})"
        print(line)

    # Calculate the summary statistics for best and second best performances
    def calculate_summary(values):
        # Filter out nan values for accurate calculation
        filtered_values = [v for v in values if not math.isnan(v)]
        average = statistics.mean(filtered_values)
        median = statistics.median(filtered_values)
        stdev = statistics.stdev(filtered_values) if len(filtered_values) > 1 else 0
        return average, median, stdev

    # Calculate and print the best performance summary
    r2_avg, r2_med, r2_stdev = calculate_summary(r2_values)
    days_avg, days_med, days_stdev = calculate_summary(days_values)
    n_ma_avg, n_ma_med, n_ma_stdev = calculate_summary(n_ma_values)
    n_future_avg, n_future_med, n_future_stdev = calculate_summary(n_future_values)
    
    # Calculate and print the second best performance summary
    r2_avg_second, r2_med_second, r2_stdev_second = calculate_summary(r2_values_second)
    days_avg_second, days_med_second, days_stdev_second = calculate_summary(days_values_second)
    n_ma_avg_second, n_ma_med_second, n_ma_stdev_second = calculate_summary(n_ma_values_second)
    n_future_avg_second, n_future_med_second, n_future_stdev_second = calculate_summary(n_future_values_second)


    # Print the rows for Average, Median, and StDev with the calculated values
    print("\nPerformance Summary:")
    print(f"{'Metric':<8} {'R^2':>8.5} {'':>10} {'Days':>8.5} {'':>10} {'n_ma':>8.5} {'':>10} {'n_future':>8.5}")
    # Print the rows for Average, Median, and StDev with the best and second-best values
    print(f"{'Average':<8} {r2_avg:>8.5f} ({r2_avg_second:>8.5f}) {days_avg:>5.2f} ({days_avg_second:>5.2f}) {n_ma_avg:>5.2f} ({n_ma_avg_second:>5.2f}) {n_future_avg:>5.2f} ({n_future_avg_second:>5.2f})")
    print(f"{'Median':<8} {r2_med:>8.5f} ({r2_med_second:>8.5f}) {days_med:>5.2f} ({days_med_second:>5.2f}) {n_ma_med:>5.2f} ({n_ma_med_second:>5.2f}) {n_future_med:>5.2f} ({n_future_med_second:>5.2f})")
    print(f"{'StDev':<8} {r2_stdev:>8.5f} ({r2_stdev_second:>8.5f}) {days_stdev:>5.2f} ({days_stdev_second:>5.2f}) {n_ma_stdev:>5.2f} ({n_ma_stdev_second:>5.2f}) {n_future_stdev:>5.2f} ({n_future_stdev_second:>5.2f})")

In [70]:
def LR_runs(df, n_days):    
    df.reset_index(drop=True, inplace=True)
        
    # Initialize X and Y
    X = []
    Y = []
    
    # Populate X and Y
    # n = max(n_days, n_sma)
#     print(df[['Adj Close','SMA']].head())
#     for i in range(n_days*n_sma, n_days*n_sma+5):
    for i in range(n_days, len(df)):
        #print("i: ", i)
        X.append(df[['Log Returns']].iloc[i-n_days:i].values.flatten())
        Y.append(df['Log Returns'].iloc[i])
#     print(f"len(X)= {len(X)}, X[{i}]: {X}")
#     print(f"len(Y)= {len(Y)}, Y[{i}]: {Y}")
#     print()
#     print(df['SMA'].head(10))

    # split data
    test_size = int(len(X) * 0.2)

    # Training set
    X_train = X[:-test_size]
    y_train = Y[:-test_size]

    # Testing set
    X_test = X[-test_size:]
    y_test = Y[-test_size:]

    # Fit the model on the training data
    model = LinearRegression().fit(X_train, y_train)
    # normalize = False (input data)
    # copy_x = True (overwrite input variables)
    # n_jobs = None (number of parallelism. -1 uses all available processors)

    r_sq = model.score(X_test, y_test)
    print(f"coefficient of determination: {r_sq}")
    print(f"intercept: {model.intercept_}")
    print(f"slope: {model.coef_}")
    return r_sq

In [5]:
def LR_EMA(df, n_days, n_ema):    
    # Calculate Exponential Moving Average (EMA)
    df['EMA'] = df['Log Returns'].ewm(span=n_ema, adjust=False).mean()

    df = df.dropna(subset=['Log Returns', 'EMA'])
    df.reset_index(drop=True, inplace=True)
    
    # Initialize X and Y
    X = []
    Y = []
    
    # Populate X and Y
    for i in range(n_days*n_ema, len(df)):
        #print("i: ", i)
        X.append(df[['EMA']].iloc[i-n_days*n_ema:i:n_ema].values.flatten())
        Y.append(df['EMA'].iloc[i])

    # split data
    test_size = int(len(X) * 0.2)

    # Training set
    X_train = X[:-test_size]
    y_train = Y[:-test_size]

    # Testing set
    X_test = X[-test_size:]
    y_test = Y[-test_size:]

    # Fit the model on the training data
    model = LinearRegression(n_jobs=-1).fit(X_train, y_train)
    # normalize = False (input data)
    # copy_x = True (overwrite input variables)
    # n_jobs = None (number of parallelism. -1 uses all available processors)

    r_sq = model.score(X_test, y_test)
#     print(f"coefficient of determination: {r_sq}")
#     print(f"intercept: {model.intercept_}")
#     print(f"slope: {model.coef_}")
    return r_sq

In [3]:
def LR_SMA(df, n_days, n_sma):    
    # Calculate the simple moving average (SMA) for a window of n days
    df['SMA'] = df['Log Returns'].rolling(window=n_sma).mean()

    df = df.dropna(subset=['Log Returns', 'SMA'])
    df.reset_index(drop=True, inplace=True)
    
    # Initialize X and Y
    X = []
    Y = []
    
    # Populate X and Y
    for i in range(n_days*n_sma, len(df)):
        #print("i: ", i)
        X.append(df[['SMA']].iloc[i-n_days*n_sma:i:n_sma].values.flatten())
#         X.append(df[['SMA']].iloc[i-n_days*n_sma:i:n_sma].values.flatten())
        Y.append(df['SMA'].iloc[i])

    # split data
    test_size = int(len(X) * 0.2)

    # Training set
    X_train = X[:-test_size]
    y_train = Y[:-test_size]

    # Testing set
    X_test = X[-test_size:]
    y_test = Y[-test_size:]

    # Fit the model on the training data
    model = LinearRegression(n_jobs=-1).fit(X_train, y_train)
    # normalize = False (input data)
    # copy_x = True (overwrite input variables)
    # n_jobs = None (number of parallelism. -1 uses all available processors)

    r_sq = model.score(X_test, y_test)
#     print(f"coefficient of determination: {r_sq}")
#     print(f"intercept: {model.intercept_}")
#     print(f"slope: {model.coef_}")
    return r_sq

In [25]:
def LR_tomorrow(df, n_days, n_ma, n_future):  
    
    df_tech = technical_indicators(df, n_ma, n_future)
    
    # Initialize X and Y
    X = []
    Y = []
    
    # Populate X and Y
    for i in range(n_days*n_ma, len(df_tech)-(-n_ma+1+n_future)):
#     for i in range(n_days*n_ma, n_days*n_ma + 45):
        emas = df_tech[['EMA']].iloc[i-n_days*n_ma:i:n_ma].values.flatten()
        X.append(emas)
        y_SMA = df['y_SMA'].iloc[i - n_ma + n_future]
        Y.append(y_SMA)

    # split data
    test_size = int(len(X) * 0.2)

    # Training set
    X_train = X[:-test_size]
    y_train = Y[:-test_size]

    # Testing set
    X_test = X[-test_size:]
    y_test = Y[-test_size:]
    
    # Fit the model on the training data
    model = LinearRegression(n_jobs=-1).fit(X_train, y_train)
    # normalize = False (input data)
    # copy_x = True (overwrite input variables)
    # n_jobs = None (number of parallelism. -1 uses all available processors)

    r_sq = model.score(X_test, y_test)
#     print(f"coefficient of determination: {r_sq}")
#     print(f"intercept: {model.intercept_}")
#     print(f"slope: {model.coef_}")
    return r_sq