# Import packages


In [24]:
import numpy as np
import pylab as pl
from numpy import fft
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import datetime
from dateutil.relativedelta import relativedelta
import math


In [1]:
def data_to_harmonics_function(data_stock):
    harmonics = {}
    for i in data_stock:
        harmonics[i] = {}
        # get data_stock's infomation
        data = data_stock[i]['Close']
        array_data = np.array(data)
        n_data = array_data.size
        time_data = np.arange(0, n_data)

        # detrend data
        # find linear trend in data
        Polynomial = np.polyfit(time_data, array_data, 1)
        data_notrend = array_data - Polynomial[0] * time_data    # detrended x

        # fft process
        data_freqdom = fft.fft(data_notrend, n=n_data)
        frequence = fft.fftfreq(n_data)
        f_positive = frequence[np.where(frequence > 0)]
        data_freqdom_positive = data_freqdom[np.where(frequence > 0)]

        # sort indexes
        indexes = list(range(f_positive.size))      # frequencies
        # sort method 1
        # indexes.sort(key = lambda i: np.absolute(frequence[i]))     # sort indexes by frequency, lower -> higher
        # sort method 2 :
        # sort indexes by amplitudes, lower -> higher
        indexes.sort(key=lambda i: np.absolute(data_freqdom[i]))
        indexes.reverse()       # sort indexes by amplitudes, higher -> lower

        # get data_all_time'size
        time_transfer = np.arange(0, 2*array_data.size)
        # mix harmonics
        for j in indexes:
            ampli = np.absolute(
                data_freqdom_positive[j]) / n_data     # amplitude
            phase = np.angle(data_freqdom_positive[j])      # phase
            harmonics[i][j] = ampli * \
                np.cos(2 * np.pi * f_positive[j] * time_transfer + phase)
    return harmonics


In [26]:
def find_data_pv_HL_function(data, pv_range):
    pd.options.mode.chained_assignment = None
    for i in data:
        p = data[i]['High']
        v = data[i]['Low']
        data[i]['peaks'] = pd.Series(dtype='float64')
        data[i]['valleys'] = pd.Series(dtype='float64')
        peaks = data[i]['peaks']
        valleys = data[i]['valleys']
        for idx in range(0, len(p)):
            if idx < pv_range:
                if p[idx] == p.iloc[0:pv_range*2+1].max():
                    peaks.iloc[idx] = p[idx]
                if v[idx] == v.iloc[0:pv_range*2+1].min():
                    valleys.iloc[idx] = v[idx]
            if p[idx] == p.iloc[idx-pv_range:idx+pv_range].max():
                peaks.iloc[idx] = p[idx]
            if v[idx] == v.iloc[idx-pv_range:idx+pv_range].min():
                valleys.iloc[idx] = v[idx]
        data[i]['peaks'] = peaks
        data[i]['valleys'] = valleys

In [27]:
def find_data_pv_CL_function(data, pv_range):
    pd.options.mode.chained_assignment = None
    for i in data:
        pv = data[i]['Close']
        data[i]['peaks'] = pd.Series(dtype='float64')
        data[i]['valleys'] = pd.Series(dtype='float64')
        peaks = data[i]['peaks']
        valleys = data[i]['valleys']
        for idx in range(0, len(pv)):
            if idx < pv_range:
                if pv[idx] == pv.iloc[0:pv_range*2+1].max():
                    peaks.iloc[idx] = pv[idx]
                if pv[idx] == pv.iloc[0:pv_range*2+1].min():
                    valleys.iloc[idx] = pv[idx]
            if pv[idx] == pv.iloc[idx-pv_range:idx+pv_range].max():
                peaks.iloc[idx] = pv[idx]
            if pv[idx] == pv.iloc[idx-pv_range:idx+pv_range].min():
                valleys.iloc[idx] = pv[idx]
        data[i]['peaks'] = peaks
        data[i]['valleys'] = valleys
    

In [28]:
def find_signal_pv_function(signal, pv_range):
    pd.options.mode.chained_assignment = None
    for i in signal:
        for j in signal[i]:
            pv = signal[i][j]['Close']
            signal[i][j]['peaks'] = pd.Series(dtype='float64')
            signal[i][j]['valleys'] = pd.Series(dtype='float64')
            peaks = signal[i][j]['peaks']
            valleys = signal[i][j]['valleys']
            for idx in range(0, len(pv)):
                if idx < pv_range:
                    if pv[idx] == pv.iloc[0:pv_range*2+1].max():
                        peaks.iloc[idx] = pv[idx]
                    if pv[idx] == pv.iloc[0:pv_range*2+1].min():
                        valleys.iloc[idx] = pv[idx]
                if pv[idx] == pv.iloc[idx-pv_range:idx+pv_range].max():
                    peaks.iloc[idx] = pv[idx]
                if pv[idx] == pv.iloc[idx-pv_range:idx+pv_range].min():
                    valleys.iloc[idx] = pv[idx]
            signal[i][j]['peaks'] = peaks
            signal[i][j]['valleys'] = valleys


In [29]:
def find_pv_lead_function(data, processed_signal):
    for d in data:
        for p in processed_signal[d]:
            processed_signal[d][p]['pv'] = pd.Series(dtype='str')
            processing_signal = processed_signal[d][p].loc[list(data[d].index)]
            p_data = pd.DataFrame(
                {'peaks': data[d]['peaks'], 'count': range(len(data[d]))})
            p_data = p_data.drop(p_data[p_data['peaks'].isna()].index)
            p_data_count = list(p_data['count'])
            p_signal = pd.DataFrame(
                {'peaks': processing_signal['peaks'], 'count': range(len(processing_signal))})
            p_signal = p_signal.drop(p_signal[p_signal['peaks'].isna()].index)
            p_signal_list = list(p_signal['count'])
            p_lead = []
            for i in range(0, len(p_signal_list)):
                temp = []
                temp_abs = []
                temp_2 = []
                for j in range(0, len(p_data_count)):
                    temp.append((p_data_count[j] - p_signal_list[i]))
                    temp_abs.append(abs(p_data_count[j] - p_signal_list[i]))
                for k in range(0, len(temp_abs)):
                    if temp_abs[k] == min(temp_abs):
                        temp_2 = temp[k]
                p_lead.append(temp_2)
            p_signal['lead'] = p_lead

            v_data = pd.DataFrame(
                {'valleys': data[d]['valleys'], 'count': range(len(data[d]))})
            v_data = v_data.drop(v_data[v_data['valleys'].isna()].index)
            v_data_count = list(v_data['count'])
            v_signal = pd.DataFrame(
                {'valleys': processing_signal['valleys'], 'count': range(len(processing_signal))})
            v_signal = v_signal.drop(
                v_signal[v_signal['valleys'].isna()].index)
            v_signal_list = list(v_signal['count'])
            v_lead = []
            for i in range(0, len(v_signal_list)):
                temp = []
                temp_abs = []
                temp_2 = []
                for j in range(0, len(v_data_count)):
                    temp.append((v_data_count[j] - v_signal_list[i]))
                    temp_abs.append(abs(v_data_count[j] - v_signal_list[i]))
                for k in range(0, len(temp_abs)):
                    if temp_abs[k] == min(temp_abs):
                        temp_2 = temp[k]
                v_lead.append(temp_2)
            v_signal['lead'] = v_lead

            processed_signal[d][p]['lead'] = pd.Series(dtype='float64')
            processed_signal[d][p]['lead'].loc[p_signal['lead'].index] = p_signal['lead']
            processed_signal[d][p]['pv'].loc[p_signal['lead'].index] = 'peak'
            processed_signal[d][p]['lead'].loc[v_signal['lead'].index] = v_signal['lead']
            processed_signal[d][p]['pv'].loc[v_signal['lead'].index] = 'valley'


In [30]:
def mix_harmonics_function(harmonics, n_harm_lower_limit, n_harm_upper_limit):
    processed_signal = {}
    for i in harmonics:
        processed_signal[i] = {}
        for n_harm in range(n_harm_lower_limit, n_harm_upper_limit+1):
            mixed_harmonic = np.zeros(len(harmonics[i][0]))
            for j in range(n_harm):
                mixed_harmonic += harmonics[i][j]
            processed_signal[i][n_harm] = pd.DataFrame(
                {'Close': mixed_harmonic})
    return processed_signal


In [31]:
def get_fit_error_function(processed_signal, fit_method):
    errors = {}
    error = []
    for i in processed_signal:
        errors[i] = {}
        for j in processed_signal[i]:
            signal_dropna = processed_signal[i][j].drop(
                processed_signal[i][j][processed_signal[i][j]['lead'].isna()].index)
            if fit_method == 'mean':
                error = signal_dropna['lead'].mean()
            elif fit_method == 'abs':
                error = abs(signal_dropna['lead']).mean()
            elif fit_method == 'rmse':
                MSE = np.square(np.subtract(np.zeros_like(signal_dropna['lead']),signal_dropna['lead'])).mean() 
                RMSE = math.sqrt(MSE)
                error = RMSE
            errors[i][j] = error
    return errors


In [32]:
def get_best_fit_harm_function(processed_signal, errors):
    best_error = {}
    best_fit_harm = {}
    for i in processed_signal:
        best_error[i] = pd.Series(errors[i]).abs().min()
        best_fit_harm[i] = pd.Series(errors[i]).abs().idxmin()
    return best_fit_harm, best_error


In [33]:
def get_first_lead_function(processed_signal, best_fit_harm):
    first_date = {}
    lead = {}
    pv = {}
    for i in processed_signal:
        harm = best_fit_harm[i]
        temp = processed_signal[i][harm].loc[list(
            processed_signal[i][harm]['lead'].dropna().index)[0]]
        first_date[i] = list(processed_signal[i][harm]
                             ['lead'].dropna().index)[0]
        lead[i] = temp['lead']
        pv[i] = temp['pv']
    return first_date, lead, pv


In [34]:
def load_data(stock_name, date_predict_start, data_range, slide_range, n_slide):
    train_data = {}
    test_data = {}
    date_predict_start = datetime.datetime.strptime(
        date_predict_start, '%Y-%m-%d')
    date_data_start_list = []
    date_predict_start_list = []
    date_predict_end_list = []
    for i in range(n_slide*2):
        date_data_start = date_predict_start - \
            relativedelta(days=+data_range)
        date_predict_end = date_predict_start + \
            relativedelta(days=+data_range)
        date_data_start_list.append(date_data_start)
        date_predict_start_list.append(date_predict_start)
        date_predict_end_list.append(date_predict_end)
        date_data_start = date_data_start + \
            relativedelta(days=+slide_range)
        date_predict_start = date_predict_start + \
            relativedelta(days=+slide_range)

    train_data_all = yf.Ticker(stock_name).history(
        start=date_data_start_list[0], end=date_predict_start_list[-1])
    test_data_all = yf.Ticker(stock_name).history(
        start=date_predict_start_list[0], end=date_predict_end_list[-1])
    test_data_all['count'] = range(len(test_data_all))
    test_data_start_list = []
    for i in range(n_slide):
        train_data['data_' + str(i)] = train_data_all.iloc[i *
                                                           slide_range:i*slide_range+data_range]
        train_data['data_' + str(i)] = train_data['data_' +
                                                  str(i)].reset_index(drop=True)
        test_data['data_' + str(i)] = test_data_all.iloc[i *
                                                         slide_range:i*slide_range+data_range]
        test_data_start_list.append(test_data['data_' + str(i)].index[0])
        test_data['data_' + str(i)] = test_data['data_' +
                                                str(i)].reset_index(drop=True)
    return train_data, test_data, test_data_all, test_data_start_list


In [35]:
def preprocessing(train_data, test_data, pv_range, pv_method):
    if pv_method == 'CL':
        find_data_pv_CL_function(train_data, pv_range)
        find_data_pv_CL_function(test_data, pv_range)
    elif pv_method == 'HL':
        find_data_pv_HL_function(train_data, pv_range)
        find_data_pv_HL_function(test_data, pv_range)


In [36]:
def build_model(train_data, n_harm_lower_limit, n_harm_upper_limit, pv_range):
    harmonics = data_to_harmonics_function(train_data)
    processed_signal = mix_harmonics_function(
        harmonics, n_harm_lower_limit, n_harm_upper_limit)
    find_signal_pv_function(processed_signal, pv_range)
    find_pv_lead_function(train_data, processed_signal)
    return harmonics, processed_signal


In [37]:
def select_model(processed_signal, fit_method):
    errors = get_fit_error_function(processed_signal, fit_method)
    best_fit_harm, best_error = get_best_fit_harm_function(
        processed_signal, errors)
    first_date, lead, pv = get_first_lead_function(
        processed_signal, best_fit_harm)
    return errors, best_fit_harm, best_error, first_date, lead, pv


In [38]:
def evaluate_model(processed_signal, test_data_start_list, test_data_all, best_fit_harm, best_error, first_date, lead, pv):
    result_table = pd.DataFrame(columns=[
        's_date', 't_delay', 't_date', 'lead', 'ans_date', 'pv', 'error', 'best_fit'])
    for i in processed_signal:
        result_table.loc[i, 'error'] = round(best_error[i], 2)
        result_table.loc[i, 'best_fit'] = best_fit_harm[i]
        result_table.loc[i, 't_delay'] = first_date[i]
        result_table.loc[i, 'lead'] = lead[i]
        result_table.loc[i, 'pv'] = pv[i]
    result_table['s_date'] = test_data_start_list
    for i in result_table.index:
        t_date = test_data_all.loc[test_data_all['count'] ==
                                   test_data_all['count'].loc[result_table.loc[i, 's_date']] +
                                   result_table.loc[i, 't_delay']].index[0]
        t_date = datetime.datetime.strftime(t_date, '%Y-%m-%d')
        result_table.loc[i, 't_date'] = t_date
        ans = test_data_all.loc[test_data_all['count'] == test_data_all['count']
                                .loc[result_table.loc[i, 't_date']] + result_table.loc[i, 'lead']].index[0]
        ans = datetime.datetime.strftime(ans, '%Y-%m-%d')
        result_table.loc[i, 'ans_date'] = ans
    final_error = round(
        sum([abs(ele) for ele in result_table['lead']]) / len(result_table['lead']), 2)
    result_table = result_table.drop(columns='t_delay')
    return result_table, final_error


In [39]:
def main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
        n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method):

    # 1. Load data
    train_data, test_data, test_data_all, test_data_start_list = load_data(
        stock_name, date_predict_start, data_range, slide_range, n_slide)
    # 2. Preprocessing
    preprocessing(train_data, test_data, pv_range, pv_method)
    # 3. Build model
    harmonics, model = build_model(
        train_data, n_harm_lower_limit, n_harm_upper_limit, pv_range)
    # 4. Train model
    errors, best_fit_harm, best_error, first_date, lead, pv = select_model(
        model, fit_method)
    # 5. Evaluate model
    result_table, final_error = evaluate_model(
        model, test_data_start_list, test_data_all, best_fit_harm, best_error, first_date, lead, pv)
    print('final_error = ', final_error)
    print(result_table)
    # 6. Predict

    return harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error


In [40]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 20
slide_range = 5
n_slide = 1
pv_range = 2
n_harm_lower_limit = 5
n_harm_upper_limit = 5
fit_method = 'rmse'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.0
           s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0 2020-12-31       1  2021-01-04 -1.0  2020-12-31  valley  1.22        5


In [195]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.19
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       1  2021-01-04 -1.0  2020-12-31  valley  0.07       35
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.08       35
data_2  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  0.02       31
data_3  2021-02-16       0  2021-02-16  2.0  2021-02-18  valley  0.03       31
data_4  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.03       25
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.02       33
data_6  2021-03-30       0  2021-03-30  2.0  2021-04-01  valley   0.0       27
data_7  2021-04-14       1  2021-04-15  0.0  2021-04-15  valley  0.02       40
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley  0.08       21
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak   0.0       20
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley   0.0       21
data_11 2021-06-10       1  2021

In [196]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.19
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       1  2021-01-04 -1.0  2020-12-31  valley  0.79       31
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.85       37
data_2  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  0.88       33
data_3  2021-02-16       0  2021-02-16  2.0  2021-02-18  valley  0.78       31
data_4  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley   0.8       36
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.82       33
data_6  2021-03-30       0  2021-03-30  2.0  2021-04-01  valley   0.8       26
data_7  2021-04-14       1  2021-04-15  0.0  2021-04-15  valley  0.79       26
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley  0.81       28
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak  0.83       20
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley  0.84       25
data_11 2021-06-10       1  2021

In [38]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.19
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       1  2021-01-04 -1.0  2020-12-31  valley   1.0       31
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  1.14       37
data_2  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  1.13       33
data_3  2021-02-16       0  2021-02-16  2.0  2021-02-18  valley  0.97       31
data_4  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  1.04       31
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  1.08       36
data_6  2021-03-30       0  2021-03-30  2.0  2021-04-01  valley  1.12       26
data_7  2021-04-14       1  2021-04-15  0.0  2021-04-15  valley  1.07       26
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley  1.06       28
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak  1.16       28
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley  1.13       25
data_11 2021-06-10       1  2021

In [41]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'CL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.08
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       0  2020-12-31  0.0  2020-12-31  valley  1.48       38
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  1.33       38
data_2  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  0.85       39
data_3  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley  0.81       31
data_4  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.88       39
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.83       40
data_6  2021-03-30       0  2021-03-30  1.0  2021-03-31  valley  0.83       26
data_7  2021-04-14       1  2021-04-15 -1.0  2021-04-14  valley  0.89       26
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley  0.88       28
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak  0.92       28
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley  0.94       25
data_11 2021-06-10       1  2021

In [210]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 5
n_slide = 52
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.35
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       1  2021-01-04 -1.0  2020-12-31  valley  0.07       35
data_1  2021-01-08       0  2021-01-08  1.0  2021-01-11    peak  0.15       34
data_2  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.08       35
data_3  2021-01-25       0  2021-01-25  4.0  2021-01-29    peak  0.18       34
data_4  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  0.02       31
data_5  2021-02-08       0  2021-02-08  2.0  2021-02-10    peak  0.03       36
data_6  2021-02-16       0  2021-02-16  2.0  2021-02-18  valley  0.03       31
data_7  2021-02-23       0  2021-02-23  0.0  2021-02-23  valley   0.0       31
data_8  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.03       25
data_9  2021-03-09       0  2021-03-09  2.0  2021-03-11  valley  0.02       29
data_10 2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.02       33
data_11 2021-03-23       1  2021

In [211]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 5
n_slide = 52
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.21
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       1  2021-01-04 -1.0  2020-12-31  valley  0.79       31
data_1  2021-01-08       0  2021-01-08  1.0  2021-01-11    peak  0.84       26
data_2  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.85       37
data_3  2021-01-25       0  2021-01-25  4.0  2021-01-29    peak  0.89       29
data_4  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  0.88       33
data_5  2021-02-08       0  2021-02-08  2.0  2021-02-10    peak  0.89       29
data_6  2021-02-16       0  2021-02-16  2.0  2021-02-18  valley  0.78       31
data_7  2021-02-23       0  2021-02-23  0.0  2021-02-23  valley  0.85       36
data_8  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley   0.8       36
data_9  2021-03-09       0  2021-03-09  2.0  2021-03-11  valley   0.8       36
data_10 2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.82       33
data_11 2021-03-23       1  2021

In [44]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 5
n_slide = 52
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.19
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       1  2021-01-04 -1.0  2020-12-31  valley   1.0       31
data_1  2021-01-08       0  2021-01-08  1.0  2021-01-11    peak   1.1       29
data_2  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  1.14       37
data_3  2021-01-25       0  2021-01-25  4.0  2021-01-29    peak  1.26       29
data_4  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  1.13       33
data_5  2021-02-08       0  2021-02-08  2.0  2021-02-10    peak  1.15       29
data_6  2021-02-16       0  2021-02-16  2.0  2021-02-18  valley  0.97       31
data_7  2021-02-23       0  2021-02-23  0.0  2021-02-23  valley   1.1       31
data_8  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  1.04       31
data_9  2021-03-09       0  2021-03-09  2.0  2021-03-11  valley  1.01       33
data_10 2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  1.08       36
data_11 2021-03-23       1  2021

In [42]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 5
n_slide = 52
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'CL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  0.92
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       0  2020-12-31  0.0  2020-12-31  valley  1.48       38
data_1  2021-01-08       0  2021-01-08  1.0  2021-01-11    peak  1.44       32
data_2  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  1.33       38
data_3  2021-01-25       0  2021-01-25  3.0  2021-01-28    peak  0.95       38
data_4  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  0.85       39
data_5  2021-02-08       0  2021-02-08  1.0  2021-02-09    peak  0.85       39
data_6  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley  0.81       31
data_7  2021-02-23       0  2021-02-23  0.0  2021-02-23  valley  0.86       26
data_8  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.88       39
data_9  2021-03-09       0  2021-03-09  1.0  2021-03-10  valley  0.85       40
data_10 2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.83       40
data_11 2021-03-23       1  2021

In [199]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 5
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  3.81
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       3  2021-01-06   1.0  2021-01-07    peak  0.11   
data_1  2021-01-15       2  2021-01-20  -2.0  2021-01-15  valley  0.46   
data_2  2021-02-01       5  2021-02-08   2.0  2021-02-10    peak  0.04   
data_3  2021-02-16       0  2021-02-16   2.0  2021-02-18  valley  0.52   
data_4  2021-03-02       0  2021-03-02   0.0  2021-03-02  valley  0.18   
data_5  2021-03-16       5  2021-03-23   1.0  2021-03-24    peak  0.11   
data_6  2021-03-30       3  2021-04-05   2.0  2021-04-07    peak  0.27   
data_7  2021-04-14       3  2021-04-19  -2.0  2021-04-15  valley  0.15   
data_8  2021-04-28       3  2021-05-03   0.0  2021-05-03    peak  0.04   
data_9  2021-05-12       0  2021-05-12  13.0  2021-06-01    peak  0.26   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  0.14   
data_11 2021-06-10       2  2021-06-14  -1.0  2021-06-11  valley   0.0   
data_12 2021-06-24

In [200]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 5
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  3.62
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       4  2021-01-07   0.0  2021-01-07    peak  2.46   
data_1  2021-01-15       1  2021-01-19  -1.0  2021-01-15  valley  2.33   
data_2  2021-02-01       5  2021-02-08   2.0  2021-02-10    peak  2.21   
data_3  2021-02-16       0  2021-02-16   2.0  2021-02-18  valley  2.25   
data_4  2021-03-02       0  2021-03-02   0.0  2021-03-02  valley   1.7   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak  1.89   
data_6  2021-03-30       4  2021-04-06   1.0  2021-04-07    peak  1.76   
data_7  2021-04-14       1  2021-04-15   0.0  2021-04-15  valley  1.74   
data_8  2021-04-28       4  2021-05-04  -1.0  2021-05-03    peak  1.77   
data_9  2021-05-12       0  2021-05-12  13.0  2021-06-01    peak  2.39   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  1.94   
data_11 2021-06-10       2  2021-06-14  -1.0  2021-06-11  valley  1.52   
data_12 2021-06-24

In [43]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 5
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  3.69
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       1  2021-01-04   3.0  2021-01-07    peak  4.37   
data_1  2021-01-15       1  2021-01-19  -1.0  2021-01-15  valley  4.09   
data_2  2021-02-01       5  2021-02-08   2.0  2021-02-10    peak  4.06   
data_3  2021-02-16       0  2021-02-16   2.0  2021-02-18  valley  4.01   
data_4  2021-03-02       0  2021-03-02   0.0  2021-03-02  valley  3.22   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak  3.11   
data_6  2021-03-30       4  2021-04-06   1.0  2021-04-07    peak  3.21   
data_7  2021-04-14       1  2021-04-15   0.0  2021-04-15  valley  3.25   
data_8  2021-04-28       3  2021-05-03   0.0  2021-05-03    peak  3.06   
data_9  2021-05-12       0  2021-05-12  13.0  2021-06-01    peak  4.09   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  3.26   
data_11 2021-06-10       2  2021-06-14  -1.0  2021-06-11  valley  2.38   
data_12 2021-06-24

In [43]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 5
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'CL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  3.23
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       4  2021-01-07   2.0  2021-01-11    peak  3.97   
data_1  2021-01-15       2  2021-01-20  -2.0  2021-01-15  valley  3.28   
data_2  2021-02-01       0  2021-02-01   6.0  2021-02-09    peak  4.21   
data_3  2021-02-16       0  2021-02-16   0.0  2021-02-16  valley  4.11   
data_4  2021-03-02       0  2021-03-02   0.0  2021-03-02  valley  2.28   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak  2.64   
data_6  2021-03-30       3  2021-04-05   1.0  2021-04-06    peak  2.95   
data_7  2021-04-14       1  2021-04-15  -1.0  2021-04-14  valley  2.86   
data_8  2021-04-28       3  2021-05-03   0.0  2021-05-03    peak  2.84   
data_9  2021-05-12       0  2021-05-12   2.0  2021-05-14    peak  2.79   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  2.91   
data_11 2021-06-10       2  2021-06-14   0.0  2021-06-14  valley  2.92   
data_12 2021-06-24

In [201]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 10
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  5.19
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       9  2021-01-14  -9.0  2020-12-31  valley   0.0   
data_1  2021-01-15       3  2021-01-21  -3.0  2021-01-15  valley  0.31   
data_2  2021-02-01       9  2021-02-12  -9.0  2021-02-01  valley  0.38   
data_3  2021-02-16       0  2021-02-16   2.0  2021-02-18  valley   0.0   
data_4  2021-03-02      15  2021-03-23   1.0  2021-03-24    peak  0.81   
data_5  2021-03-16       5  2021-03-23   1.0  2021-03-24    peak  0.06   
data_6  2021-03-30       4  2021-04-06  19.0  2021-05-03    peak  1.22   
data_7  2021-04-14       1  2021-04-15   0.0  2021-04-15  valley  0.28   
data_8  2021-04-28       3  2021-05-03   0.0  2021-05-03    peak  0.24   
data_9  2021-05-12       7  2021-05-21   0.0  2021-05-21  valley   0.0   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  2.22   
data_11 2021-06-10       1  2021-06-11   0.0  2021-06-11  valley  3.16   
data_12 2021-06-24

In [202]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 10
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  4.65
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       9  2021-01-14  -9.0  2020-12-31  valley   8.0   
data_1  2021-01-15       3  2021-01-21  -3.0  2021-01-15  valley  5.94   
data_2  2021-02-01       9  2021-02-12  -9.0  2021-02-01  valley   5.6   
data_3  2021-02-16       0  2021-02-16   2.0  2021-02-18  valley  5.25   
data_4  2021-03-02      14  2021-03-22   2.0  2021-03-24    peak   5.6   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak   6.5   
data_6  2021-03-30       3  2021-04-05  20.0  2021-05-03    peak  6.62   
data_7  2021-04-14       1  2021-04-15   0.0  2021-04-15  valley  5.75   
data_8  2021-04-28       2  2021-04-30   1.0  2021-05-03    peak  6.06   
data_9  2021-05-12       7  2021-05-21   0.0  2021-05-21  valley  7.35   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  8.47   
data_11 2021-06-10       2  2021-06-14  -1.0  2021-06-11  valley  9.44   
data_12 2021-06-24

In [42]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 10
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  5.38
            s_date t_delay      t_date  lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14  -9.0  2020-12-31  valley  13.22   
data_1  2021-01-15       3  2021-01-21  -3.0  2021-01-15  valley    9.8   
data_2  2021-02-01      10  2021-02-16 -10.0  2021-02-01  valley   8.67   
data_3  2021-02-16       0  2021-02-16   2.0  2021-02-18  valley   8.94   
data_4  2021-03-02      14  2021-03-22   2.0  2021-03-24    peak   8.97   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak   9.95   
data_6  2021-03-30       3  2021-04-05  20.0  2021-05-03    peak  10.52   
data_7  2021-04-14       1  2021-04-15   0.0  2021-04-15  valley   9.35   
data_8  2021-04-28       2  2021-04-30   1.0  2021-05-03    peak   9.54   
data_9  2021-05-12       6  2021-05-20   1.0  2021-05-21  valley  10.66   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  14.75   
data_11 2021-06-10       2  2021-06-14  -1.0  2021-06-11  valley  15.68   
data_

In [44]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 10
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'CL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  4.08
            s_date t_delay      t_date  lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14   0.0  2021-01-14  valley  14.92   
data_1  2021-01-15       3  2021-01-21  -3.0  2021-01-15  valley  14.76   
data_2  2021-02-01      11  2021-02-17 -11.0  2021-02-01  valley  13.71   
data_3  2021-02-16       0  2021-02-16   0.0  2021-02-16  valley  13.92   
data_4  2021-03-02      14  2021-03-22   2.0  2021-03-24    peak  14.06   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak  13.94   
data_6  2021-03-30       3  2021-04-05  20.0  2021-05-03    peak  14.36   
data_7  2021-04-14       1  2021-04-15  -1.0  2021-04-14  valley  13.55   
data_8  2021-04-28       3  2021-05-03   0.0  2021-05-03    peak  13.84   
data_9  2021-05-12       6  2021-05-20   1.0  2021-05-21  valley  16.49   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  15.26   
data_11 2021-06-10       2  2021-06-14   0.0  2021-06-14  valley  16.06   
data_

In [203]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 15
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  9.27
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       9  2021-01-14  -9.0  2020-12-31  valley   0.0   
data_1  2021-01-15       2  2021-01-20  -2.0  2021-01-15  valley   0.1   
data_2  2021-02-01       9  2021-02-12  -9.0  2021-02-01  valley  0.09   
data_3  2021-02-16       0  2021-02-16   2.0  2021-02-18  valley  1.33   
data_4  2021-03-02      15  2021-03-23   1.0  2021-03-24    peak   1.1   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak  0.09   
data_6  2021-03-30      10  2021-04-14   1.0  2021-04-15  valley  1.91   
data_7  2021-04-14      13  2021-05-03   0.0  2021-05-03    peak  1.91   
data_8  2021-04-28       3  2021-05-03  20.0  2021-06-01    peak  1.17   
data_9  2021-05-12       6  2021-05-20   1.0  2021-05-21  valley  7.67   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  7.42   
data_11 2021-06-10       1  2021-06-11   0.0  2021-06-11  valley  5.75   
data_12 2021-06-24

In [204]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 15
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  9.15
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       9  2021-01-14  -9.0  2020-12-31  valley  4.64   
data_1  2021-01-15       2  2021-01-20  -2.0  2021-01-15  valley   4.3   
data_2  2021-02-01      11  2021-02-17 -11.0  2021-02-01  valley  5.45   
data_3  2021-02-16       0  2021-02-16   2.0  2021-02-18  valley  4.83   
data_4  2021-03-02      15  2021-03-23   1.0  2021-03-24    peak  5.18   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak   5.6   
data_6  2021-03-30      10  2021-04-14   1.0  2021-04-15  valley  5.55   
data_7  2021-04-14      14  2021-05-04  -1.0  2021-05-03    peak  5.82   
data_8  2021-04-28       4  2021-05-04  19.0  2021-06-01    peak   7.1   
data_9  2021-05-12       7  2021-05-21   0.0  2021-05-21  valley  9.75   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  8.25   
data_11 2021-06-10       2  2021-06-14  -1.0  2021-06-11  valley  9.33   
data_12 2021-06-24

In [41]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 15
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  9.12
            s_date t_delay      t_date  lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14  -9.0  2020-12-31  valley   8.03   
data_1  2021-01-15       2  2021-01-20  -2.0  2021-01-15  valley   7.89   
data_2  2021-02-01      11  2021-02-17 -11.0  2021-02-01  valley   8.62   
data_3  2021-02-16       0  2021-02-16   2.0  2021-02-18  valley   9.04   
data_4  2021-03-02      15  2021-03-23   1.0  2021-03-24    peak    9.2   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak   9.22   
data_6  2021-03-30      10  2021-04-14   1.0  2021-04-15  valley   9.23   
data_7  2021-04-14      14  2021-05-04  -1.0  2021-05-03    peak   9.25   
data_8  2021-04-28       4  2021-05-04  19.0  2021-06-01    peak  10.98   
data_9  2021-05-12       7  2021-05-21   0.0  2021-05-21  valley  16.91   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  15.69   
data_11 2021-06-10       2  2021-06-14  -1.0  2021-06-11  valley  17.11   
data_

In [45]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 15
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'CL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  11.73
            s_date t_delay      t_date  lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14   0.0  2021-01-14  valley  12.62   
data_1  2021-01-15       2  2021-01-20  -2.0  2021-01-15  valley  12.62   
data_2  2021-02-01      11  2021-02-17 -11.0  2021-02-01  valley  13.05   
data_3  2021-02-16       0  2021-02-16   0.0  2021-02-16  valley  13.33   
data_4  2021-03-02      14  2021-03-22   2.0  2021-03-24    peak  13.52   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak  13.38   
data_6  2021-03-30      10  2021-04-14   0.0  2021-04-14  valley  13.51   
data_7  2021-04-14      14  2021-05-04  -1.0  2021-05-03    peak  13.41   
data_8  2021-04-28       4  2021-05-04  84.0  2021-09-01    peak  29.28   
data_9  2021-05-12       7  2021-05-21   0.0  2021-05-21  valley  16.61   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  16.84   
data_11 2021-06-10       2  2021-06-14   0.0  2021-06-14  valley  18.07   
data

In [205]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 20
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  20.81
            s_date t_delay      t_date   lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14   -9.0  2020-12-31  valley    0.0   
data_1  2021-01-15       2  2021-01-20   -2.0  2021-01-15  valley   0.36   
data_2  2021-02-01      10  2021-02-16  -10.0  2021-02-01  valley   0.64   
data_3  2021-02-16      25  2021-03-23    1.0  2021-03-24    peak   1.18   
data_4  2021-03-02      15  2021-03-23    1.0  2021-03-24    peak   1.09   
data_5  2021-03-16       4  2021-03-22    2.0  2021-03-24    peak   2.11   
data_6  2021-03-30       3  2021-04-05   20.0  2021-05-03    peak   2.78   
data_7  2021-04-14      14  2021-05-04  142.0  2021-11-23    peak  35.22   
data_8  2021-04-28      17  2021-05-21    0.0  2021-05-21  valley  21.57   
data_9  2021-05-12       7  2021-05-21    0.0  2021-05-21  valley   15.9   
data_10 2021-05-26       0  2021-05-26    0.0  2021-05-26  valley   7.64   
data_11 2021-06-10       1  2021-06-11    0.0  2021-06-11  valley  

In [206]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 20
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  23.19
            s_date t_delay      t_date   lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14   -9.0  2020-12-31  valley   9.75   
data_1  2021-01-15       4  2021-01-22   -4.0  2021-01-15  valley  10.56   
data_2  2021-02-01       9  2021-02-12   -9.0  2021-02-01  valley  12.44   
data_3  2021-02-16      23  2021-03-19    3.0  2021-03-24    peak   13.0   
data_4  2021-03-02      14  2021-03-22    2.0  2021-03-24    peak  13.44   
data_5  2021-03-16       4  2021-03-22    2.0  2021-03-24    peak  13.44   
data_6  2021-03-30      23  2021-05-03    0.0  2021-05-03    peak  17.62   
data_7  2021-04-14      14  2021-05-04  142.0  2021-11-23    peak   44.0   
data_8  2021-04-28      17  2021-05-21    0.0  2021-05-21  valley  29.75   
data_9  2021-05-12       7  2021-05-21    0.0  2021-05-21  valley  28.22   
data_10 2021-05-26       0  2021-05-26    0.0  2021-05-26  valley  29.11   
data_11 2021-06-10       2  2021-06-14   -1.0  2021-06-11  valley  

In [40]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 20
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  20.69
            s_date t_delay      t_date   lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14   -9.0  2020-12-31  valley  16.67   
data_1  2021-01-15       2  2021-01-20   -2.0  2021-01-15  valley  18.96   
data_2  2021-02-01       9  2021-02-12   -9.0  2021-02-01  valley  22.26   
data_3  2021-02-16      23  2021-03-19    3.0  2021-03-24    peak  24.33   
data_4  2021-03-02      14  2021-03-22    2.0  2021-03-24    peak  24.29   
data_5  2021-03-16       4  2021-03-22    2.0  2021-03-24    peak  24.19   
data_6  2021-03-30       3  2021-04-05   20.0  2021-05-03    peak   26.7   
data_7  2021-04-14      14  2021-05-04  142.0  2021-11-23    peak  65.35   
data_8  2021-04-28      17  2021-05-21    0.0  2021-05-21  valley  45.43   
data_9  2021-05-12       7  2021-05-21    0.0  2021-05-21  valley  43.31   
data_10 2021-05-26       0  2021-05-26    0.0  2021-05-26  valley  43.55   
data_11 2021-06-10       2  2021-06-14   -1.0  2021-06-11  valley  

In [46]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 20
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'rmse'
pv_method = 'CL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  20.23
            s_date t_delay      t_date   lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14    0.0  2021-01-14  valley  17.52   
data_1  2021-01-15       2  2021-01-20   -2.0  2021-01-15  valley  18.73   
data_2  2021-02-01       9  2021-02-12   -9.0  2021-02-01  valley  21.69   
data_3  2021-02-16      23  2021-03-19    3.0  2021-03-24    peak  24.33   
data_4  2021-03-02      14  2021-03-22    2.0  2021-03-24    peak  24.29   
data_5  2021-03-16       4  2021-03-22    2.0  2021-03-24    peak  24.19   
data_6  2021-03-30       3  2021-04-05   20.0  2021-05-03    peak   26.7   
data_7  2021-04-14      14  2021-05-04  142.0  2021-11-23    peak  65.35   
data_8  2021-04-28      17  2021-05-21    0.0  2021-05-21  valley  45.43   
data_9  2021-05-12       7  2021-05-21    0.0  2021-05-21  valley  43.31   
data_10 2021-05-26       0  2021-05-26    0.0  2021-05-26  valley  43.55   
data_11 2021-06-10       2  2021-06-14    0.0  2021-06-14  valley  

In [207]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 1
n_harm_upper_limit = 70
fit_method = 'mean'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.19
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       0  2020-12-31  0.0  2020-12-31  valley   0.0       12
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.02       67
data_2  2021-02-01       1  2021-02-02 -1.0  2021-02-01  valley   0.0       68
data_3  2021-02-16       0  2021-02-16  2.0  2021-02-18  valley   0.0        5
data_4  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley   0.0       57
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley   0.0       51
data_6  2021-03-30       0  2021-03-30  2.0  2021-04-01  valley   0.0        5
data_7  2021-04-14       1  2021-04-15  0.0  2021-04-15  valley  0.01       67
data_8  2021-04-28       0  2021-04-28  3.0  2021-05-03    peak   0.0        4
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak   0.0        3
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley   0.0       12
data_11 2021-06-10       0  2021

In [208]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 1
n_harm_upper_limit = 70
fit_method = 'abs'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.04
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       1  2021-01-04 -1.0  2020-12-31  valley  0.79       31
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.85       37
data_2  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  0.88       33
data_3  2021-02-16       0  2021-02-16  2.0  2021-02-18  valley  0.78       31
data_4  2021-03-02       0  2021-03-02  1.0  2021-03-03    peak  0.75        3
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.82       33
data_6  2021-03-30       0  2021-03-30  2.0  2021-04-01  valley   0.8       26
data_7  2021-04-14       1  2021-04-15  0.0  2021-04-15  valley  0.79       26
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley  0.67        1
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak  0.83       20
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley  0.79       41
data_11 2021-06-10       2  2021

In [39]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 1
n_harm_upper_limit = 70
fit_method = 'rmse'
pv_method = 'HL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  1.08
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       1  2021-01-04 -1.0  2020-12-31  valley   1.0       31
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  1.14       37
data_2  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  1.13       33
data_3  2021-02-16       0  2021-02-16  2.0  2021-02-18  valley  0.97       31
data_4  2021-03-02       0  2021-03-02  1.0  2021-03-03    peak  0.87        3
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  1.08       36
data_6  2021-03-30       0  2021-03-30  2.0  2021-04-01  valley  1.12       26
data_7  2021-04-14       1  2021-04-15  0.0  2021-04-15  valley  1.07       26
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley  0.82        1
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak  1.16       28
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley  1.13       25
data_11 2021-06-10       1  2021

In [47]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 1
n_harm_upper_limit = 70
fit_method = 'rmse'
pv_method = 'CL'
harmonics, model, errors, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method, pv_method)


final_error =  0.65
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       0  2020-12-31  1.0  2021-01-04    peak  0.45        3
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  1.31       41
data_2  2021-02-01       1  2021-02-02 -1.0  2021-02-01  valley  0.82       56
data_3  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley  0.77       56
data_4  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.81       41
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.69       41
data_6  2021-03-30       2  2021-04-01 -1.0  2021-03-31  valley  0.82       68
data_7  2021-04-14       1  2021-04-15 -1.0  2021-04-14  valley  0.89       26
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley  0.71        2
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak  0.88       41
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley  0.91       13
data_11 2021-06-10       2  2021