# Import packages


In [2]:
import numpy as np
import pylab as pl
from numpy import fft
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import datetime
from dateutil.relativedelta import relativedelta


In [3]:
def data_to_harmonics_function(data_stock):
    harmonics = {}
    for i in data_stock:
        harmonics[i] = {}
        # get data_stock's infomation
        data = data_stock[i]['Close']
        array_data = np.array(data)
        n_data = array_data.size
        time_data = np.arange(0, n_data)

        # detrend data
        # find linear trend in data
        Polynomial = np.polyfit(time_data, array_data, 1)
        data_notrend = array_data - Polynomial[0] * time_data    # detrended x

        # fft process
        data_freqdom = fft.fft(data_notrend, n=n_data)
        frequence = fft.fftfreq(n_data)
        f_positive = frequence[np.where(frequence > 0)]
        data_freqdom_positive = data_freqdom[np.where(frequence > 0)]

        # sort indexes
        indexes = list(range(f_positive.size))      # frequencies
        # sort method 1
        # indexes.sort(key = lambda i: np.absolute(frequence[i]))     # sort indexes by frequency, lower -> higher
        # sort method 2 :
        # sort indexes by amplitudes, lower -> higher
        indexes.sort(key=lambda i: np.absolute(data_freqdom[i]))
        indexes.reverse()       # sort indexes by amplitudes, higher -> lower

        # get data_all_time'size
        time_transfer = np.arange(0, 2*array_data.size)

        # mix harmonics
        for j in indexes:
            ampli = np.absolute(
                data_freqdom_positive[j]) / n_data     # amplitude
            phase = np.angle(data_freqdom_positive[j])      # phase
            harmonics[i][j] = ampli * \
                np.cos(2 * np.pi * f_positive[j] * time_transfer + phase)
    return harmonics


In [4]:
def find_pv_function(data, pv_range):
    pd.options.mode.chained_assignment = None
    try:
        for i in data:
            pv = data[i]['Close']
            data[i]['peaks'] = pd.Series(dtype='float64')
            data[i]['valleys'] = pd.Series(dtype='float64')
            peaks = data[i]['peaks']
            valleys = data[i]['valleys']
            for idx in range(0, len(pv)):
                if idx < pv_range:
                    if pv[idx] == pv.iloc[0:pv_range*2+1].max():
                        peaks.iloc[idx] = pv[idx]
                    if pv[idx] == pv.iloc[0:pv_range*2+1].min():
                        valleys.iloc[idx] = pv[idx]
                if pv[idx] == pv.iloc[idx-pv_range:idx+pv_range].max():
                    peaks.iloc[idx] = pv[idx]
                if pv[idx] == pv.iloc[idx-pv_range:idx+pv_range].min():
                    valleys.iloc[idx] = pv[idx]
            data[i]['peaks'] = peaks
            data[i]['valleys'] = valleys
    except:
        for i in data:
            for j in data[i]:
                pv = data[i][j]['Close']
                data[i][j]['peaks'] = pd.Series(dtype='float64')
                data[i][j]['valleys'] = pd.Series(dtype='float64')
                peaks = data[i][j]['peaks']
                valleys = data[i][j]['valleys']
                for idx in range(0, len(pv)):
                    if idx < pv_range:
                        if pv[idx] == pv.iloc[0:pv_range*2+1].max():
                            peaks.iloc[idx] = pv[idx]
                        if pv[idx] == pv.iloc[0:pv_range*2+1].min():
                            valleys.iloc[idx] = pv[idx]
                    if pv[idx] == pv.iloc[idx-pv_range:idx+pv_range].max():
                        peaks.iloc[idx] = pv[idx]
                    if pv[idx] == pv.iloc[idx-pv_range:idx+pv_range].min():
                        valleys.iloc[idx] = pv[idx]
                data[i][j]['peaks'] = peaks
                data[i][j]['valleys'] = valleys


In [5]:
def mix_harmonics(harmonics, n_harm_lower_limit, n_harm_upper_limit):
    processed_signal = {}
    for i in harmonics:
        processed_signal[i] = {}
        for n_harm in range(n_harm_lower_limit, n_harm_upper_limit+1):
            mixed_harmonic = np.zeros(len(harmonics[i][0]))
            for j in range(n_harm):
                mixed_harmonic += harmonics[i][j]
                # print(n_harm)
            # cuted_mixed_harmonic = mixed_harmonic[int(
            #     len(mixed_harmonic)/2):int(len(mixed_harmonic))]
            processed_signal[i][n_harm] = pd.DataFrame(
                {'Close': mixed_harmonic})
    return processed_signal


In [6]:
def find_pv_lead_function(data, processed_signal):
    for d in data:
        for p in processed_signal[d]:
            processed_signal[d][p]['pv'] = pd.Series(dtype='str')
            processing_signal = processed_signal[d][p].loc[list(data[d].index)]
            p_data = pd.DataFrame(
                {'peaks': data[d]['peaks'], 'count': range(len(data[d]))})
            p_data = p_data.drop(p_data[p_data['peaks'].isna()].index)
            p_data_count = list(p_data['count'])
            p_data_count_ans = list(p_data['count'].index)
            p_signal = pd.DataFrame(
                {'peaks': processing_signal['peaks'], 'count': range(len(processing_signal))})
            p_signal = p_signal.drop(p_signal[p_signal['peaks'].isna()].index)
            p_signal_list = list(p_signal['count'])
            p_lead = []
            p_ans = []
            for i in range(0, len(p_signal_list)):
                temp = []
                temp_abs = []
                temp_2 = []
                temp_p_ans = []
                for j in range(0, len(p_data_count)):
                    temp.append((p_data_count[j] - p_signal_list[i]))
                    temp_abs.append(abs(p_data_count[j] - p_signal_list[i]))
                for k in range(0, len(temp_abs)):
                    if temp_abs[k] == min(temp_abs):
                        temp_2 = temp[k]
                        temp_p_ans = p_data_count_ans[k]
                p_lead.append(temp_2)
                p_ans.append(temp_p_ans)
            p_signal['lead'] = p_lead

            v_data = pd.DataFrame(
                {'valleys': data[d]['valleys'], 'count': range(len(data[d]))})
            v_data = v_data.drop(v_data[v_data['valleys'].isna()].index)
            v_data_count = list(v_data['count'])
            v_data_count_ans = list(v_data['count'].index)
            v_signal = pd.DataFrame(
                {'valleys': processing_signal['valleys'], 'count': range(len(processing_signal))})
            v_signal = v_signal.drop(
                v_signal[v_signal['valleys'].isna()].index)
            v_signal_list = list(v_signal['count'])
            v_lead = []
            v_ans = []
            for i in range(0, len(v_signal_list)):
                temp = []
                temp_abs = []
                temp_2 = []
                temp_v_ans = []
                for j in range(0, len(v_data_count)):
                    temp.append((v_data_count[j] - v_signal_list[i]))
                    temp_abs.append(abs(v_data_count[j] - v_signal_list[i]))
                for k in range(0, len(temp_abs)):
                    if temp_abs[k] == min(temp_abs):
                        temp_2 = temp[k]
                        temp_v_ans = v_data_count_ans[k]
                v_lead.append(temp_2)
                v_ans.append(temp_v_ans)
            v_signal['lead'] = v_lead
            
            processed_signal[d][p]['lead'] = pd.Series(dtype='float64')
            processed_signal[d][p]['ans'] = pd.Series(dtype='int')
            processed_signal[d][p]['lead'].loc[p_signal['lead'].index] = p_signal['lead']
            processed_signal[d][p]['pv'].loc[p_signal['lead'].index] = 'peak'
            processed_signal[d][p]['lead'].loc[v_signal['lead'].index] = v_signal['lead']
            processed_signal[d][p]['pv'].loc[v_signal['lead'].index] = 'valley'
            processed_signal[d][p]['ans'].loc[p_signal['lead'].index] = p_ans
            processed_signal[d][p]['ans'].loc[v_signal['lead'].index] = v_ans

In [7]:
# stock_name = "^GSPC"
# date_predict_start = '2021-01-01'
# data_range = 200
# slide_range = 5
# n_slide = 1
# pv_range = 2
# n_harm_lower_limit = 20
# n_harm_upper_limit = 20
# fit_method = 'mean'
# harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
#     stock_name, date_predict_start, data_range, slide_range,
#     n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


In [8]:
def get_fit_error_function(processed_signal, fit_method):
    errors = {}
    for i in processed_signal:
        errors[i] = {}
        for j in processed_signal[i]:
            signal_dropna = processed_signal[i][j].drop(
                processed_signal[i][j][processed_signal[i][j]['lead'].isna()].index)
            if fit_method == 'mean':
                error = signal_dropna['lead'].mean()
            elif fit_method == 'abs':
                error = abs(signal_dropna['lead']).mean()
            errors[i][j] = error
    return errors


In [9]:
def get_best_fit_harm(processed_signal, errors):
    best_error = {}
    best_fit_harm = {}
    for i in processed_signal:
        best_error[i] = pd.Series(errors[i]).abs().min()
        best_fit_harm[i] = pd.Series(errors[i]).abs().idxmin()
        # print(best_fit_harm, best_error)
    return best_fit_harm, best_error


In [10]:
def get_first_lead_function(processed_signal):
    first_date = {}
    lead = {}
    pv = {}
    for i in processed_signal:
        first_date[i] = {}
        lead[i] = {}
        pv[i] = {}
        for j in processed_signal[i]:
            temp = processed_signal[i][j].loc[list(
                processed_signal[i][j]['lead'].dropna().index)[0]]
            first_date[i][j] = list(
                processed_signal[i][j]['lead'].dropna().index)[0]
            lead[i][j] = temp['lead']
            pv[i][j] = temp['pv']
    return first_date, lead, pv


In [11]:
def get_first_lead_function_best_fit(processed_signal, best_fit_harm):
    first_date = {}
    lead = {}
    pv = {}
    ans_date = {}
    for i in processed_signal:
        harm = best_fit_harm[i]
        temp = processed_signal[i][harm].loc[list(
            processed_signal[i][harm]['lead'].dropna().index)[0]]
        first_date[i] = list(processed_signal[i][harm]
                             ['lead'].dropna().index)[0]
        lead[i] = temp['lead']
        pv[i] = temp['pv']
        ans_date[i] = temp['ans']
    return first_date, lead, pv, ans_date


In [12]:
def load_data(stock_name, date_predict_start, data_range, slide_range, n_slide):
    train_data = {}
    test_data = {}
    date_predict_start = datetime.datetime.strptime(
        date_predict_start, '%Y-%m-%d')
    date_data_start_list = []
    date_predict_start_list = []
    date_predict_end_list = []
    for i in range(n_slide*2):
        date_data_start = date_predict_start - \
            relativedelta(days=+data_range)
        date_predict_end = date_predict_start + \
            relativedelta(days=+data_range)
        date_data_start_list.append(date_data_start)
        date_predict_start_list.append(date_predict_start)
        date_predict_end_list.append(date_predict_end)
        date_data_start = date_data_start + \
            relativedelta(days=+slide_range)
        date_predict_start = date_predict_start + \
            relativedelta(days=+slide_range)

    train_data_all = yf.Ticker(stock_name).history(
        start=date_data_start_list[0], end=date_predict_start_list[-1])
    test_data_all = yf.Ticker(stock_name).history(
        start=date_predict_start_list[0], end=date_predict_end_list[-1])
    test_data_all['count'] = range(len(test_data_all))
    test_data_start_list = []
    for i in range(n_slide):
        train_data['data_' + str(i)] = train_data_all.iloc[i *
                                                           slide_range:i*slide_range+data_range]
        train_data['data_' + str(i)] = train_data['data_' +
                                                  str(i)].reset_index(drop=True)
        test_data['data_' + str(i)] = test_data_all.iloc[i *
                                                         slide_range:i*slide_range+data_range]
        test_data_start_list.append(test_data['data_' + str(i)].index[0])
        test_data['data_' + str(i)] = test_data['data_' +
                                                str(i)].reset_index(drop=True)
    return train_data, test_data, test_data_all, test_data_start_list


In [13]:
def preprocessing(train_data, test_data, pv_range):
    find_pv_function(train_data, pv_range)
    find_pv_function(test_data, pv_range)

In [14]:
def build_model(train_data, n_harm_lower_limit, n_harm_upper_limit, pv_range) :
    harmonics = data_to_harmonics_function(train_data)
    processed_signal = mix_harmonics(
        harmonics, n_harm_lower_limit, n_harm_upper_limit)
    find_pv_function(processed_signal, pv_range)
    find_pv_lead_function(train_data, processed_signal)
    return harmonics, processed_signal

In [15]:
def train_model(processed_signal, fit_method):
    errors = get_fit_error_function(processed_signal, fit_method)
    best_fit_harm, best_error = get_best_fit_harm(processed_signal, errors)
    first_date, lead, pv, ans_date = get_first_lead_function_best_fit(
        processed_signal, best_fit_harm)
    return best_fit_harm, best_error, first_date, lead, pv, ans_date


In [16]:
def evaluate_model(processed_signal, test_data_start_list, test_data_all, best_fit_harm, best_error, first_date, lead, pv, ans_date):
    result_table = pd.DataFrame(columns=[
        's_date', 't_delay', 't_date', 'lead', 'ans_date', 'pv', 'error', 'best_fit'])
    for i in processed_signal:
        result_table.loc[i, 'error'] = round(best_error[i], 2)
        result_table.loc[i, 'ans_date'] = ans_date[i]
        result_table.loc[i, 'best_fit'] = best_fit_harm[i]
        result_table.loc[i, 't_delay'] = first_date[i]
        result_table.loc[i, 'lead'] = lead[i]
        result_table.loc[i, 'pv'] = pv[i]
    result_table['s_date'] = test_data_start_list
    for i in result_table.index:
        t_date = test_data_all.loc[test_data_all['count'] ==
                                        test_data_all['count'].loc[result_table.loc[i, 's_date']] +
                                        result_table.loc[i, 't_delay']].index[0]
        t_date = datetime.datetime.strftime(t_date, '%Y-%m-%d')
        result_table.loc[i, 't_date'] = t_date
        ans = test_data_all.loc[ test_data_all['count'] == test_data_all['count'].loc[result_table.loc[i, 't_date']] + result_table.loc[i, 'lead']].index[0]
        ans = datetime.datetime.strftime(ans, '%Y-%m-%d')
        result_table.loc[i, 'ans_date'] = ans
    final_error = round(
        sum([abs(ele) for ele in result_table['lead']]) / len(result_table['lead']), 2)
    return result_table, final_error


In [17]:
def main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
        n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method):

    # 1. Load data
    train_data, test_data, test_data_all, test_data_start_list = load_data(
        stock_name, date_predict_start, data_range, slide_range, n_slide)
    # 2. Preprocessing
    preprocessing(train_data, test_data, pv_range)
    # 3. Build model
    harmonics, model = build_model(
        train_data, n_harm_lower_limit, n_harm_upper_limit, pv_range)
    # 4. Train model
    best_fit_harm, best_error, first_date, lead, pv, ans_date = train_model(
        model, fit_method)
    # 5. Evaluate model
    result_table, final_error = evaluate_model(
        model, test_data_start_list, test_data_all, best_fit_harm, best_error, first_date, lead, pv, ans_date)
    print('final_error = ', final_error)
    print(result_table)
    # 6. Predict

    return harmonics, model, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error


In [40]:
# stock_name = "^GSPC"
# date_predict_start = '2021-01-01'
# data_range = 200
# slide_range = 5
# n_slide = 1
# pv_range = 2
# n_harm_lower_limit = 20
# n_harm_upper_limit = 20
# fit_method = 'mean'
# harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
#     stock_name, date_predict_start, data_range, slide_range,
#     n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


In [41]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  1.12
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       2  2021-01-05  2.0  2021-01-07  valley  0.07       39
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.07       40
data_2  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley   0.0       34
data_3  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley   0.0       28
data_4  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.02       35
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.02       31
data_6  2021-03-30       0  2021-03-30  1.0  2021-03-31  valley   0.0       25
data_7  2021-04-14       1  2021-04-15 -1.0  2021-04-14  valley   0.0       29
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley   0.0       27
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak   0.0       27
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley   0.0       27
data_11 2021-06-10       2  2021

In [42]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  1.0
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       0  2020-12-31  0.0  2020-12-31  valley  0.73       38
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.72       38
data_2  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  0.59       37
data_3  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley  0.54       31
data_4  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.63       32
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.57       26
data_6  2021-03-30       0  2021-03-30  1.0  2021-03-31  valley  0.57       26
data_7  2021-04-14       1  2021-04-15 -1.0  2021-04-14  valley  0.65       39
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley  0.64       40
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak  0.64       40
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley  0.69       25
data_11 2021-06-10       1  2021-

In [56]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 5
n_slide = 52
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  1.27
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       2  2021-01-05  2.0  2021-01-07  valley  0.07       39
data_1  2021-01-08       0  2021-01-08  1.0  2021-01-11    peak   0.0       40
data_2  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.07       40
data_3  2021-01-25       0  2021-01-25  3.0  2021-01-28    peak   0.0       31
data_4  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley   0.0       34
data_5  2021-02-08       0  2021-02-08  1.0  2021-02-09    peak   0.0       25
data_6  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley   0.0       28
data_7  2021-02-23       0  2021-02-23  0.0  2021-02-23  valley   0.0       20
data_8  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.02       35
data_9  2021-03-09       0  2021-03-09  1.0  2021-03-10  valley   0.0       27
data_10 2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.02       31
data_11 2021-03-23       2  2021

In [55]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 5
n_slide = 52
pv_range = 2
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  0.85
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       0  2020-12-31  0.0  2020-12-31  valley  0.73       38
data_1  2021-01-08       0  2021-01-08  1.0  2021-01-11    peak  0.73       38
data_2  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.72       38
data_3  2021-01-25       0  2021-01-25  3.0  2021-01-28    peak  0.64       38
data_4  2021-02-01       2  2021-02-03 -2.0  2021-02-01  valley  0.59       37
data_5  2021-02-08       0  2021-02-08  1.0  2021-02-09    peak  0.58       26
data_6  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley  0.54       31
data_7  2021-02-23       0  2021-02-23  0.0  2021-02-23  valley  0.57       26
data_8  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.63       32
data_9  2021-03-09       0  2021-03-09  1.0  2021-03-10  valley  0.64       40
data_10 2021-03-16       0  2021-03-16  0.0  2021-03-16  valley  0.57       26
data_11 2021-03-23       1  2021

In [45]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 5
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  3.27
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       2  2021-01-05   4.0  2021-01-11    peak  0.43   
data_1  2021-01-15       2  2021-01-20  -2.0  2021-01-15  valley  0.04   
data_2  2021-02-01       0  2021-02-01   6.0  2021-02-09    peak   0.0   
data_3  2021-02-16       0  2021-02-16   0.0  2021-02-16  valley   1.0   
data_4  2021-03-02       0  2021-03-02   0.0  2021-03-02  valley  0.12   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak  0.48   
data_6  2021-03-30       3  2021-04-05   1.0  2021-04-06    peak   0.1   
data_7  2021-04-14       1  2021-04-15  -1.0  2021-04-14  valley  0.16   
data_8  2021-04-28       4  2021-05-04  -1.0  2021-05-03    peak   0.1   
data_9  2021-05-12       0  2021-05-12   2.0  2021-05-14    peak  0.12   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  0.03   
data_11 2021-06-10       2  2021-06-14   0.0  2021-06-14  valley  0.03   
data_12 2021-06-24

In [46]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 5
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  2.85
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       4  2021-01-07   2.0  2021-01-11    peak  1.93   
data_1  2021-01-15       1  2021-01-19  -1.0  2021-01-15  valley  1.78   
data_2  2021-02-01       5  2021-02-08   1.0  2021-02-09    peak  2.15   
data_3  2021-02-16       0  2021-02-16   0.0  2021-02-16  valley   2.0   
data_4  2021-03-02       0  2021-03-02   0.0  2021-03-02  valley  1.26   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak  1.54   
data_6  2021-03-30       4  2021-04-06   0.0  2021-04-06    peak  1.38   
data_7  2021-04-14       1  2021-04-15  -1.0  2021-04-14  valley  1.52   
data_8  2021-04-28       3  2021-05-03   0.0  2021-05-03    peak   1.5   
data_9  2021-05-12       0  2021-05-12   2.0  2021-05-14    peak  1.42   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  1.41   
data_11 2021-06-10       2  2021-06-14   0.0  2021-06-14  valley  1.48   
data_12 2021-06-24

In [47]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 10
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  5.92
            s_date t_delay      t_date  lead    ans_date      pv error  \
data_0  2020-12-31       4  2021-01-07  52.0  2021-03-24    peak  0.12   
data_1  2021-01-15       1  2021-01-19  -1.0  2021-01-15  valley  0.33   
data_2  2021-02-01      10  2021-02-16 -10.0  2021-02-01  valley   1.5   
data_3  2021-02-16       0  2021-02-16   0.0  2021-02-16  valley  0.38   
data_4  2021-03-02      15  2021-03-23   1.0  2021-03-24    peak  0.54   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak  1.33   
data_6  2021-03-30       3  2021-04-05  20.0  2021-05-03    peak  0.12   
data_7  2021-04-14       1  2021-04-15  -1.0  2021-04-14  valley  0.06   
data_8  2021-04-28       4  2021-05-04  -1.0  2021-05-03    peak   0.5   
data_9  2021-05-12       7  2021-05-21   0.0  2021-05-21  valley  2.53   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  0.72   
data_11 2021-06-10       1  2021-06-11   1.0  2021-06-14  valley  1.84   
data_12 2021-06-24

In [48]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 10
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  3.96
            s_date t_delay      t_date  lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14   0.0  2021-01-14  valley    8.5   
data_1  2021-01-15       2  2021-01-20  -2.0  2021-01-15  valley    9.2   
data_2  2021-02-01      10  2021-02-16 -10.0  2021-02-01  valley   8.57   
data_3  2021-02-16       0  2021-02-16   0.0  2021-02-16  valley   8.73   
data_4  2021-03-02      15  2021-03-23   1.0  2021-03-24    peak    8.8   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak   9.07   
data_6  2021-03-30       3  2021-04-05  20.0  2021-05-03    peak   9.33   
data_7  2021-04-14       1  2021-04-15  -1.0  2021-04-14  valley   8.88   
data_8  2021-04-28       4  2021-05-04  -1.0  2021-05-03    peak    9.0   
data_9  2021-05-12       7  2021-05-21   0.0  2021-05-21  valley  11.13   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley   9.35   
data_11 2021-06-10       2  2021-06-14   0.0  2021-06-14  valley  10.17   
data_

In [49]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 15
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  11.38
            s_date t_delay      t_date  lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14   0.0  2021-01-14  valley   0.36   
data_1  2021-01-15       2  2021-01-20  -2.0  2021-01-15  valley    0.0   
data_2  2021-02-01      10  2021-02-16 -10.0  2021-02-01  valley   0.08   
data_3  2021-02-16       0  2021-02-16   0.0  2021-02-16  valley   0.25   
data_4  2021-03-02      14  2021-03-22   2.0  2021-03-24    peak    0.0   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak    0.0   
data_6  2021-03-30      10  2021-04-14   0.0  2021-04-14  valley   0.09   
data_7  2021-04-14      13  2021-05-03   0.0  2021-05-03    peak    0.0   
data_8  2021-04-28       3  2021-05-03  85.0  2021-09-01    peak  11.92   
data_9  2021-05-12       6  2021-05-20   1.0  2021-05-21  valley    7.5   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley   5.25   
data_11 2021-06-10       1  2021-06-11   1.0  2021-06-14  valley   3.83   
data

In [50]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 15
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  11.69
            s_date t_delay      t_date  lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14   0.0  2021-01-14  valley   6.18   
data_1  2021-01-15       2  2021-01-20  -2.0  2021-01-15  valley    6.0   
data_2  2021-02-01      11  2021-02-17 -11.0  2021-02-01  valley   6.91   
data_3  2021-02-16       0  2021-02-16   0.0  2021-02-16  valley   6.17   
data_4  2021-03-02      15  2021-03-23   1.0  2021-03-24    peak   6.82   
data_5  2021-03-16       4  2021-03-22   2.0  2021-03-24    peak   7.36   
data_6  2021-03-30      10  2021-04-14   0.0  2021-04-14  valley   7.18   
data_7  2021-04-14      14  2021-05-04  -1.0  2021-05-03    peak   7.55   
data_8  2021-04-28       4  2021-05-04  84.0  2021-09-01    peak  15.92   
data_9  2021-05-12       7  2021-05-21   0.0  2021-05-21  valley   9.67   
data_10 2021-05-26       0  2021-05-26   0.0  2021-05-26  valley  10.08   
data_11 2021-06-10       2  2021-06-14   0.0  2021-06-14  valley   10.6   
data

In [51]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 20
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'mean'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  20.35
            s_date t_delay      t_date   lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14    0.0  2021-01-14  valley   0.88   
data_1  2021-01-15       2  2021-01-20   -2.0  2021-01-15  valley   0.09   
data_2  2021-02-01      35  2021-03-23    1.0  2021-03-24    peak    0.2   
data_3  2021-02-16      25  2021-03-23    1.0  2021-03-24    peak   1.09   
data_4  2021-03-02      15  2021-03-23    1.0  2021-03-24    peak    1.0   
data_5  2021-03-16       4  2021-03-22    2.0  2021-03-24    peak    2.0   
data_6  2021-03-30       3  2021-04-05   20.0  2021-05-03    peak   2.78   
data_7  2021-04-14      14  2021-05-04  142.0  2021-11-23    peak  35.22   
data_8  2021-04-28      17  2021-05-21    0.0  2021-05-21  valley  21.57   
data_9  2021-05-12       7  2021-05-21    0.0  2021-05-21  valley   15.9   
data_10 2021-05-26       0  2021-05-26    0.0  2021-05-26  valley   7.64   
data_11 2021-06-10       1  2021-06-11    1.0  2021-06-14  valley  

In [52]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 20
n_harm_lower_limit = 20
n_harm_upper_limit = 40
fit_method = 'abs'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  19.62
            s_date t_delay      t_date   lead    ans_date      pv  error  \
data_0  2020-12-31       9  2021-01-14    0.0  2021-01-14  valley   9.62   
data_1  2021-01-15       4  2021-01-22   -4.0  2021-01-15  valley  10.44   
data_2  2021-02-01       9  2021-02-12   -9.0  2021-02-01  valley  12.11   
data_3  2021-02-16      23  2021-03-19    3.0  2021-03-24    peak  13.11   
data_4  2021-03-02      14  2021-03-22    2.0  2021-03-24    peak  13.33   
data_5  2021-03-16       4  2021-03-22    2.0  2021-03-24    peak  13.33   
data_6  2021-03-30      23  2021-05-03    0.0  2021-05-03    peak  17.62   
data_7  2021-04-14      14  2021-05-04  142.0  2021-11-23    peak   44.0   
data_8  2021-04-28      17  2021-05-21    0.0  2021-05-21  valley  29.75   
data_9  2021-05-12       7  2021-05-21    0.0  2021-05-21  valley  28.22   
data_10 2021-05-26       0  2021-05-26    0.0  2021-05-26  valley  29.11   
data_11 2021-06-10       2  2021-06-14    0.0  2021-06-14  valley  

In [53]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 1
n_harm_upper_limit = 70
fit_method = 'mean'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  0.92
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       0  2020-12-31  1.0  2021-01-04    peak   0.0        4
data_1  2021-01-15       0  2021-01-15  4.0  2021-01-22    peak   0.0       17
data_2  2021-02-01       0  2021-02-01  0.0  2021-02-01  valley   0.0        3
data_3  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley   0.0       28
data_4  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley   0.0        9
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley   0.0       47
data_6  2021-03-30       0  2021-03-30  1.0  2021-03-31  valley   0.0        3
data_7  2021-04-14       1  2021-04-15 -1.0  2021-04-14  valley   0.0       29
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley   0.0        2
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak   0.0        5
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley   0.0       27
data_11 2021-06-10       0  2021

In [54]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 10
n_slide = 26
pv_range = 2
n_harm_lower_limit = 1
n_harm_upper_limit = 70
fit_method = 'abs'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  0.65
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       0  2020-12-31  1.0  2021-01-04    peak   0.2        3
data_1  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.67       70
data_2  2021-02-01       1  2021-02-02 -1.0  2021-02-01  valley  0.48       69
data_3  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley  0.41       66
data_4  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.38       66
data_5  2021-03-16       0  2021-03-16  0.0  2021-03-16  valley   0.4       62
data_6  2021-03-30       2  2021-04-01 -1.0  2021-03-31  valley  0.43       68
data_7  2021-04-14       1  2021-04-15 -1.0  2021-04-14  valley  0.44       42
data_8  2021-04-28       0  2021-04-28  0.0  2021-04-28  valley  0.47       45
data_9  2021-05-12       0  2021-05-12  2.0  2021-05-14    peak  0.48       41
data_10 2021-05-26       0  2021-05-26  0.0  2021-05-26  valley  0.54       61
data_11 2021-06-10       2  2021

In [18]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 5
n_slide = 52
pv_range = 2
n_harm_lower_limit = 1
n_harm_upper_limit = 70
fit_method = 'mean'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  1.25
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       0  2020-12-31  1.0  2021-01-04    peak   0.0        4
data_1  2021-01-08       0  2021-01-08  1.0  2021-01-11    peak   0.0       40
data_2  2021-01-15       0  2021-01-15  4.0  2021-01-22    peak   0.0       17
data_3  2021-01-25       0  2021-01-25  3.0  2021-01-28    peak   0.0        6
data_4  2021-02-01       0  2021-02-01  0.0  2021-02-01  valley   0.0        3
data_5  2021-02-08       0  2021-02-08  1.0  2021-02-09    peak   0.0       25
data_6  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley   0.0       28
data_7  2021-02-23       0  2021-02-23  0.0  2021-02-23  valley   0.0        7
data_8  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley   0.0        9
data_9  2021-03-09       0  2021-03-09  1.0  2021-03-10  valley   0.0       15
data_10 2021-03-16       0  2021-03-16  0.0  2021-03-16  valley   0.0       47
data_11 2021-03-23       0  2021

In [19]:
stock_name = "^GSPC"
date_predict_start = '2021-01-01'
data_range = 200
slide_range = 5
n_slide = 52
pv_range = 2
n_harm_lower_limit = 1
n_harm_upper_limit = 70
fit_method = 'abs'
harmonics, processed_signal, best_fit_harm, best_error, first_date, lead, pv, result_table, final_error=  main_funtion(
    stock_name, date_predict_start, data_range, slide_range,
    n_slide, pv_range, n_harm_lower_limit, n_harm_upper_limit, fit_method)


final_error =  0.71
            s_date t_delay      t_date lead    ans_date      pv error best_fit
data_0  2020-12-31       0  2020-12-31  1.0  2021-01-04    peak   0.2        3
data_1  2021-01-08       0  2021-01-08  1.0  2021-01-11    peak  0.62       56
data_2  2021-01-15       1  2021-01-19 -1.0  2021-01-15  valley  0.67       70
data_3  2021-01-25       0  2021-01-25  3.0  2021-01-28    peak  0.58       66
data_4  2021-02-01       1  2021-02-02 -1.0  2021-02-01  valley  0.48       69
data_5  2021-02-08       1  2021-02-09  0.0  2021-02-09    peak   0.4       66
data_6  2021-02-16       0  2021-02-16  0.0  2021-02-16  valley  0.41       66
data_7  2021-02-23       0  2021-02-23  0.0  2021-02-23  valley  0.46       68
data_8  2021-03-02       0  2021-03-02  0.0  2021-03-02  valley  0.38       66
data_9  2021-03-09       0  2021-03-09  1.0  2021-03-10  valley  0.45       70
data_10 2021-03-16       0  2021-03-16  0.0  2021-03-16  valley   0.4       62
data_11 2021-03-23       1  2021