# Description

The purpose of the following is to give us a model for price movements based on mean reversion. We do the following:

* Decompose the time series into trend, seasonal, and residual components using the STL decomposition method.

* Calculate the probabilities of the price being above or below the trend using a logistic regression model.

* Identify periods of high probability of the price being above or below the trend and attach markers to the time series to indicate these periods.

* Identify the durations of high-probability periods and calculate the extremes within each duration. The script identifies how long each duration lasted and the maximum or minimum price movement within it.

* Fit the distribution of the percentage deviations of the prices from the trend and estimate the probabilities of future price movements using a Poisson distribution.

* Create various plots to visualize the results, including time series plots with trend, seasonal components, and probability markers; histograms of the probabilities; bar charts of the probability over time; and a bar chart of the probability density function of the percentage deviation of prices from the trend.



In [1]:
# Note: You can run this from the command line with scripts/mean_regressions.py
import plotly.io as pio
from plotly.offline import init_notebook_mode
from IPython.display import Image, display
import yfinance as yf
import pandas as pd
from machine_learning_finance import get_coin_data_frames, attach_markers, generate_probability, \
    calc_durations_with_extremes, prob_chart, plot_full_analysis, calculate_and_graph_duration_probabilities, \
    calculate_and_graph_price_probabilities, plot_full_analysis

# Initialise Plotly's offline notebook mode before any figure is produced.
init_notebook_mode(connected=True)

# --- Run configuration -------------------------------------------------------
windows = [300, 600, 900, 1500]
window = 300          # number of most recent rows to analyse
coin_base = False
ku_coin = False
use_stocks = True     # True: pull daily bars from yfinance; False: coin data
symbol = "AAPL"

# Today's date with the time-of-day dropped (round-trip through a date string).
date_str = pd.Timestamp.now().strftime("%Y-%m-%d")
start_date = pd.to_datetime(date_str)

# --- Load price history ------------------------------------------------------
if use_stocks:
    tickerObj = yf.download(tickers=symbol, interval="1d")
    df_raw = pd.DataFrame(tickerObj).tail(window)
else:
    # Coin data arrives with a "Date" column; index by it and sort by date.
    df_raw = get_coin_data_frames(window, symbol).set_index("Date").sort_index()

# --- Trend / probability model ----------------------------------------------
(
    trend,
    prob_above_trend,
    prob_below_trend,
    volatility,
    model,
) = generate_probability(df_raw)

# --- Markers, durations and the combined plot --------------------------------
df_raw = attach_markers(df_raw, trend, prob_above_trend)
df_durations = calc_durations_with_extremes(df_raw)
plot_full_analysis(
    df_raw,
    trend,
    prob_above_trend,
    prob_below_trend,
    model,
    df_durations,
)

calculate_and_graph_duration_probabilities(start_date, df_raw, df_durations)

# --- Distribution of the close price's percentage deviation from trend -------
percent_diff_from_trend = (df_raw["Close"] - trend) / trend * 100
calculate_and_graph_price_probabilities(percent_diff_from_trend)



[*********************100%***********************]  1 of 1 completed


Current price diff: 14.12344813827064


In [9]:
import pandas as pd
pd.set_option('display.max_rows', None)

# Number of rows in each look-back window, and also the number of dates that
# get evaluated. Rows are the daily bars returned by yfinance, so this counts
# bars rather than calendar days.
LOOKBACK = 365

# Probability cut-offs used to turn "probability above trend" into an action.
LOW_THRESHOLD = 0.2
HIGH_THRESHOLD = 0.8


def _classify_probability(prob, low=LOW_THRESHOLD, high=HIGH_THRESHOLD):
    """Map a probability of being above trend to a discrete action.

    Args:
        prob: Probability value to classify.
        low: Upper bound (inclusive) of the -1 region.
        high: Lower bound (inclusive) of the +1 region.

    Returns:
        0 when ``low < prob < high``, 1 when ``prob >= high``, otherwise -1.
    """
    # The branch order mirrors the original ladder so that any value failing
    # both comparisons still falls through to -1.
    if low < prob < high:
        return 0
    if prob >= high:
        return 1
    return -1


# Download the full daily history and keep the last 2 * LOOKBACK rows: the
# first LOOKBACK rows seed the first rolling window, the last LOOKBACK rows
# are the dates being evaluated.
tickerObj = yf.download(tickers=symbol, interval="1d")
df_raw = pd.DataFrame(tickerObj)
df = df_raw.tail(LOOKBACK * 2)

# Fail early with a clear message instead of an IndexError deep in the loop.
if len(df) < LOOKBACK * 2:
    raise ValueError(
        f"Need at least {LOOKBACK * 2} rows of history for {symbol}, "
        f"got {len(df)}"
    )

# Pass 1: rolling evaluation. For each evaluated row i, fit on the LOOKBACK
# rows immediately before it and classify the last fitted probability.
# NOTE(review): the window df.iloc[i - LOOKBACK:i] excludes row i itself, yet
# the result is labelled with df.index[i]. Assuming generate_probability
# returns one probability per input row in order, `action` is therefore based
# on the row before the labelled date, while `action_prime` below uses the
# labelled row itself. Confirm this one-row offset is intended.
rolling_rows = []
for i in range(LOOKBACK, LOOKBACK * 2):
    df_short = df.iloc[i - LOOKBACK:i]
    trend, prob_above_trend, prob_below_trend, volatility, model = generate_probability(df_short)
    rolling_rows.append({
        'date': df.index[i],
        'action': _classify_probability(prob_above_trend[-1]),
    })

# Build the frame once; concatenating row by row copies the whole frame on
# every iteration.
df_results = pd.DataFrame(rolling_rows, columns=['date', 'action'])

# Pass 2: a single fit over the final LOOKBACK rows (the same dates as in
# df_results), classifying every fitted probability.
df_short = df.iloc[-LOOKBACK:]
trend, prob_above_trend, prob_below_trend, volatility, model = generate_probability(df_short)

df_results['action_prime'] = [
    _classify_probability(prob_above_trend[i]) for i in range(LOOKBACK)
]

# Print the results
print(df_results)
    



[*********************100%***********************]  1 of 1 completed
          date action action_prime
0   2021-11-24     -1            1
1   2021-11-26      0            1
2   2021-11-29      0            1
3   2021-11-30      0            1
4   2021-12-01     -1            1
5   2021-12-02     -1            1
6   2021-12-03     -1            1
7   2021-12-06     -1            1
8   2021-12-07     -1            1
9   2021-12-08     -1            0
10  2021-12-09     -1            0
11  2021-12-10     -1            0
12  2021-12-13     -1            0
13  2021-12-14     -1            0
14  2021-12-15     -1            0
15  2021-12-16     -1            0
16  2021-12-17     -1            0
17  2021-12-20     -1            0
18  2021-12-21     -1            0
19  2021-12-22     -1            0
20  2021-12-23     -1            0
21  2021-12-27     -1            0
22  2021-12-28     -1            0
23  2021-12-29     -1            0
24  2021-12-30     -1            0
25  2021-12-31     -1