<a href="https://colab.research.google.com/github/ajojojojo/deepcraft_treinee/blob/main/NTT_stock_price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install greykite

import numpy as np
import pandas as pd
import plotly

from greykite.algo.changepoint.adalasso.changepoint_detector import ChangepointDetector
from greykite.algo.forecast.silverkite.constants.silverkite_column import SilverkiteColumn
from greykite.common.features.timeseries_features import get_available_holidays_across_countries
from greykite.framework.input.univariate_time_series import UnivariateTimeSeries
from greykite.framework.templates.autogen.forecast_config import ForecastConfig
from greykite.framework.templates.autogen.forecast_config import MetadataParam
from greykite.framework.templates.autogen.forecast_config import ModelComponentsParam
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.framework.utils.result_summary import summarize_grid_search_results



In [2]:
# Load the raw stock price table from Google Drive.
# NOTE(review): this path only resolves when Drive is mounted at /content/drive;
# no mount call is visible in this notebook - confirm how it is mounted.
STOCK_PRICE_CSV = '/content/drive/MyDrive/stock_price.csv'
df = pd.read_csv(STOCK_PRICE_CSV)

In [3]:
# Preview the raw table: one row per trading date, listed newest first, with
# the volume and percent-change columns still stored as text.
df

Unnamed: 0,日付け,終値,始値,高値,安値,出来高,変化率 %
0,2024-08-01,156.3,159.3,159.4,156.1,79.15M,-2.56%
1,2024-07-31,160.4,158.2,160.7,158.1,173.91M,1.07%
2,2024-07-30,158.7,158.8,159.2,158.0,138.14M,-0.63%
3,2024-07-29,159.7,158.7,160.2,158.4,126.28M,1.14%
4,2024-07-26,157.9,159.3,159.6,157.9,155.08M,-0.13%
...,...,...,...,...,...,...,...
9197,1987-02-18,191.2,186.3,191.2,186.3,795.09M,4.82%
9198,1987-02-17,182.4,176.5,184.3,175.5,881.51M,3.93%
9199,1987-02-16,175.5,166.7,175.5,165.7,360.51M,1.74%
9200,1987-02-13,172.5,176.5,176.5,172.5,422.38M,-5.43%


In [4]:
# Inspect column dtypes to see which columns still need conversion
# (the date, volume and percent-change columns are read as `object`).
df.dtypes

Unnamed: 0,0
日付け,object
終値,float64
始値,float64
高値,float64
安値,float64
出来高,object
変化率 %,object


In [5]:
# prompt: convert the date column to datetime

# The date column ("日付け") is read from the CSV as text; parse it so that
# time-series tooling can treat it as a proper timestamp column.
date_text = df['日付け']
df['日付け'] = pd.to_datetime(date_text)


In [6]:
# prompt: convert the volume column to float.

# Multiplier that rescales each magnitude suffix to millions, the unit used by
# values such as "79.15M". A "B" (billions) value is 1000x an "M" value, so
# dropping the letter alone would put both on the same scale.
_VOLUME_SUFFIX_IN_MILLIONS = {"K": 1e-3, "M": 1.0, "B": 1e3}


def _volume_to_millions(volume):
    """Convert suffixed volume strings (e.g. "79.15M", "1.2B") to floats in millions.

    Parameters
    ----------
    volume : pandas.Series
        Volume column, either raw text with an optional K/M/B suffix or
        already numeric (which happens when this cell is re-run).

    Returns
    -------
    pandas.Series
        Float volumes expressed in millions. Values without a recognised
        suffix are parsed as-is; missing values stay NaN.

    Raises
    ------
    ValueError
        If a value cannot be parsed as a number after removing its suffix.
    """
    if pd.api.types.is_numeric_dtype(volume):
        # Already converted: keep the cell idempotent instead of failing on `.str`.
        return volume.astype(float)
    text = volume.str.strip()
    scale = text.str[-1].str.upper().map(_VOLUME_SUFFIX_IN_MILLIONS)
    has_suffix = scale.notna()
    # Drop the trailing unit letter only where one was recognised.
    digits = text.where(~has_suffix, text.str[:-1])
    return pd.to_numeric(digits).astype(float) * scale.fillna(1.0)


def _percent_to_float(percent):
    """Convert percent strings such as "-2.56%" to floats such as -2.56.

    Already-numeric input is returned as float so the cell can be re-run.
    """
    if pd.api.types.is_numeric_dtype(percent):
        return percent.astype(float)
    return percent.str.replace('%', '', regex=False).astype(float)


df["出来高"] = _volume_to_millions(df["出来高"])
df["変化率 %"] = _percent_to_float(df["変化率 %"])


In [7]:
# Wrap the closing price ("終値") indexed by date ("日付け") in a Greykite
# univariate time series, declared with daily frequency.
ts = UnivariateTimeSeries()
load_args = {
    "df": df,
    "time_col": "日付け",
    "value_col": "終値",
    "freq": "D",
}
# Left as the cell's last expression, so its return value is echoed by the notebook.
ts.load_data(**load_args)



<greykite.framework.input.univariate_time_series.UnivariateTimeSeries at 0x7dcede78f820>

In [9]:
# Yearly seasonality

# One overlay line per calendar year, grouped by the "month_dom" time feature
# and centered, with the mean across years drawn on top.
yearly_plot_options = dict(
    groupby_time_feature="month_dom",
    show_mean=True,
    show_quantiles=False,
    show_overlays=True,
    overlay_label_time_feature="year",
    overlay_style={"line": {"width": 1}, "opacity": 0.5},
    center_values=True,
    xlabel="day of year",
    ylabel=ts.original_value_col,
    title="yearly seasonality for each year (centered)",
)
fig = ts.plot_quantiles_and_overlays(**yearly_plot_options)
plotly.io.show(fig)


In [10]:
# Weekly seasonality

# Centered values grouped by day of week, with one overlay per month.
weekly_by_month_options = dict(
    groupby_time_feature="str_dow",
    show_mean=True,
    show_quantiles=False,
    show_overlays=True,
    center_values=True,
    # splits overlays by month (try other features too)
    overlay_label_time_feature="month",
    # optional overlay styling, passed to `plotly.graph_objects.Scatter`
    overlay_style={"line": {"width": 1}, "opacity": 0.5},
    xlabel="day of week",
    ylabel=ts.original_value_col,
    title="weekly seasonality by month",
)
fig = ts.plot_quantiles_and_overlays(**weekly_by_month_options)
plotly.io.show(fig)

In [11]:
# Stock price over time

# Plot the loaded series as-is and render the figure.
price_history_fig = ts.plot()
fig = price_history_fig
plotly.io.show(price_history_fig)

In [14]:
# Weekly seasonality

# Uncentered values by day of week, showing the mean, quantiles and a sample
# of individual weeks as overlays.
weekly_overlay_options = dict(
    groupby_time_feature="str_dow",
    show_mean=True,
    show_quantiles=True,
    show_overlays=20,  # randomly selects up to 20 overlays
    overlay_label_sliding_window_size=7,  # each overlay is a single cycle (week)
    center_values=False,
    xlabel="day of week",
    ylabel=ts.original_value_col,
    title="weekly seasonality with overlays",
)
fig = ts.plot_quantiles_and_overlays(**weekly_overlay_options)
plotly.io.show(fig)


All-NaN slice encountered



In [15]:
# Aggregate the series over sliding windows of 13 weeks, with the overall mean
# plus one overlay column per day of week for each window.
quarterly_window_options = dict(
    groupby_sliding_window_size=7 * 13,  # accepts the same parameters as `plot_quantiles_and_overlays`
    show_mean=True,
    show_quantiles=False,
    show_overlays=True,
    center_values=False,  # note! does not center, to compute raw differences from the mean below
    overlay_label_time_feature="str_dow",
)
grouped_df = ts.get_quantiles_and_overlays(**quarterly_window_options)


In [16]:
# Display the windowed aggregates: a `mean` column and one `overlay` column per
# day of week (1-Mon .. 5-Fri), indexed by window start (`ts_downsample`).
grouped_df

category,mean,overlay,overlay,overlay,overlay,overlay
name,mean,1-Mon,2-Tue,3-Wed,4-Thu,5-Fri
ts_downsample,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1987-02-12,248.650000,247.060000,256.300000,256.660000,243.483333,240.780000
1987-05-14,280.238710,278.600000,279.500000,279.591667,280.838462,282.430769
1987-08-13,249.045312,248.184615,246.966667,248.569231,250.753846,250.592308
1987-11-12,264.753226,265.516667,262.583333,264.291667,263.815385,267.415385
1988-02-11,226.305085,224.130769,226.308333,227.533333,228.436364,225.400000
...,...,...,...,...,...,...
2023-06-29,164.771429,165.891667,164.723077,164.776923,164.161538,164.358333
2023-09-28,171.637097,169.863636,171.853846,172.146154,172.330769,171.725000
2023-12-28,176.077966,176.700000,176.350000,175.866667,176.266667,175.258333
2024-03-28,178.675410,179.618182,178.861538,178.433333,177.630769,178.983333


In [17]:
# Day-of-week trend over time

from greykite.common.viz.timeseries_plotting import add_groupby_column, plot_multivariate, plot_univariate

# Deviation of each day-of-week overlay from the window mean.
window_mean = grouped_df['mean'].values
overlay_minus_mean = grouped_df['overlay'] - window_mean  # subtracts the mean
x_col = overlay_minus_mean.index.name
# `plot_multivariate` expects the x-value to be a column, not the index.
overlay_minus_mean = overlay_minus_mean.reset_index()
# Plot the deviation from the mean, one line per day of week.
fig = plot_multivariate(
    df=overlay_minus_mean,
    x_col=x_col,
    ylabel=ts.original_value_col,
    title="day of week effect over time",
)
plotly.io.show(fig)

In [18]:
# Trend changepoint detection

# Detect trend changepoints in the closing price ("終値"). The series is
# resampled to 7-day buckets, 25 candidate changepoints are considered, none
# are allowed in the final 20% of the data, and yearly seasonality is switched
# off (order 0).
model = ChangepointDetector()
res = model.find_trend_changepoints(
    df=df,
    time_col="日付け",
    value_col="終値",
    resample_freq="7D",
    potential_changepoint_n=25,
    no_changepoint_proportion_from_end=0.2,
    yearly_seasonality_order=0
)

# `plot=False` returns the figure instead of rendering it, so show it explicitly.
fig = model.plot(plot=False)
plotly.io.show(fig)

# Only a cell's last expression is rendered, so the changepoint table comes
# last; built before the plot it was created and then silently discarded.
pd.DataFrame({"trend_changepoints": res["trend_changepoints"]})

In [19]:
# Growth: a single linear trend term.
growth = {"growth_term": "linear"}

# Automatic changepoint detection settings, wrapped under the
# "changepoints_dict" key.
changepoint_detection_settings = {
    "method": "auto",
    "resample_freq": "7D",
    "potential_changepoint_n": 25,
    "no_changepoint_proportion_from_end": 0.2,
    "yearly_seasonality_order": 0,
}
changepoints = {"changepoints_dict": changepoint_detection_settings}

# Every seasonality component is switched off.
seasonality = dict.fromkeys(
    [
        "yearly_seasonality",
        "quarterly_seasonality",
        "monthly_seasonality",
        "weekly_seasonality",
        "daily_seasonality",
    ],
    False,
)

In [20]:
# Bundle the model component settings: the growth, changepoint and seasonality
# dicts plus automatic uncertainty and a ridge fit algorithm.
uncertainty = {"uncertainty_dict": "auto"}
custom = {"fit_algorithm_dict": {"fit_algorithm": "ridge"}}

model_components = ModelComponentsParam(
    seasonality=seasonality,
    growth=growth,
    changepoints=changepoints,
    uncertainty=uncertainty,
    custom=custom,
)

In [21]:
import datetime

In [22]:
# Describe the input data: date and closing-price columns, daily frequency,
# and training data cut off at 2023-02-07.
# NOTE(review): the recorded run warns that the provided frequency 'D' does not
# match the inferred frequency 'B' and that about 32% of the input is null -
# confirm that daily frequency is intended for this series.
metadata = MetadataParam(
    time_col="日付け",
    value_col="終値",
    freq="D",
    train_end_date=datetime.datetime(2023, 2, 7),
)

# Silverkite template with the components defined above, 95% prediction
# intervals and a 365-period forecast horizon.
forecast_config = ForecastConfig(
    model_template=ModelTemplateEnum.SILVERKITE.name,
    model_components_param=model_components,
    metadata_param=metadata,
    coverage=0.95,
    forecast_horizon=365,
)

forecaster = Forecaster()
result = forecaster.run_forecast_config(df=df, config=forecast_config)


Provided frequency 'D' does not match inferred frequency 'B'. Using 'D'.


Provided frequency 'D' does not match inferred frequency 'B'. Using 'D'.



Fitting 3 folds for each of 1 candidates, totalling 3 fits



Input data has many null values. Missing 32.38% of one input.


Re-constructing `h_mat @ h_mat.T` by `lu_d_sqrt @ lu_d_sqrt.T` has a bigger relative error 0.039184348415949796 than tolerance 1e-08. Falling back to `h_mat` for more accurate variance estimation.


Input data has many null values. Missing 32.33% of one input.


118 value(s) in y_true were NA or infinite and are omitted in error calc.


Input data has many null values. Missing 32.33% of one input.


118 value(s) in y_true were NA or infinite and are omitted in error calc.


Input data has many null values. Missing 32.33% of one input.


118 value(s) in y_true were NA or infinite and are omitted in error calc.


Input data has many null values. Missing 32.33% of one input.


118 value(s) in y_true were NA or infinite and are omitted in error calc.


Input data has many null values. Missing 32.33% of one input.


118 value(s) in y_true were NA or infinite and are omitted in error calc.


Input data has many null values. Mis

In [23]:
# Plot the backtest stored on the forecast result.
backtest = result.backtest
backtest_fig = backtest.plot()
fig = backtest_fig
plotly.io.show(backtest_fig)

In [24]:
# Tabulate the backtest's test-set evaluation metrics as (metric, value) rows.
evaluation = result.backtest.test_evaluation
metric_rows = list(evaluation.items())
evaluation_df = pd.DataFrame(metric_rows).rename(columns={0: "metric", 1: "value"})
evaluation_df

Unnamed: 0,metric,value
0,CORR,0.533099
1,R2,-13.518811
2,MSE,771.4031
3,RMSE,27.774144
4,MAE,27.003317
5,MedAE,27.866717
6,MAPE,17.664498
7,MedAPE,18.332181
8,sMAPE,9.731071
9,Q80,21.602653
