In [None]:
%pip install evidently
%pip install yfinance

In [None]:
from datetime import datetime, timedelta

import pandas as pd
import yfinance as yf
from evidently.metric_preset import (
    ClassificationPreset,
    DataDriftPreset,
    DataQualityPreset,
)
from evidently.report import Report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [None]:
# Data collection and preprocessing
def get_kospi_data(days: int) -> pd.DataFrame:
    """Download daily KOSPI (^KS11) prices and derive label/feature columns.

    The download window ends one day before the current local time and
    starts ``days`` calendar days before that end date.

    Columns added to the downloaded price data:
        Label: 1 when ``Close`` is above the previous row's ``Close``, else 0.
        MA5: 5-row rolling mean of ``Close``.
        MA20: 20-row rolling mean of ``Close``.
        Volatility: ``(High - Low) / Low`` for each row.

    Rows containing any missing value (which includes the rolling-window
    warm-up rows) are dropped before returning.

    Args:
        days: Length of the lookback window, in calendar days.

    Returns:
        A new DataFrame holding the price columns plus the derived columns.

    Raises:
        ValueError: If the download returns no rows.
    """
    ticker = "^KS11"
    end_date = datetime.now() - timedelta(days=1)
    start_date = end_date - timedelta(days=days)
    raw = yf.download(
        ticker,
        start=start_date.strftime("%Y-%m-%d"),
        end=end_date.strftime("%Y-%m-%d"),
        group_by="ticker",
    )
    if raw is None or raw.empty:
        raise ValueError(
            f"No price data returned for {ticker} "
            f"between {start_date:%Y-%m-%d} and {end_date:%Y-%m-%d}"
        )

    # NOTE(review): some yfinance releases appear to return flat columns for a
    # single ticker while others return (ticker, field) MultiIndex columns;
    # accept both layouts instead of assuming the MultiIndex one.
    prices = raw[ticker] if isinstance(raw.columns, pd.MultiIndex) else raw

    close = prices["Close"]
    # assign() builds a fresh frame, so nothing is written into a slice of the
    # downloaded data (avoids chained-assignment warnings).
    enriched = prices.assign(
        # NaN > 0 is False, so the first row gets 0 exactly as before.
        Label=(close.diff() > 0).astype(int),
        MA5=close.rolling(window=5).mean(),
        MA20=close.rolling(window=20).mean(),
        Volatility=(prices["High"] - prices["Low"]) / prices["Low"],
    )
    return enriched.dropna()

In [None]:
# Fetch roughly three years of daily KOSPI history.
kospi_data = get_kospi_data(days=3 * 365)

# Model inputs are the two moving averages; the target is the up/down label.
feature_names = ["MA5", "MA20"]
X = kospi_data[feature_names]
y = kospi_data["Label"]

# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
# NOTE(review): this looks like a shuffled split on time-ordered rows —
# confirm that is intended for this demo.
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

# Fit the classifier; the trailing expression shows the fitted estimator.
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

In [None]:
# Attach model predictions and ground-truth labels to each split; the report
# cells below pass these frames to Evidently as reference/current data.
#
# The feature columns are re-selected from X first so that:
#   * the cell can be re-run safely (the model only ever sees the columns it
#     was trained on, even after "prediction"/"target" have been added), and
#   * assign() builds fresh frames instead of writing into the frames
#     returned by train_test_split (avoids SettingWithCopyWarning).
feature_columns = list(X.columns)

X_train = X_train[feature_columns].assign(
    prediction=model.predict(X_train[feature_columns]),
    target=y_train,  # aligned on the index, same as column assignment
)
X_test = X_test[feature_columns].assign(
    prediction=model.predict(X_test[feature_columns]),
    target=y_test,
)

In [None]:
# Data drift: compare the test split (current) against the training split
# (reference).
drift_metrics = [DataDriftPreset()]
data_drift_report = Report(metrics=drift_metrics)
data_drift_report.run(current_data=X_test, reference_data=X_train)
data_drift_report

In [None]:
# Data quality: run the quality preset with the training split as reference
# and the test split as current data.
quality_metrics = [DataQualityPreset()]
data_quality_report = Report(metrics=quality_metrics)
data_quality_report.run(current_data=X_test, reference_data=X_train)
data_quality_report

In [None]:
# Classification performance: run the classification preset with the training
# split as reference and the test split as current data.
# NOTE(review): presumably relies on the "target"/"prediction" column names
# added to both frames earlier — confirm against Evidently's column mapping.
classification_metrics = [ClassificationPreset()]
performance_report = Report(metrics=classification_metrics)
performance_report.run(current_data=X_test, reference_data=X_train)
performance_report