In [11]:
import yfinance as yf
import pandas_ta as pta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings("ignore")

In [12]:
start_date = "2013-12-01"
end_date = "2023-12-01"
symbol = "googl"
data_stock = yf.download(tickers = symbol, start = start_date, end = end_date)
df = data_stock.copy()

[*********************100%%**********************]  1 of 1 completed


In [13]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-12-02,26.614365,26.685434,26.295296,26.388388,26.388388,54873072
2013-12-03,26.300051,26.612614,26.251753,26.357859,26.357859,66976956
2013-12-04,26.31056,26.626125,26.276276,26.480982,26.480982,47616336
2013-12-05,26.456455,26.518019,26.303555,26.459961,26.459961,45302652
2013-12-06,26.771523,26.776777,26.528528,26.773523,26.773523,57094848


In [14]:
df.reset_index(inplace=True)
df.drop(["Date"], axis=1, inplace=True)

In [15]:
df['sma50'] = pta.sma(close = df['Close'], length=50)
df['sma200'] = pta.sma(close = df['Close'], length=200)
df['rsi'] = pta.rsi(close = df['Close'], length=14)

In [16]:
df.dropna(inplace=True)

X = df.drop(columns=["Close", "Adj Close"], axis = 1)
y = df["Close"]

train_size = int(0.70 * y.shape[0])
X_train = X[:train_size]
y_train = y[:train_size]
X_test = X[train_size:]
y_test = y[train_size:]

In [17]:
scaler = MinMaxScaler(feature_range=(0,1))
X_train_scaled = scaler.fit_transform(X_train)
y_train_scaled = scaler.fit_transform(y_train.values.reshape(-1,1))
X_test_scaled = scaler.fit_transform(X_test)
y_test_scaled = scaler.fit_transform(y_test.values.reshape(-1,1))

In [18]:
rf_model = RandomForestRegressor()

rf_params = {"max_depth": [2, 3],
             "max_features": [3, 4, 5],
             "n_estimators": [200]} 

rf_best_grid = GridSearchCV(rf_model, rf_params,
                            cv=5, n_jobs=-1).fit(X_train_scaled, y_train_scaled)
print(rf_best_grid.best_params_)

{'max_depth': 3, 'max_features': 4, 'n_estimators': 200}


In [19]:
rf_final = RandomForestRegressor(**rf_best_grid.best_params_)
rf_final.fit(X_train_scaled, y_train_scaled)

In [20]:
train_pred = rf_final.predict(X_train_scaled)

In [21]:
test_pred = rf_final.predict(X_test_scaled)

In [22]:
train_pred = scaler.inverse_transform(train_pred.reshape(-1,1))
test_pred = scaler.inverse_transform(test_pred.reshape(-1,1))

In [23]:
y_train = scaler.inverse_transform(y_train_scaled)
y_test = scaler.inverse_transform(y_test_scaled)

In [24]:
# train hatası
train_rmse = np.sqrt(metrics.mean_squared_error(y_train, train_pred))
print("rmse train hatası:", train_rmse)

# test hatası
test_rmse = np.sqrt(metrics.mean_squared_error(y_test, test_pred))
print("rmse test hatası:", test_rmse)

# R^2 metriği
r2 = metrics.r2_score(y_test, test_pred)
print("r2 skoru:", r2)

rmse train hatası: 1.9305447805556497
rmse test hatası: 3.889799839796128
r2 skoru: 0.946882148916209


In [25]:
#----

In [26]:
Importance = pd.DataFrame({"Importance": rf_final.feature_importances_*100}, 
                         index = X_train.columns)
importance_sorted = Importance.sort_values(by="Importance", ascending=False)

In [27]:
importance_sorted

Unnamed: 0,Importance
High,43.304046
Low,33.11216
Open,13.071256
sma200,9.881135
sma50,0.631404
Volume,0.0
rsi,0.0
