<a href="https://colab.research.google.com/github/erendagasan/Eren-Dagasan-Personal/blob/main/gpt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Libraries and Indicator Function

!pip install -q bta-lib
!pip install -q ta

import btalib
import numpy as np
import pandas as pd
from ta.trend import PSARIndicator
from ta.momentum import WilliamsRIndicator
from ta.trend import AroonIndicator
from ta.volume import VolumePriceTrendIndicator
from ta.trend import CCIIndicator
from ta.momentum import ROCIndicator
from ta.trend import ADXIndicator
from ta.momentum import ultimate_oscillator
from ta.volume import ChaikinMoneyFlowIndicator
from ta.trend import KSTIndicator
from ta.momentum import TSIIndicator
from ta.trend import WMAIndicator
import yfinance as yf
import warnings
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

warnings.filterwarnings("ignore")


def create_indicators(data):
  data["RSI-14"] = btalib.rsi(data["Close"], period=14).df
  data["RSI-60"] = btalib.rsi(data["Close"], period=60).df

  data["STOCH-K"] = btalib.stoch(data['High'], data['Low'], data['Close']).df["k"]
  data["STOCH-D"] = btalib.stoch(data['High'], data['Low'], data['Close']).df["d"]

  data["WILLIAMS"] = WilliamsRIndicator(data["High"], data["Low"], data["Close"]).williams_r()
  data["AROON"] = AroonIndicator(close=data["Close"], window=25).aroon_indicator()
  data['CCI'] = CCIIndicator(close=data['Close'], low=data["Low"], high=data["High"], window=14).cci()
  data['ROC'] = ROCIndicator(close=data['Close'], window=5).roc()

  adx_indicator = ADXIndicator(high=data['High'], low=data['Low'], close=data['Close'], window=14)
  data['ADX'] = adx_indicator.adx()
  data['+DI'] = adx_indicator.adx_pos()
  data['-DI'] = adx_indicator.adx_neg()

  data['ULTIMATE-OSC'] = ultimate_oscillator(high=data['High'], low=data['Low'], close=data['Close'], window1=7, window2=14, window3=28)
  data['MONEY-FLOW'] = ChaikinMoneyFlowIndicator(high=data['High'], low=data['Low'], close=data['Close'], volume=data['Volume'], window=20).chaikin_money_flow()
  data['KST'] = KSTIndicator(data['Close']).kst()

  data['TSI'] = TSIIndicator(data['Close']).tsi()

  data['WMA-30'] = WMAIndicator(data['Close'], window=30).wma()

  data = data.dropna()
  data = data.reset_index()
  return data

# from google.colab import drive
# drive.mount('/content/drive')

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/92.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/92.2 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for ta (setup.py) ... [?25l[?25hdone


In [None]:
#@title Download the Model

import gdown

gdown.download("https://drive.google.com/u/1/uc?id=1-3r7tu0ZQXWNtqMQ35rX0DexxNt9P1l5&export=download", "/content/", quiet=False)
# gdown.download("https://drive.google.com/u/0/uc?id=117pezAA6jRLCwIsdpEhZgEa9tEanlC0O&export=download", "/content/", quiet=False)

# data = pd.read_csv("data.csv")
model = tf.keras.models.load_model("best_model_2.h5")

In [13]:
#@title Stock List
# sheet_id = "1RSqOXkFTAO7g4H9LEY3d3IX6H6bJaYk1"
# sheet_name = "Sheet_1"
# url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
# result_df = pd.read_csv(url)

# sheet_id = "1AA9MfqOtAAgO97__aomD79DciyT-PkRQ"
# sheet_name = "Sheet_1"
# url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"
# result_df = pd.read_csv(url)

# nasdaq100 = ['AAPL', 'MSFT', 'GOOGL', 'GOOG', 'AMZN',
#              'NVDA', 'TSLA', 'META', 'AVGO', 'ASML',
#              'PEP', 'COST', 'ADBE', 'AZN', 'CSCO',
#              'NFLX', 'AMD', 'CMCSA', 'TMUS', 'TXN',
#              'QCOM', 'HON', 'INTU', 'INTC', 'SNY',
#              'VZ', 'AMGN', 'SBUX', 'ISRG', 'AMAT',
#              'BKNG', 'ADI', 'MDLZ', 'PDD', 'GILD',
#              'ADP', 'VRTX', 'ABNB', 'LRCX', 'PYPL',
#              'REGN', 'EQIX', 'MU', 'CSX', 'SNPS',
#              'CME', 'CDNS', 'KLAC', 'NTES']

stocks = ["AKBNK", "ISCTR", "YKBNK", "SKBNK", "GARAN", "HALKB", "TSKB", "ALBRK", "VAKBN", "KLNMA"]

In [15]:
data = pd.DataFrame()

for stock in stocks:
  stock = stock + ".IS"
  stock_df = yf.download(stock, start="2015-01-01", end="2023-01-01", progress=False)
  stock_df = create_indicators(stock_df)
  stock_df["signal"] = 0

  for index, row in stock_df.iterrows():
    if index > 0 and index < stock_df.shape[0]-1 and stock_df["Close"].iloc[index+1] > ((2*stock_df["Close"].iloc[index]/100) + stock_df["Close"].iloc[index]):
      stock_df["signal"].iloc[index] = 1

  stock_df = stock_df.drop(["Date", "Open", "High", "Low", "Close", "Volume", "Adj Close"], axis=1)

  data = pd.concat([data, stock_df], ignore_index=True)

In [16]:
data.tail()

Unnamed: 0,RSI-14,RSI-60,STOCH-K,STOCH-D,WILLIAMS,AROON,CCI,ROC,ADX,+DI,-DI,ULTIMATE-OSC,MONEY-FLOW,KST,TSI,WMA-30,signal
19953,49.814437,49.786073,8.530728,15.675096,-96.02646,76.0,-58.657692,-6.819517,31.33158,47.46471,24.184811,43.648163,0.116118,68.057153,3.487867,15.796623,0
19954,45.146527,48.752437,6.080325,9.72641,-87.719303,72.0,-205.02606,-5.904059,30.1509,42.745549,31.722708,40.826177,0.118093,60.474896,1.724541,15.791595,0
19955,43.548472,48.380987,6.587669,7.06624,-96.49123,68.0,-216.488998,-5.624998,29.054555,41.670426,30.924828,26.165667,0.052477,52.957042,-0.216641,15.773019,0
19956,45.72104,48.797553,9.502923,7.390306,-87.280697,56.0,-145.623497,-3.831656,28.081596,41.044808,30.069624,34.743168,-0.008599,47.528919,-1.312423,15.76782,0
19957,47.682861,49.174938,12.426899,9.50583,-78.947377,56.0,-79.949146,-2.022754,27.398652,42.591683,29.280653,42.419685,-0.096907,44.203279,-1.741708,15.774728,0


In [17]:
#@title Data Preprocessing
df = pd.DataFrame(data)

input_columns = df.columns[:data.shape[1]-1]
output_column = "signal"

df[output_column] = df[output_column].astype(int)

X = df[input_columns].values
y = df[output_column].values

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)

oversampler = RandomOverSampler(random_state=42)
X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)

from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight("balanced", classes=[0, 1], y=y_train)
class_weight = {cls: weight for cls, weight in zip([0, 1], class_weights)}
class_weight

{0: 0.5940616163119512, 1: 3.1578322784810124}

In [None]:
#@title Regression Model

import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid,
    scoring='neg_mean_squared_error',  # Negative MSE for optimization
    cv=5  # Cross-validation folds
)

grid_search.fit(X_train_resampled, y_train_resampled)

# Get the best parameters from the grid search
best_params = grid_search.best_params_

# Train the Random Forest model using the best parameters
model = RandomForestRegressor(**best_params, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Best Hyperparameters: {best_params}")
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

In [18]:
#@title LSTM Model

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_resampled.shape[1], 1)),

    tf.keras.layers.LSTM(256, return_sequences=True),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.LSTM(256),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('lstm_model.h5', save_best_only=True)

model.fit(X_train_resampled, y_train_resampled, epochs=1000, batch_size=512, validation_split=0.2, class_weight=class_weight,
          callbacks=[early_stopping, model_checkpoint])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000


<keras.callbacks.History at 0x7899d415d510>

In [None]:
#@title Conv1D model
from tensorflow import keras
from tensorflow.keras import layers

X_train_resampled = np.expand_dims(X_train_resampled, axis=2)
X_test = np.expand_dims(X_test, axis=2)

def create_model():
    input_layer = keras.Input(shape=(X_train_resampled.shape[1], X_train_resampled.shape[2]))

    x = layers.Conv1D(
        filters=32, kernel_size=3, strides=2, activation="relu", padding="same"
    )(input_layer)
    x = layers.BatchNormalization()(x)

    x = layers.Conv1D(
        filters=64, kernel_size=3, strides=2, activation="relu", padding="same"
    )(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv1D(
        filters=128, kernel_size=5, strides=2, activation="relu", padding="same"
    )(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv1D(
        filters=256, kernel_size=5, strides=2, activation="relu", padding="same"
    )(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv1D(
        filters=512, kernel_size=7, strides=2, activation="relu", padding="same"
    )(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv1D(
        filters=1024, kernel_size=7, strides=2, activation="relu", padding="same"
    )(x)
    x = layers.BatchNormalization()(x)

    x = layers.Dropout(0.2)(x)

    x = layers.Flatten()(x)

    x = layers.Dense(4096, activation="relu")(x)
    x = layers.Dropout(0.2)(x)

    x = layers.Dense(
        2048, activation="relu", kernel_regularizer=keras.regularizers.L2()
    )(x)
    x = layers.Dropout(0.2)(x)

    x = layers.Dense(
        1024, activation="relu", kernel_regularizer=keras.regularizers.L2()
    )(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(
        128, activation="relu", kernel_regularizer=keras.regularizers.L2()
    )(x)
    output_layer = layers.Dense(1, activation="sigmoid")(x)

    return tf.keras.Model(inputs=input_layer, outputs=output_layer)

model = create_model()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('/content/drive/MyDrive/best_model_2_gpt_deneme.h5', save_best_only=True)

model.fit(X_train_resampled, y_train_resampled, epochs=1000, batch_size=64, validation_split=0.2,
          callbacks=[early_stopping, model_checkpoint], class_weight=class_weight)

test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

In [7]:
# model.save("xu030-long-deneme.h5")
model = tf.keras.models.load_model("/content/lstm_model.h5")
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Test Loss: 1.1068357229232788
Test Accuracy: 0.8357720971107483


In [None]:
for stock in ["AKBNK.IS"]:
  predictions = []

  stock_data = yf.download(stock, start="2023-01-01", end="2023-08-30", progress=False)
  stock_data = create_indicators(stock_data)

  stock_data["signal"] = 0

  for index, row in stock_data.iterrows():
    if index > 0 and index < stock_data.shape[0]-1 and stock_data["Close"].iloc[index+1] > ((2*stock_data["Close"].iloc[index]/100) + stock_data["Close"].iloc[index]):
      stock_data["signal"].iloc[index] = 1

  stock_data = stock_data.drop(["Open", "High", "Low", "Close", "Adj Close", "Volume"], axis=1)

  for index, row in stock_data.iterrows():
      x = row[1:data.shape[1]]

      new_data = x.to_numpy().reshape(1, -1)
      new_data = scaler.transform(new_data)
      prediction = model.predict(new_data, verbose=None)

      print(f"Prediction for date {row[0]} {row[-1]}: {np.round(prediction[0][0])}")

      if np.round(prediction[0][0]) == row["signal"]:
        predictions.append(1)
      elif np.round(prediction[0][0]) != row["signal"]:
        predictions.append(0)

  print(f"{stock} Accuracy: {predictions.count(1) / len(predictions) * 100}")

Prediction for date 2023-03-31 00:00:00 0: 1.0
Prediction for date 2023-04-03 00:00:00 0: 1.0
Prediction for date 2023-04-04 00:00:00 0: 1.0
Prediction for date 2023-04-05 00:00:00 0: 1.0
Prediction for date 2023-04-06 00:00:00 0: 1.0
Prediction for date 2023-04-07 00:00:00 1: 1.0
Prediction for date 2023-04-10 00:00:00 1: 1.0
Prediction for date 2023-04-11 00:00:00 1: 1.0


In [None]:
buy_stocks = []

for stock in result_df["STOCK"].unique():
  stock_data = yf.download(stock, start="2021-06-01", end="2023-08-17", progress=False)
  stock_data = create_indicators(stock_data)

  change = ((stock_data["Close"].iloc[-1] - stock_data["Close"].iloc[-2]) / stock_data["Close"].iloc[-2])*100
  change = round(change, 2)

  stock_data = stock_data.drop(["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"], axis=1)

  x = stock_data.iloc[-2]

  new_data = x.to_numpy().reshape(1, -1)
  new_data = scaler.transform(new_data)
  prediction = model.predict(new_data, verbose=None)

  if round(prediction[0][0]*100,2) > 75:
    buy_stocks.append([stock, round(prediction[0][0]*100,2), change])

buy_df = pd.DataFrame(buy_stocks, columns=["stock", "probability", "change"]).sort_values(by="probability", ascending=False)
print(f'Pozitif kapanan hisse sayısı: {buy_df[buy_df["change"] > 0].shape[0]}')
print(f'Negatif kapanan hisse sayısı: {buy_df[buy_df["change"] < 0].shape[0]}')
print(f'Günlük değişim ortalaması: %{round(buy_df["change"].sum()/buy_df.shape[0], 2)}\n')
buy_df

In [None]:
2023-08-08
1	BIMAS.IS	98.39	-1.34
4	PGSUS.IS	97.33	9.52
2	GARAN.IS	96.60	-0.43
5	PETKM.IS	90.65	5.20
3	GUBRF.IS	90.02	-0.40
0	ARCLK.IS	83.10	1.14

2023-08-08
2	PGSUS.IS	100.00	0.33
1	GARAN.IS	97.05	3.03
0	BIMAS.IS	96.98	1.54
3	SASA.IS	95.37	-3.57
4	TCELL.IS	93.31	0.94

2023-08-09
0	BIMAS.IS	99.82	9.99
2	PETKM.IS	99.82	0.33
3	THYAO.IS	99.22	-0.69
1	GARAN.IS	92.52	9.91

2023-08-10
2	TOASO.IS	98.16	-4.08
0	GARAN.IS	85.31	2.83
1	SAHOL.IS	76.00	-0.84

2023-08-11
1	KRDMD.IS	100.00	2.44
2	THYAO.IS	97.89	2.79
0	FROTO.IS	90.15	2.24

2023-08-14
3	THYAO.IS	99.93	-1.62
0	AKBNK.IS	99.74	-2.20
1	GARAN.IS	96.38	0.63
2	TAVHL.IS	87.66	5.62

2023-08-15
3	THYAO.IS	100.00	-0.65
0	AKBNK.IS	99.94	0.53
2	SASA.IS	99.93	-0.99
1	GARAN.IS	97.06	-0.63

2023-08-16
1	PGSUS.IS	99.99	-1.96
0	GARAN.IS	99.91	-3.15
2	SASA.IS	99.26	-1.28
3	PETKM.IS	76.96	0.98

2023-08-17
1	PGSUS.IS	100.00	4.82
5	TOASO.IS	100.00	2.20
0	ODAS.IS	99.83	-1.25
6	ISCTR.IS	97.81	0.94
7	YKBNK.IS	94.71	2.77
3	SASA.IS	84.09	0.00
4	TAVHL.IS	81.61	0.39
2	SAHOL.IS	79.27	0.74

2023-08-18
0	EKGYO.IS	99.99	-4.26
3	TOASO.IS	99.95	-3.51
1	GARAN.IS	99.64	-6.55
2	PGSUS.IS	99.56	-4.95
4	ISCTR.IS	96.21	-3.49