In [10]:
import numpy as np
import pandas as pd
import yfinance as yf
import pandas_ta as ta

# Fetch the data
data = yf.download(tickers = 'GOOG', start = '2012-03-11', end = '2022-07-10')

# Calculate SMA and RSI as features
data['SMA_50'] = ta.sma(data['Close'], length=50)
data['RSI_14'] = ta.rsi(data['Close'], length=14)

# Create labels: 1 if next day's close is higher than today's close, else 0
data['Next_Close'] = data['Close'].shift(-1) # Next day's close
data['Target'] = (data['Next_Close'] > data['Close']).astype(int)

# Drop rows with NaN values because of the SMA and RSI calculations
data_clean = data.dropna()

# Show the prepared dataset
data_clean.head()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,SMA_50,RSI_14,Next_Close,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2012-05-21,14.956689,15.334772,14.943986,15.295419,15.295419,123477094,15.472615,50.484136,14.963912,0
2012-05-22,15.278732,15.287947,14.84436,14.963912,14.963912,122533571,15.470448,44.89838,15.179603,1
2012-05-23,14.985082,15.18309,14.872255,15.179603,15.179603,127600492,15.466304,48.862867,15.035145,0
2012-05-24,15.172131,15.240873,14.915842,15.035145,15.035145,75935562,15.460162,46.452273,14.733027,0
2012-05-25,14.968893,14.987075,14.65208,14.733027,14.733027,143813034,15.445417,41.806979,14.803015,1


In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# Splitting the dataset
X = data_clean[['Close', 'SMA_50', 'RSI_14']]
y = data_clean['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Building the logistic regression model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Predicting and evaluating the model
predictions = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, predictions)

accuracy


0.5529411764705883

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import to_categorical

# Assuming X and y are already defined
def create_sequences(X, y, time_steps=1):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:(i + time_steps)])
        ys.append(y[i + time_steps])
    return np.array(Xs), np.array(ys)


# Scale features
scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler.fit_transform(X)

# Reshape data for LSTM
time_steps = 10
X_seq, y_seq = create_sequences(X_scaled, y, time_steps)
y_seq = to_categorical(y_seq)

# Split data into training and test sets
X_train_seq, X_test_seq = X_seq[:int(len(X_seq)*0.8)], X_seq[int(len(X_seq)*0.8):]
y_train_seq, y_test_seq = y_seq[:int(len(y_seq)*0.8)], y_seq[int(len(y_seq)*0.8):]

# Build LSTM model
model = Sequential([
    LSTM(50, activation='relu', input_shape=(time_steps, X.shape[1])),
    Dropout(0.2),
    Dense(2, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train_seq, y_train_seq, epochs=10, batch_size=32, validation_split=0.1, verbose=1)

# Evaluate model
test_loss, test_acc = model.evaluate(X_test_seq, y_test_seq)
print(f"Test Accuracy: {test_acc}")


  ys.append(y[i + time_steps])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.5551180839538574
