<a href="https://colab.research.google.com/github/carlosvalenciano/Project4/blob/main/Stock_Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
# Requirements
!pip install yahoo_fin



In [12]:
import numpy as np
import time as tm
import datetime as dt
import tensorflow as tf
import pandas as pd

# Data preparation
from yahoo_fin import stock_info as yf

In [38]:
# SETTINGS

# Window size or the sequence length, 7 (1 week)
N_STEPS = 7

# Lookup steps, 1 is the next day, 3 = after tomorrow
LOOKUP_STEPS = [1, 2, 3]

# Stock ticker, GOOGL
STOCK = input('Enter a ticker. ')
print(f"You entered {STOCK}.")


# Current date
date_now = tm.strftime('%Y-%m-%d')
date_3_years_back = (dt.date.today() - dt.timedelta(days=1104)).strftime('%Y-%m-%d')

Enter a ticker. META
You entered META.


In [39]:
# LOAD DATA
# from yahoo_fin
# for 1104 bars with interval = 1d (one day)
df = yf.get_data(
    STOCK,
    start_date=date_3_years_back,
    end_date=date_now,
    interval='1d')

In [40]:
df

Unnamed: 0,open,high,low,close,adjclose,volume,ticker
2020-12-03,286.250000,286.649994,281.070007,281.850006,281.850006,12921700,META
2020-12-04,280.299988,283.459991,279.299988,279.700012,279.700012,10880300,META
2020-12-07,279.190002,288.489990,278.200012,285.579987,285.579987,13007700,META
2020-12-08,286.010010,286.429993,281.549988,283.399994,283.399994,10747700,META
2020-12-09,283.660004,287.630005,271.750000,277.920013,277.920013,25189700,META
...,...,...,...,...,...,...,...
2023-12-05,318.980011,321.880005,315.390015,318.290009,318.290009,16952100,META
2023-12-06,321.929993,322.250000,317.040009,317.450012,317.450012,11294300,META
2023-12-07,317.769989,328.239990,317.769989,326.589996,326.589996,15905100,META
2023-12-08,323.089996,333.170013,323.000000,332.750000,332.750000,14077500,META


In [41]:
# Find the relative daily price and the volume for the prior day
df_filter = df["adjclose"].pct_change() * 100
df_filter = df_filter.rename("Today")
df_filter = df_filter.reset_index()
df_filter["Volume"] = df["volume"].shift(1).values / 1000_000_000

In [42]:
# View dataframe
df_filter

Unnamed: 0,index,Today,Volume
0,2020-12-03,,
1,2020-12-04,-0.762815,0.012922
2,2020-12-07,2.102243,0.010880
3,2020-12-08,-0.763356,0.013008
4,2020-12-09,-1.933656,0.010748
...,...,...,...
755,2023-12-05,-0.540585,0.019037
756,2023-12-06,-0.263909,0.016952
757,2023-12-07,2.879188,0.011294
758,2023-12-08,1.886158,0.015905


In [43]:
# Find the stock prices for the last five days
for i in range(1,6):
    df_filter["Lag " + str(i)] = df_filter["Today"].shift(i)

df_filter

Unnamed: 0,index,Today,Volume,Lag 1,Lag 2,Lag 3,Lag 4,Lag 5
0,2020-12-03,,,,,,,
1,2020-12-04,-0.762815,0.012922,,,,,
2,2020-12-07,2.102243,0.010880,-0.762815,,,,
3,2020-12-08,-0.763356,0.013008,2.102243,-0.762815,,,
4,2020-12-09,-1.933656,0.010748,-0.763356,2.102243,-0.762815,,
...,...,...,...,...,...,...,...,...
755,2023-12-05,-0.540585,0.019037,-1.477747,-0.712207,-1.520174,-2.003003,1.281738
756,2023-12-06,-0.263909,0.016952,-0.540585,-1.477747,-0.712207,-1.520174,-2.003003
757,2023-12-07,2.879188,0.011294,-0.263909,-0.540585,-1.477747,-0.712207,-1.520174
758,2023-12-08,1.886158,0.015905,2.879188,-0.263909,-0.540585,-1.477747,-0.712207


In [44]:
# Drop the na
df_filter = df_filter.dropna()
df_filter

Unnamed: 0,index,Today,Volume,Lag 1,Lag 2,Lag 3,Lag 4,Lag 5
6,2020-12-11,-1.288253,0.020065,-0.287859,-1.933656,-0.763356,2.102243,-0.762815
7,2020-12-14,0.233966,0.014391,-1.288253,-0.287859,-1.933656,-0.763356,2.102243
8,2020-12-15,0.496001,0.016377,0.233966,-1.288253,-0.287859,-1.933656,-0.763356
9,2020-12-16,0.043559,0.023980,0.496001,0.233966,-1.288253,-0.287859,-1.933656
10,2020-12-17,-0.431676,0.015885,0.043559,0.496001,0.233966,-1.288253,-0.287859
...,...,...,...,...,...,...,...,...
755,2023-12-05,-0.540585,0.019037,-1.477747,-0.712207,-1.520174,-2.003003,1.281738
756,2023-12-06,-0.263909,0.016952,-0.540585,-1.477747,-0.712207,-1.520174,-2.003003
757,2023-12-07,2.879188,0.011294,-0.263909,-0.540585,-1.477747,-0.712207,-1.520174
758,2023-12-08,1.886158,0.015905,2.879188,-0.263909,-0.540585,-1.477747,-0.712207


In [45]:
# Find the price change direction
df_filter["Direction"] = [1 if i > 0 else 0 for i in df_filter["Today"]]
df_filter

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filter["Direction"] = [1 if i > 0 else 0 for i in df_filter["Today"]]


Unnamed: 0,index,Today,Volume,Lag 1,Lag 2,Lag 3,Lag 4,Lag 5,Direction
6,2020-12-11,-1.288253,0.020065,-0.287859,-1.933656,-0.763356,2.102243,-0.762815,0
7,2020-12-14,0.233966,0.014391,-1.288253,-0.287859,-1.933656,-0.763356,2.102243,1
8,2020-12-15,0.496001,0.016377,0.233966,-1.288253,-0.287859,-1.933656,-0.763356,1
9,2020-12-16,0.043559,0.023980,0.496001,0.233966,-1.288253,-0.287859,-1.933656,1
10,2020-12-17,-0.431676,0.015885,0.043559,0.496001,0.233966,-1.288253,-0.287859,0
...,...,...,...,...,...,...,...,...,...
755,2023-12-05,-0.540585,0.019037,-1.477747,-0.712207,-1.520174,-2.003003,1.281738,0
756,2023-12-06,-0.263909,0.016952,-0.540585,-1.477747,-0.712207,-1.520174,-2.003003,0
757,2023-12-07,2.879188,0.011294,-0.263909,-0.540585,-1.477747,-0.712207,-1.520174,1
758,2023-12-08,1.886158,0.015905,2.879188,-0.263909,-0.540585,-1.477747,-0.712207,1


In [46]:
# Assign the variables: X to Lag 1, Lag 2, Lag 3, Lag 4, Lag 5, Volume and y to Direction
# Set the first 550 for training and last 550 for testing
from sklearn.linear_model import LogisticRegression
X = df_filter[["Lag 1", "Lag 2", "Lag 3", "Lag 4", "Lag 5", "Volume"]]
y = df_filter[["Direction"]]

X_train = X[0:550]
X_test = X[550:]

y_train = y[0:550]
y_test = y[550:]

model = LogisticRegression().fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


In [47]:
# Show the predicted accuracy
model.score(X_test, y_test)

0.5049019607843137

In [48]:
# Separate the data into labels and features

# Separate the y variable, the labels
y = df_filter['Direction']

# Separate the X variable, the features
X = df_filter.drop(columns=['Direction', 'index', 'Today'])

In [49]:
# Review the y variable Series
y.head()

6     0
7     1
8     1
9     1
10    0
Name: Direction, dtype: int64

In [50]:
# Review the X variable DataFrame
X.head()

Unnamed: 0,Volume,Lag 1,Lag 2,Lag 3,Lag 4,Lag 5
6,0.020065,-0.287859,-1.933656,-0.763356,2.102243,-0.762815
7,0.014391,-1.288253,-0.287859,-1.933656,-0.763356,2.102243
8,0.016377,0.233966,-1.288253,-0.287859,-1.933656,-0.763356
9,0.02398,0.496001,0.233966,-1.288253,-0.287859,-1.933656
10,0.015885,0.043559,0.496001,0.233966,-1.288253,-0.287859


In [51]:
# Check the balance of our target values
y.value_counts()

1    381
0    373
Name: Direction, dtype: int64

In [52]:
# Import the train_test_learn module
from sklearn.model_selection import train_test_split

# Split the data using train_test_split
# Assign a random_state of 1 to the function
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [53]:
# Import the LogisticRegression module from SKLearn
from sklearn.linear_model import LogisticRegression

# Instantiate the Logistic Regression model
# Assign a random_state parameter of 1 to the model
lr_model = LogisticRegression(random_state=42)


# Fit the model using training data
lr_model.fit(X_train, y_train)

In [54]:
# Make a prediction using the testing data
testing_predictions = lr_model.predict(X_test)

In [55]:
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report

# Print the balanced_accuracy score of the model
balanced_accuracy_score(y_test, testing_predictions)

0.4903955444419186

In [56]:
# Generate a confusion matrix for the model
confusion_matrix(y_test, testing_predictions)

array([[54, 29],
       [71, 35]])

In [58]:
# Print the classification report for the model
target_names = ["Down", "Up"]
testing_report = classification_report(y_test, testing_predictions, target_names=target_names)

# Print the testing classification report
print(testing_report)

              precision    recall  f1-score   support

        Down       0.43      0.65      0.52        83
          Up       0.55      0.33      0.41       106

    accuracy                           0.47       189
   macro avg       0.49      0.49      0.47       189
weighted avg       0.50      0.47      0.46       189

