In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

nflx_data = pd.read_csv("Download Data - STOCK_US_XNAS_NFLX.csv")

# Create Binary Target Variable
# Shift the 'Close' price down to compare with the next day's price
nflx_data['Next_Close'] = nflx_data['Close'].shift(-1)
nflx_data['Price_Increase'] = (nflx_data['Next_Close'] > nflx_data['Close']).astype(int)

# Drop the last row as it will have NaN for 'Next_Close' after shifting
nflx_data.dropna(inplace=True)

nflx_data['Volume'] = nflx_data['Volume'].str.replace(',', '').astype(float)

# Selecting features and target variable for logistic regression
X = nflx_data[['Open', 'High', 'Low', 'Volume', 'Close']]  # Including 'Close' as a feature
y = nflx_data['Price_Increase']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Model Training
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)

# Predicting on the test set
y_pred = logistic_model.predict(X_test)

# Model Evaluation

# the ratio of correctly predicted instances (both true positives and true negatives) to the total number of instances.
accuracy = accuracy_score(y_test, y_pred)

#ratio of true positive predictions to the total number of positive predictions (true positives and false positives).
precision = precision_score(y_test, y_pred, zero_division=0)

#the ratio of true positive predictions to the total number of actual positive instances (true positives and false negatives).
recall = recall_score(y_test, y_pred)

conf_matrix = confusion_matrix(y_test, y_pred)

accuracy, precision, recall, conf_matrix

(0.4,
 0.0,
 0.0,
 array([[2, 0],
        [3, 0]], dtype=int64))