In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

raw = pd.read_csv('stock_prices_10yr_all_companies.csv', header=None)

tickers = raw.iloc[0, 1:].values
features = raw.iloc[1, 1:].values
columns = ['Date'] + [f'{tickers[i]}_{features[i]}' for i in range(len(tickers))]

df = pd.DataFrame(raw.values[2:], columns=columns)

# Cleanup
df = df[df['Date'].astype(str) != 'Date']
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d', errors='coerce')
df = df.dropna(subset=['Date'])  # drop rows with invalid dates

In [27]:
target = 'GOOGL'
cols = [f'Close_{target}', f'High_{target}', f'Low_{target}']
df[cols] = df[cols].astype(float)
df['Prev_Close'] = df[f'Close_{target}'].shift(1)

df['Target_Up'] = (df[f'Close_{target}'] > df['Prev_Close']).astype(int)

df.dropna(inplace=True)

scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[['Prev_Close']])

X = scaled
y = df['Target_Up'].values

split = int(len(X) * 0.92)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]
dates_test = df['Date'].iloc[split:]
real_close = df[f'Close_{target}'].iloc[split:].values

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

signals = np.where(y_pred == 1, 'Buy', 'Sell')

real_trend = np.where(np.diff(real_close) > 0, 1, 0)
pred_trend = y_pred[:-1]
trend_accuracy = accuracy_score(real_trend, pred_trend)

print(f"Trend prediction accuracy: {trend_accuracy:.2%}")
print("\nSample predictions:")
for i in range(10):
    print(f"{dates_test.iloc[i].date()} | Close: ${real_close[i]:.2f} | Signal: {signals[i]}")

Trend prediction accuracy: 53.24%

Sample predictions:
2024-09-13 | Close: $157.10 | Signal: Buy
2024-09-16 | Close: $157.70 | Signal: Buy
2024-09-17 | Close: $158.95 | Signal: Buy
2024-09-18 | Close: $159.44 | Signal: Buy
2024-09-19 | Close: $161.77 | Signal: Buy
2024-09-20 | Close: $163.21 | Signal: Buy
2024-09-23 | Close: $161.48 | Signal: Buy
2024-09-24 | Close: $161.92 | Signal: Buy
2024-09-25 | Close: $161.12 | Signal: Buy
2024-09-26 | Close: $162.36 | Signal: Buy
