In [1]:
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# Regress SPY's daily close on the daily closes of ten large-cap stocks.
# Symbols are written in canonical upper case so they match the column labels
# of the downloaded frame.
target_ticker = 'SPY'
tickers = ['MSFT', 'AAPL', 'NVDA', 'AMZN', 'META', 'GOOGL', 'GOOG', 'BRK-B', 'AVGO', 'LLY', target_ticker]

# Look-back window: the 90 calendar days ending today.
end_date = datetime.now()
start_date = end_date - timedelta(days=90)

# Download daily bars for every ticker and keep only the closing prices.
data = yf.download(tickers, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'), interval='1d')
close_prices = data['Close'].copy()

# Record how many cells were missing before any filling, for inspection.
missing_values = close_prices.isnull().sum().sum()

# Forward-fill gaps, then drop rows that are still incomplete (leading rows
# have nothing to fill from). LinearRegression rejects NaN input.
close_prices = close_prices.ffill().dropna()

# Independent variables (every column except the target) and the dependent one.
X = close_prices.drop(columns=target_ticker)
y = close_prices[target_ticker]

# 80/20 split with a fixed seed so the split is reproducible.
# NOTE(review): rows are time-ordered and this split shuffles them, so the
# test rows are interleaved with the training rows. Consider shuffle=False if
# a true out-of-sample evaluation is wanted.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit ordinary least squares on the training rows.
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

coefficients = model.coef_
intercept = model.intercept_

# model.coef_ follows the column order of X. The downloaded columns are not
# guaranteed to come back in the order of the request list (yfinance typically
# returns them sorted), so take the labels from X itself instead of a
# hand-maintained list; otherwise each coefficient gets the wrong ticker.
tickers = list(X.columns)

# Human-readable prediction formula, one signed term per predictor.
terms = [
    f"+ ({coef:.4f} * {ticker})" if coef >= 0 else f"- ({abs(coef):.4f} * {ticker})"
    for ticker, coef in zip(tickers, coefficients)
]
formula = f"{target_ticker} = {intercept:.4f} " + " ".join(terms)

# Print the predicted SPY value for the first row of the test set.
print(model.predict(X_test.iloc[0:1]))

[*********************100%%**********************]  11 of 11 completed
[586.03255846]


In [2]:
# Regress QQQ's daily close on the daily closes of ten large-cap stocks.
target_ticker = 'QQQ'
tickers = ['MSFT', 'AAPL', 'NVDA', 'AMZN', 'AVGO', 'META', 'GOOGL', 'GOOG', 'TSLA', 'COST', target_ticker]

# Look-back window: the 90 calendar days ending today.
end_date = datetime.now()
start_date = end_date - timedelta(days=90)

# Download daily bars for every ticker and keep only the closing prices.
data = yf.download(tickers, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'), interval='1d')
close_prices = data['Close'].copy()

# Record how many cells were missing before any filling, for inspection.
missing_values = close_prices.isnull().sum().sum()

# Forward-fill gaps, then drop rows that are still incomplete (leading rows
# have nothing to fill from). LinearRegression rejects NaN input.
close_prices = close_prices.ffill().dropna()

# Independent variables (every column except the target) and the dependent one.
X = close_prices.drop(columns=target_ticker)
y = close_prices[target_ticker]

# 80/20 split with a fixed seed so the split is reproducible.
# NOTE(review): rows are time-ordered and this split shuffles them, so the
# test rows are interleaved with the training rows. Consider shuffle=False if
# a true out-of-sample evaluation is wanted.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit ordinary least squares on the training rows.
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

coefficients = model.coef_
intercept = model.intercept_

# model.coef_ follows the column order of X. The downloaded columns are not
# guaranteed to come back in the order of the request list (yfinance typically
# returns them sorted), so take the labels from X itself instead of a
# hand-maintained list; otherwise each coefficient gets the wrong ticker.
tickers = list(X.columns)

# Human-readable prediction formula, one signed term per predictor.
terms = [
    f"+ ({coef:.4f} * {ticker})" if coef >= 0 else f"- ({abs(coef):.4f} * {ticker})"
    for ticker, coef in zip(tickers, coefficients)
]
formula = f"{target_ticker} = {intercept:.4f} " + " ".join(terms)

# Print the predicted QQQ value for the first row of the test set.
print(model.predict(X_test.iloc[0:1]))

[*********************100%%**********************]  11 of 11 completed
[497.53481671]


In [3]:
# Build per-candle shape features for SPY from 5-minute bars and summarise them.
tickers = ['SPY']

# Look-back window: the 59 calendar days ending today.
# NOTE(review): 59 is presumably chosen to stay inside Yahoo's roughly 60-day
# limit on intraday history; confirm.
end_date = datetime.now()
start_date = end_date - timedelta(days=59)

# Download 5-minute bars for the ticker.
data = yf.download(tickers, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'), interval='5m')

# Some yfinance releases return two-level (field, ticker) columns even for a
# single ticker. Flatten to the field level so each price field selects a
# Series rather than a one-column DataFrame. `data` itself is left untouched.
ohlc = data.droplevel(-1, axis=1) if data.columns.nlevels > 1 else data

# Extract Open, High, Low, and Close prices.
open_prices = ohlc['Open'].copy()
close_prices = ohlc['Close'].copy()
high_prices = ohlc['High'].copy()
low_prices = ohlc['Low'].copy()

# One row per candle.
df_candles = pd.DataFrame({
    'Open': open_prices,
    'High': high_prices,
    'Low': low_prices,
    'Close': close_prices
})

# Candle direction: bearish when it closed below its open, bullish otherwise
# (a doji, Open == Close, is labelled bullish).
closed_lower = df_candles['Open'] > df_candles['Close']
df_candles['candle_profile'] = closed_lower.map({True: 'bearish', False: 'bullish'})

# Absolute sizes: full range, body, total wick, and each shadow.
body_top = df_candles[['Open', 'Close']].max(axis=1)
body_bottom = df_candles[['Open', 'Close']].min(axis=1)
df_candles['Price_Range'] = df_candles['High'] - df_candles['Low']
df_candles['Price_Body_Size'] = (df_candles['Open'] - df_candles['Close']).abs()
df_candles['Price_Wick_Size'] = df_candles['Price_Range'] - df_candles['Price_Body_Size']
df_candles['Price_Upper_Shadow'] = df_candles['High'] - body_top
df_candles['Price_Lower_Shadow'] = body_bottom - df_candles['Low']

# The same sizes as fractions of the full range. A completely flat candle
# (High == Low) has a zero range and yields NaN here.
df_candles['Body_Percentage'] = df_candles['Price_Body_Size'] / df_candles['Price_Range']
df_candles['Wick_Percentage'] = df_candles['Price_Wick_Size'] / df_candles['Price_Range']
df_candles['Upper_Shadow_Percentage'] = df_candles['Price_Upper_Shadow'] / df_candles['Price_Range']
df_candles['Lower_Shadow_Percentage'] = df_candles['Price_Lower_Shadow'] / df_candles['Price_Range']

# Candle strength: strong when the body is larger than the combined wicks.
body_dominates = df_candles['Price_Body_Size'] > df_candles['Price_Wick_Size']
df_candles['Candle_Strength'] = body_dominates.map({True: 'strong', False: 'weak'})

# Wick pressure: a longer upper shadow is labelled selling, otherwise buying.
upper_longer = df_candles['Price_Upper_Shadow'] > df_candles['Price_Lower_Shadow']
df_candles['Wick_Price_Action_Pressure'] = upper_longer.map({True: 'Selling', False: 'Buying'})

# Summary statistics of the numeric columns.
summary_stats = df_candles.describe()

print(summary_stats)


[*********************100%%**********************]  1 of 1 completed
              Open         High          Low        Close  Price_Range  \
count  3198.000000  3198.000000  3198.000000  3198.000000  3198.000000   
mean    579.800539   580.033454   579.550856   579.798910     0.482598   
std       9.222534     9.200808     9.243458     9.220466     0.273614   
min     565.330017   566.109985   565.270020   565.340027     0.070007   
25%     571.402512   571.609985   571.180023   571.396240     0.299927   
50%     579.404999   579.625000   579.115051   579.392517     0.410126   
75%     584.233475   584.417480   584.039978   584.215027     0.589966   
max     599.890015   600.169983   599.619995   599.859985     2.562500   

       Price_Body_Size  Price_Wick_Size  Price_Upper_Shadow  \
count      3198.000000      3198.000000         3198.000000   
mean          0.237255         0.245343            0.115102   
std           0.234254         0.164165            0.108557   
min         