In [None]:
import yfinance as yf
import pandas as pd
from datetime import datetime
import requests
import os

 # Creates shared session to reduce some rate limit

symbols = ['AAPL']

for symbol in symbols:    
    session = requests.Session()    
    df = yf.download(symbol, session=session, period="1y", interval="1d", auto_adjust=True, group_by="column")

    # Reset index to flatten DataFrame and avoid multi-index issues
    df = df.reset_index()

    # Remove any unnecessary multi-index or redundant columns if they exist
    
    if 'Ticker' in df.columns:
        df = df.drop(columns=['Ticker'])
    
    rootPath =   os.path.abspath(os.path.join(os.getcwd()))
    directoryPath = os.path.join(rootPath, "Data",symbol)
    # Save the cleaned DataFrame correctly
    if not os.path.exists(directoryPath):
        os.makedirs(directoryPath, exist_ok=True)

    filePath = os.path.join(directoryPath, f"stock_data_{symbol}.csv")   
    if os.path.exists(filePath):
        os.remove(filePath)    

    # Save CSV with proper index label
    df.to_csv(filePath, index=False)
    
    print(f"Data for {symbol} saved")   

In [2]:
# Gets stock data from finnhub
import finnhub
import os
from dotenv import load_dotenv

load_dotenv()

finnhub_client = finnhub.Client(api_key=os.getenv("FINNHUB_API_KEY"))
df = finnhub_client.quote('AAPL')
print(df)

{'c': 238.35, 'd': 0.32, 'dp': 0.1344, 'h': 240.07, 'l': 236.16, 'o': 237.705, 'pc': 238.03, 't': 1741118710}


In [None]:
import yfinance as yf
import json
symbol = 'AAPL'
stock = yf.Ticker(symbol)
info = stock.info
# Fetch financial statements
balance_sheet = stock.balance_sheet
income_statement = stock.financials
cashflow_statement = stock.cashflow
 # Extract last 4 quarters of data
last_4_quarters = balance_sheet.columns[:4]

def safe_get(df, key, columns):
    """Safely get the value from DataFrame if the key exists."""
    if key in df.index:
        return df.loc[key, columns].to_dict()
    else:
        return {}

result = {
    "symbol": symbol,
    "market_cap": info.get("marketCap"),
    "pe_ratio": info.get("trailingPE"),
    "eps": info.get("trailingEps"),    
    "dividend_yield": info.get("dividendYield"),
    "sector": info.get("sector"),
    "industry": info.get("industry"),
    "beta": stock.info.get("beta"),
    "current_assets": safe_get(balance_sheet, "Total Current Assets", last_4_quarters),
    "current_liabilities": safe_get(balance_sheet, "Total Current Liabilities", last_4_quarters),
    "operating_income": safe_get(income_statement, "Operating Income", last_4_quarters),
    "total_revenue": safe_get(income_statement, "Total Revenue", last_4_quarters),
    "free_cash_flow": safe_get(cashflow_statement, "Free Cash Flow", last_4_quarters)
}

print("Info: {info}")
print(json.dumps(result, indent=4))


KeyError: 'Total Current Assets'

In [None]:
# Convert the dictionary to a DataFrame and display stock information
import pandas as pd
res = pd.DataFrame(list(df.items()), columns=['Metrics', 'Values'])
display(res)


In [None]:
#Loading AAPL data from local file
import pandas as pd
symbol = 'AAPL'
rootPath =   os.path.abspath(os.path.join(os.getcwd()))
directoryPath = os.path.join(rootPath, "Data",symbol)
filePath = os.path.join(directoryPath, f"stock_data_{symbol}.csv")   
df = pd.read_csv(filePath,skiprows=[1])
print(df.columns.to_list())


In [None]:
#Creating technical indicators

import pandas as pd
import numpy as np
import ta  # Technical Analysis library for indicators
import os

def custom_zigzag(prices, threshold=0.05):
    """Custom ZigZag indicator that identifies significant price reversals."""
    trend = np.zeros(len(prices))
    last_pivot = prices[0]

    for i in range(1, len(prices)):
        change = (prices[i] - last_pivot) / last_pivot
        if abs(change) >= threshold:  # Significant movement detected
            trend[i] = prices[i]
            last_pivot = prices[i]
        else:
            trend[i] = np.nan  # Minor fluctuations ignored
    return pd.Series(trend)


# Ensure 'Date' column is datetime type and set as index
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)


# Calculate Simple Moving Averages (SMA)
df['SMA_20'] = ta.trend.sma_indicator(df['Close'], window=20)
df['SMA_200'] = ta.trend.sma_indicator(df['Close'], window=200)

# Calculate Exponential Moving Averages (EMA)
df['EMA_20'] = ta.trend.ema_indicator(df['Close'], window=20)
df['EMA_200'] = ta.trend.ema_indicator(df['Close'], window=200)

# Calculate Relative Strength Index (RSI)
df['RSI_7'] = ta.momentum.rsi(df['Close'], window=7)
df['RSI_14'] = ta.momentum.rsi(df['Close'], window=14)
df['RSI_200'] = ta.momentum.rsi(df['Close'], window=200)

# Calculate MACD (Moving Average Convergence Divergence)
df['MACD'] = ta.trend.macd(df['Close'], window_slow=13, window_fast=6)

# Calculate Average True Range (ATR)
df['ATR'] = ta.volatility.average_true_range(df['High'], df['Low'], df['Close'], window=14)

# Apply Custom ZigZag Trend Detection
df['ZigZag'] = custom_zigzag(df['Close'].values, threshold=0.05)

# Save processed data with technical indicators
rootPath =   os.path.abspath(os.path.join(os.getcwd()))
directoryPath = os.path.join(rootPath, "Data",symbol)
outputPath = os.path.join(directoryPath, f"technical_indicators_{symbol}.csv")   
if os.path.exists(outputPath):
        os.remove(outputPath)
df.to_csv(outputPath)

print(f"Technical indicators for {symbol} saved to {outputPath}")


In [None]:
# Price Forecasting Model:

import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import os
    
"""Train an XGBoost regression model to forecast stock prices (Close, High, Low)."""
# Load technical indicators from CSV
rootPath =   os.path.abspath(os.path.join(os.getcwd()))
directoryPath = os.path.join(rootPath, "Data",symbol)
filePath = os.path.join(directoryPath, f"technical_indicators_{symbol}.csv")   

df = pd.read_csv(filePath, index_col='Date', parse_dates=True)

# Prepare features using technical indicators
features = df[['SMA_20', 'SMA_200', 'EMA_20', 'EMA_200', 'RSI_7', 'RSI_14', 'RSI_200', 'MACD', 'ATR']]

# Define target variables: Predict next day's Close, High, and Low prices
target_close = df['Close'].shift(-1)  # Predict next day's close price
target_high = df['High'].shift(-1)    # Predict next day's high price
target_low = df['Low'].shift(-1)      # Predict next day's low price

# Remove rows with missing values
valid_rows = ~target_close.isna()
features = features[valid_rows]
target_close = target_close[valid_rows]
target_high = target_high[valid_rows]
target_low = target_low[valid_rows]

# Check for NaN or infinite values and handle them
features = features.fillna(0)
features.replace([np.inf, -np.inf], 0, inplace=True)

# Normalize the features for better model performance
scaler = MinMaxScaler()
X = scaler.fit_transform(features)

def train_single_target(y, target_name):
    # Split data into training and testing sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Initialize and train the XGBoost regressor
    model = xgb.XGBRegressor(
        n_estimators=100,
        learning_rate=0.05,
        max_depth=5,
        random_state=42
    )
    model.fit(X_train, y_train)

    # Evaluate the model using Mean Squared Error (MSE)
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    print(f"Mean Squared Error for {symbol} - {target_name}: {mse}")

    # Save the model    
    model_dir = os.path.join(rootPath, "Models","Price_Forecast",symbol)
    os.makedirs(model_dir, exist_ok=True)
    filePath = os.path.join(model_dir, f"Price_Forecast_{symbol}_{target_name}.json")       
    model.save_model(filePath)

    return model

# Train models for each target (Close, High, Low)
close_model = train_single_target(target_close, "close")
high_model = train_single_target(target_high, "high")
low_model = train_single_target(target_low, "low")

print(f"Trained and saved XGBoost regression models for {symbol}.")


In [None]:
# Trend Classification Model 
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
import os

symbol = 'AAPL'

rootPath =   os.path.abspath(os.path.join(os.getcwd()))
directoryPath = os.path.join(rootPath, "Data",symbol)

 # Load technical indicators from CSV
filePath = os.path.join(directoryPath, f"technical_indicators_{symbol}.csv")  
df = pd.read_csv(filePath, index_col='Date', parse_dates=True)

# Prepare features using technical indicators
features = df[['SMA_20', 'SMA_200', 'EMA_20', 'EMA_200', 'RSI_7', 'RSI_14', 'RSI_200', 'MACD', 'ATR']]

# Define target variable: Trend classification
df['price_change'] = df['Close'].pct_change().shift(-1)
df['trend'] = df['price_change'].apply(lambda x: 1 if x > 0.005 else -1 if x < -0.005 else 0)

# Ensure valid rows
valid_rows = ~df['trend'].isna()
features = features[valid_rows]
target_trend = df['trend'][valid_rows]

# Handle NaNs and infinite values in features
features = features.fillna(0)
features.replace([np.inf, -np.inf], 0, inplace=True)

# Normalize features
scaler = MinMaxScaler()
X = scaler.fit_transform(features)

# Map trend labels to non-negative values for classification
y = target_trend.map({-1: 0, 0: 1, 1: 2})

# Ensure target labels are integer values
y = y.astype(int)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the XGBoost classifier
model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.05, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate the model
predictions = model.predict(X_test)
print(f"Classification Report for {symbol}:")
print(classification_report(y_test, predictions))
print(f"Confusion Matrix for {symbol}:")
print(confusion_matrix(y_test, predictions))

# Save the trained model
directoryPath = os.path.join(rootPath, "Models", "Trend_Classification", symbol)
model_dir = os.path.join(directoryPath, f"Trend_classification_{symbol}.json")
if os.path.exists(model_dir):
        os.remove(model_dir)
os.makedirs(directoryPath, exist_ok=True)
model.save_model(model_dir)

print(f"Trained and saved trend classification model for {symbol}.")

In [None]:
#Real time prediction

import yfinance as yf
import pandas as pd
import xgboost as xgb
import os
from sklearn.preprocessing import MinMaxScaler
import ta # Technical analysis library


symbol = 'AAPL'
rootPath =   os.path.abspath(os.path.join(os.getcwd()))


# Function to compute necessary technical indicators from real-time data
def compute_real_time_indicators(df):
    """Calculate technical indicators from the latest real-time stock data."""      

    print(df.columns)
    # Calculate SMA, EMA, RSI, MACD, ATR
    df['SMA_20'] = ta.trend.sma_indicator(df['Close'], window=20)
    df['SMA_200'] = ta.trend.sma_indicator(df['Close'], window=200)
    df['EMA_20'] = ta.trend.ema_indicator(df['Close'], window=20)
    df['EMA_200'] = ta.trend.ema_indicator(df['Close'], window=200)
    df['RSI_7'] = ta.momentum.rsi(df['Close'], window=7)
    df['RSI_14'] = ta.momentum.rsi(df['Close'], window=14)
    df['RSI_200'] = ta.momentum.rsi(df['Close'], window=200)
    df['MACD'] = ta.trend.macd(df['Close'], window_slow=13, window_fast=6)
    df['ATR'] = ta.volatility.average_true_range(df['High'], df['Low'], df['Close'], window=14)

    # Select the last row (most recent data) for prediction
    pd.set_option('future.no_silent_downcasting', True)
    latest_data = df.iloc[-1][['SMA_20', 'SMA_200', 'EMA_20', 'EMA_200', 'RSI_7', 'RSI_14', 'RSI_200', 'MACD', 'ATR']].fillna(0)
    return latest_data.values.reshape(1, -1)  # Reshape for prediction

# Function to load a trained XGBoost model
def load_model(symbol, target):
    """Load the trained model for the given symbol and target price."""

    model_path = os.path.join(rootPath, "Models", "Price_Forecast", symbol, f"Price_Forecast_{symbol}_{target}.json")
    if os.path.exists(model_path):
        model = xgb.XGBRegressor()
        model.load_model(model_path)
        return model
    else:
        raise FileNotFoundError(f"Model for {symbol} - {target} not found.")


filePath = os.path.join(rootPath, "Data", symbol, f"stock_data_{symbol}.csv")

if not os.path.exists(filePath):         
    df = yf.download(
        symbol,
        period="30d",  # Fetch the latest 5 days for calculating indicators
        interval="1d",  # Daily interval
        auto_adjust=True,  # Adjust for dividends and splits
        group_by="column"  # Ensure flat data structure
    )

    # Reset index to flatten Date index and remove multi-index issues
    df = df.reset_index()

    # Remove 'Ticker' column if it exists to avoid redundancy
    if 'Ticker' in df.columns:
        df = df.drop(columns=['Ticker']) 

    # Save to CSV with the proper index label
    df.to_csv(filePath)     
else:
    df = pd.read_csv(filePath, skiprows=[1])

"""Fetch real-time data, compute indicators, and predict future prices."""
indicators = compute_real_time_indicators(df)

# Scale features before prediction
scaler = MinMaxScaler()
indicators_scaled = scaler.fit_transform(indicators)

# Load trained models
close_model = load_model(symbol, "close")
high_model = load_model(symbol, "high")
low_model = load_model(symbol, "low")

# Make predictions
predicted_close = close_model.predict(indicators_scaled)[0]
predicted_high = high_model.predict(indicators_scaled)[0]
predicted_low = low_model.predict(indicators_scaled)[0]

print(f"Symbol: {symbol}\nPredicted Close: {round(predicted_close, 2)}\nPredicted High: {round(predicted_high, 2)}\nPredicted Low: {round(predicted_low, 2)}")


In [None]:
# Market Sentiment
import requests
from dotenv import load_dotenv
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load environment variables
load_dotenv()
# Load FinBERT model
tokenizer = BertTokenizer.from_pretrained("ProsusAI/finbert")
model = BertForSequenceClassification.from_pretrained("ProsusAI/finbert")

query="AAPL"
page_size=10

"""Fetch latest news and compute sentiment score."""
url = f"https://newsapi.org/v2/everything?q={query}&pageSize={page_size}&apiKey={os.getenv("NEWS_API_KEY")}"
response = requests.get(url)
articles = response.json().get("articles", [])

news_data = []
for article in articles:
    text = article["title"] + " " + (article["description"] if article["description"] else "")
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiment = probs[0][2].item() - probs[0][0].item()  # Positive - Negative sentiment

    news_data.append({
        "title": article["title"],
        "sentiment_score": sentiment
    })
print(news_data)

[{'title': '9to5Mac Daily: February 3, 2025 – AAPL earnings, AR projects', 'sentiment_score': 0.9048196859657764}, {'title': 'AAPL climbed 3% on DeepSeek news, as other tech stocks fell', 'sentiment_score': -0.776688490062952}, {'title': 'Home Depot earnings, Fed, consumer confidence: What to Watch', 'sentiment_score': 0.8836971297860146}, {'title': 'After strong earnings, Morgan Stanley inches up AAPL target to $275', 'sentiment_score': -0.9172416217625141}, {'title': 'Third Point boosts its stake in these ‘Magnificent Seven’ stocks — but offloads this one', 'sentiment_score': -0.9093109704554081}, {'title': 'Apple just unveiled a new version of its cheaper iPhone', 'sentiment_score': 0.6541318744421005}, {'title': 'Trump to Apple: Ditch DEI', 'sentiment_score': 0.8662710040807724}, {'title': "Apple earnings are coming. Here's what to expect", 'sentiment_score': 0.8794791251420975}, {'title': 'Apple reports record earnings, but misses iPhone estimates', 'sentiment_score': -0.344693541