<a href="https://colab.research.google.com/github/alvingeorge567/DeepLearningStockPrice/blob/main/LongTermStockPricePredicter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install yfinance alpha_vantage pytrends vaderSentiment pandas numpy
!pip install tensorflow
!pip install torch torchvision torchaudio



Collecting alpha_vantage
  Downloading alpha_vantage-3.0.0-py3-none-any.whl.metadata (12 kB)
Collecting pytrends
  Downloading pytrends-4.9.2-py3-none-any.whl.metadata (13 kB)
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading alpha_vantage-3.0.0-py3-none-any.whl (35 kB)
Downloading pytrends-4.9.2-py3-none-any.whl (15 kB)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment, pytrends, alpha_vantage
Successfully installed alpha_vantage-3.0.0 pytrends-4.9.2 vaderSentiment-3.3.2


In [2]:
import yfinance as yf
from alpha_vantage.fundamentaldata import FundamentalData
from pytrends.request import TrendReq
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import requests
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split

# NOTE(review): this API key is committed in plain text, so anyone who can read
# the notebook can use it. Consider rotating it and loading it from an
# environment variable or a secrets store instead of hard-coding it here.
ALPHA_VANTAGE_API_KEY = 'RHK2ZEEA907NBXOM'
# Module-level shared clients are disabled; the helper functions below each
# construct the client they need.
# fd = FundamentalData(key=ALPHA_VANTAGE_API_KEY, output_format='pandas')
# pytrends = TrendReq(hl='en-US', tz=360)
# sentiment_analyzer = SentimentIntensityAnalyzer()

def get_user_input():
    """Prompt until the user enters a valid ticker symbol, then return it.

    A symbol is accepted when it consists of 1 to 5 alphabetic characters.
    Surrounding whitespace is ignored and the result is upper-cased.

    Returns:
        str: The validated, upper-case ticker symbol.
    """
    while True:
        # Strip before validating: a stray space or tab (e.g. from a paste)
        # would otherwise fail isalpha() and reject a perfectly valid symbol.
        ticker = input("Enter the company ticker symbol (e.g., AAPL): ").strip().upper()
        # isalpha() is False for the empty string, so blank input is rejected.
        if len(ticker) <= 5 and ticker.isalpha():
            return ticker
        print("Invalid ticker symbol. Please enter a 5-letter or below ticker symbol.")

# New function for LSTM model development
def create_lstm_model(input_shape):
    """Build and compile a two-layer LSTM regressor.

    The network stacks two 50-unit LSTM layers (ReLU activation) followed by
    a single-unit dense output, and is compiled with the Adam optimizer
    (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, passed to the first LSTM layer.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    network = Sequential()
    # The first LSTM returns the full sequence so the second LSTM can consume it.
    network.add(LSTM(50, activation='relu', input_shape=input_shape, return_sequences=True))
    network.add(LSTM(50, activation='relu'))
    network.add(Dense(1))

    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    network.compile(optimizer=adam, loss='mse')
    return network

# New function to prepare data for LSTM
def prepare_data_for_lstm(data, look_back=60):
    """Slice a sequence into sliding input windows and next-step targets.

    For every start position ``s`` in ``range(len(data) - look_back)`` the
    window ``data[s:s + look_back]`` becomes one input sample and
    ``data[s + look_back]`` becomes its target.

    Args:
        data: Indexable, sliceable sequence of observations.
        look_back: Number of consecutive observations per input window.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays built from the windows and targets.
        Both are empty when ``data`` has no more than ``look_back`` entries.
    """
    window_starts = range(len(data) - look_back)
    windows = [data[start:start + look_back] for start in window_starts]
    targets = [data[start + look_back] for start in window_starts]
    return np.array(windows), np.array(targets)

# 1. get the stock price data
def get_stock_price_data(ticker, start_date, end_date):
    """Download price history for *ticker* through yfinance.

    Args:
        ticker: Symbol to download.
        start_date: Passed to ``yf.download`` as ``start``.
        end_date: Passed to ``yf.download`` as ``end``.

    Returns:
        Whatever ``yf.download`` returns for the requested range.
    """
    return yf.download(ticker, start=start_date, end=end_date)

# 2. Financial Data (Alpha Vantage)
def get_financial_statements(ticker):
    """Fetch the annual balance sheet and income statement for *ticker*.

    A fresh Alpha Vantage ``FundamentalData`` client (pandas output) is
    created on each call using the module-level API key.

    Args:
        ticker: Symbol to look up.

    Returns:
        tuple: ``(balance_sheet, income_statement)``.
    """
    client = FundamentalData(key=ALPHA_VANTAGE_API_KEY, output_format='pandas')
    # Each call yields a pair; only the first element (the data) is kept.
    balance_sheet, _balance_extra = client.get_balance_sheet_annual(ticker)
    income_statement, _income_extra = client.get_income_statement_annual(ticker)
    return (balance_sheet, income_statement)

# 3. Google Trends Sentiment Data
def get_google_trends_data(keyword):
    """Query Google Trends interest-over-time for a single keyword.

    The request uses the 'today 5-y' timeframe with no category, region, or
    property filter.

    Args:
        keyword: Search term to query.

    Returns:
        The result of ``TrendReq.interest_over_time()`` for the keyword.
    """
    trends_client = TrendReq(hl='en-US', tz=360)
    search_terms = [keyword]
    trends_client.build_payload(search_terms, cat=0, timeframe='today 5-y', geo='', gprop='')
    return trends_client.interest_over_time()

# 4. Sentiment Analysis (VADER) on News Headlines
def get_sentiment_score(text):
    """Return the VADER compound sentiment score for *text*.

    Args:
        text: The text (e.g. a news headline) to score.

    Returns:
        The ``'compound'`` entry of VADER's polarity scores for the text.
    """
    # Constructing the analyzer loads its lexicon, so build it once and reuse
    # it on later calls instead of paying that cost for every headline.
    analyzer = getattr(get_sentiment_score, "_analyzer", None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        get_sentiment_score._analyzer = analyzer
    return analyzer.polarity_scores(text)['compound']

# 5. Data Normalization (Min-Max Scaling)
def normalize_data(df):
    """Min-max scale every column of *df*, keeping its row and column labels.

    Args:
        df: DataFrame of values accepted by ``MinMaxScaler``.

    Returns:
        pandas.DataFrame: The scaled values with the same index and columns
        as *df*. An empty input is returned unchanged.
    """
    # The scaler cannot be fit on zero rows, so hand an empty frame straight back.
    if df.empty:
        return df
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df)
    # fit_transform returns a bare array; pass the original index through so
    # the rows keep their labels instead of being renumbered from 0.
    return pd.DataFrame(scaled_data, index=df.index, columns=df.columns)

# 6. Print the five most recent daily records returned by Alpha Vantage
def get_alpha_vantage_data(ticker):
    """Fetch daily prices from Alpha Vantage and print the first five records.

    Records are printed in the order the API returns them. When the response
    carries no daily time series, the fields it did contain are printed
    instead so the reason is visible. Nothing is returned.

    Args:
        ticker: Symbol to query.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    from itertools import islice

    # Let requests build and URL-encode the query string. The timeout keeps a
    # stalled connection from hanging the caller indefinitely.
    response = requests.get(
        'https://www.alphavantage.co/query',
        params={
            'function': 'TIME_SERIES_DAILY',
            'symbol': ticker,
            'apikey': ALPHA_VANTAGE_API_KEY,
        },
        timeout=30,
    )
    data = response.json()

    # Extract the time series data
    time_series = data.get("Time Series (Daily)", {})

    print(f"\nFirst 5 Days of Stock Data for {ticker}:")
    if not time_series:
        # Surface whatever the API sent back rather than printing nothing.
        print("No daily time series in the response.")
        for key, value in data.items():
            if key != "Time Series (Daily)":
                print(f"{key}: {value}")
        return

    for date, daily_data in islice(time_series.items(), 5):
        print(f"\nDate: {date}")
        for key, value in daily_data.items():
            print(f"{key}: {value}")

# 7. Full Data Pipeline
def collect_data_pipeline(ticker, keyword, start_date, end_date):
    """Collect price, fundamentals, trends and sentiment data for one ticker.

    Args:
        ticker: Symbol used for the price and financial-statement lookups.
        keyword: Search term used for the Google Trends lookup.
        start_date: Start of the price history range.
        end_date: End of the price history range.

    Returns:
        tuple: ``(normalized_data, balance_sheet, income_statement,
        sentiment_scores, closing_prices)`` where ``normalized_data`` is the
        normalized combination of price and trends data and
        ``closing_prices`` is ``stock_data['Close']``.
    """
    # Stock Data
    stock_data = get_stock_price_data(ticker, start_date, end_date)

    # Financial Statements
    balance_sheet, income_statement = get_financial_statements(ticker)

    # Google Trends Data
    trends_data = get_google_trends_data(keyword)

    # Placeholder headlines; no real news feed is wired in here.
    example_headlines = ["Company releases new product", "Earnings beat expectations"]
    sentiment_scores = [get_sentiment_score(text) for text in example_headlines]

    # Data Aggregation
    if stock_data.empty or trends_data.empty:
        # Nothing to align; a plain inner join yields the same empty result.
        combined_data = pd.merge(stock_data, trends_data, left_index=True, right_index=True, how='inner')
    else:
        # NOTE(review): the 'today 5-y' trends series appears to be sampled
        # more coarsely than the daily prices (weekly, per Google Trends
        # behavior - confirm), so an exact-date inner join drops most or all
        # rows. Carry the latest trends reading forward onto each price date
        # and discard dates that precede the first trends observation.
        aligned_trends = (
            trends_data.sort_index()
            .reindex(stock_data.index, method='ffill')
            .dropna(how='all')
        )
        combined_data = pd.merge(stock_data, aligned_trends, left_index=True, right_index=True, how='inner')

    # Normalization
    normalized_data = normalize_data(combined_data)

    return normalized_data, balance_sheet, income_statement, sentiment_scores, stock_data['Close']

# Example Usage
# ticker = 'AAPL'
# keyword = 'Apple stock'
# start_date = '2015-01-01'
# end_date = '2023-01-01'

# Main execution
# Script entry point: collect the data, report it, then train and evaluate
# an LSTM on the closing prices.
if __name__ == "__main__":
    ticker = get_user_input()
    keyword = f"{ticker} stock"
    start_date = '2015-01-01'
    end_date = '2023-01-01'

    # Collect and print Alpha Vantage stock data
    get_alpha_vantage_data(ticker)

    # Collect the data
    normalized_data, balance_sheet, income_statement, sentiment_scores, closing_prices = collect_data_pipeline(
        ticker, keyword, start_date, end_date
    )

    # Display Results
    print("Normalized Stock and Trend Data for", ticker)
    print(normalized_data.head())

    print("\nBalance Sheet:")
    print(balance_sheet.head())

    print("\nIncome Statement:")
    print(income_statement.head())

    print("\nSentiment Scores:")
    print(sentiment_scores)

    # Prepare data for LSTM
    look_back = 60
    if len(closing_prices) <= look_back:
        # Fail with a clear message instead of an opaque error from the
        # scaler or the splitter further down.
        raise ValueError(
            f"Need more than {look_back} closing prices to build training windows; "
            f"got {len(closing_prices)} for {ticker}."
        )

    # NOTE(review): the scaler is fit on the full series, test period included.
    # Fit it on the training slice only if a strictly leak-free evaluation is needed.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_prices = scaler.fit_transform(closing_prices.values.reshape(-1, 1))

    X, y = prepare_data_for_lstm(scaled_prices, look_back)
    # Split chronologically. Consecutive windows overlap almost entirely, so a
    # shuffled split puts near-copies of every test window into the training
    # set and makes the reported error unrealistically low.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Create and train LSTM model
    model = create_lstm_model((X_train.shape[1], 1))
    history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

    # Make predictions
    predictions = model.predict(X_test)

    # Inverse transform the predictions and actual values back to price units
    predictions = scaler.inverse_transform(predictions)
    y_test = scaler.inverse_transform(y_test)

    # Calculate RMSE
    rmse = np.sqrt(np.mean((predictions - y_test)**2))
    print(f"\nRoot Mean Squared Error: {rmse}")

    print("\nLSTM Model Summary:")
    model.summary()

    print("\nTraining complete. You can now use this model for forecasting.")

Enter the company ticker symbol (e.g., AAPL): aapl

First 5 Days of Stock Data for AAPL:

Date: 2024-11-11
1. open: 225.0000
2. high: 225.7000
3. low: 221.5000
4. close: 224.2300
5. volume: 42005602

Date: 2024-11-08
1. open: 227.1700
2. high: 228.6600
3. low: 226.4050
4. close: 226.9600
5. volume: 38328824

Date: 2024-11-07
1. open: 224.6250
2. high: 227.8750
3. low: 224.5700
4. close: 227.4800
5. volume: 42137691

Date: 2024-11-06
1. open: 222.6100
2. high: 226.0650
3. low: 221.1900
4. close: 222.7200
5. volume: 54561121

Date: 2024-11-05
1. open: 221.7950
2. high: 223.9500
3. low: 221.1400
4. close: 223.4500
5. volume: 28111338


[*********************100%***********************]  1 of 1 completed


TooManyRequestsError: The request failed: Google returned a response with code 429