In [1]:
%pip install yfinance alpha_vantage pytrends vaderSentiment pandas numpy
%pip install tensorflow
%pip install torch torchvision torchaudio


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:

import yfinance as yf
from alpha_vantage.fundamentaldata import FundamentalData
from pytrends.request import TrendReq
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import requests

ALPHA_VANTAGE_API_KEY = 'RHK2ZEEA907NBXOM'
# fd = FundamentalData(key=ALPHA_VANTAGE_API_KEY, output_format='pandas')
# pytrends = TrendReq(hl='en-US', tz=360)
# sentiment_analyzer = SentimentIntensityAnalyzer()

def get_user_input():
  while True:
    ticker = input("Enter the company ticker symbol (e.g., AAPL): ").upper()
    if len(ticker) > 5 or not ticker.isalpha():
      print("Invalid ticker symbol. Please enter a 5-letter or below ticker symbol.")
    else:
      return ticker

# 1. get the stock price data
def get_stock_price_data(ticker, start_date, end_date):
    stock_data = yf.download(ticker, start=start_date, end=end_date)
    return stock_data

# 2. Financial Data (Alpha Vantage)
def get_financial_statements(ticker):
    fd = FundamentalData(key=ALPHA_VANTAGE_API_KEY, output_format='pandas')
    balance_sheet, _ = fd.get_balance_sheet_annual(ticker)
    income_statement, _ = fd.get_income_statement_annual(ticker)
    return balance_sheet, income_statement

# 3. Google Trends Sentiment Data
def get_google_trends_data(keyword):
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload([keyword], cat=0, timeframe='today 5-y', geo='', gprop='')
    trends_data = pytrends.interest_over_time()
    return trends_data

# 4. Sentiment Analysis (VADER) on News Headlines
def get_sentiment_score(text):
    sentiment_analyzer = SentimentIntensityAnalyzer()
    sentiment = sentiment_analyzer.polarity_scores(text)
    return sentiment['compound']

# 5. Data Normalization (Min-Max Scaling)
def normalize_data(df):
    # Check if the DataFrame is empty and return if so
    if df.empty:
        return df
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df)
    return pd.DataFrame(scaled_data, columns=df.columns)

# 6. Get the first 5 days of daily stock data from Alpha Vantage
def get_alpha_vantage_data(ticker):
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={ticker}&apikey={ALPHA_VANTAGE_API_KEY}'
    response = requests.get(url)
    data = response.json()

    # Extract the time series data
    time_series = data.get("Time Series (Daily)", {})

    # Print the first 5 days of stock data
    print(f"\nFirst 5 Days of Stock Data for {ticker}:")
    for i, (date, daily_data) in enumerate(time_series.items()):
        if i >= 5:  # Print only the first 5 days
            break
        print(f"\nDate: {date}")
        for key, value in daily_data.items():
            print(f"{key}: {value}")

# 7. Full Data Pipeline
def collect_data_pipeline(ticker, keyword, start_date, end_date):
    # Stock Data
    stock_data = get_stock_price_data(ticker, start_date, end_date)

    # Financial Statements
    balance_sheet, income_statement = get_financial_statements(ticker)

    # Google Trends Data
    trends_data = get_google_trends_data(keyword)

    # Example Sentiment Text
    example_headlines = ["Company releases new product", "Earnings beat expectations"]
    sentiment_scores = [get_sentiment_score(text) for text in example_headlines]

    # Data Aggregation
    # Ensure date indices and column names match before concatenation
    combined_data = pd.merge(stock_data, trends_data, left_index=True, right_index=True, how='inner')

    # Normalization
    normalized_data = normalize_data(combined_data)

    return normalized_data, balance_sheet, income_statement, sentiment_scores

# Example Usage
# ticker = 'AAPL'
# keyword = 'Apple stock'
# start_date = '2015-01-01'
# end_date = '2023-01-01'

ticker = get_user_input()
keyword = f"{ticker} stock"
start_date = '2015-01-01'
end_date = '2023-01-01'

# Collect and print Alpha Vantage stock data
get_alpha_vantage_data(ticker)

# Collect the data
normalized_data, balance_sheet, income_statement, sentiment_scores = collect_data_pipeline(ticker, keyword, start_date, end_date)

# Display Results
print("Normalized Stock and Trend Data for", ticker)
print(normalized_data.head())

print("\nBalance Sheet:")
print(balance_sheet.head())

print("\nIncome Statement:")
print(income_statement.head())

print("\nSentiment Scores:")
print(sentiment_scores)



First 5 Days of Stock Data for AAPL:

Date: 2024-10-03
1. open: 225.1400
2. high: 226.8050
3. low: 223.3200
4. close: 225.6700
5. volume: 34044158

Date: 2024-10-02
1. open: 225.8900
2. high: 227.3700
3. low: 223.0200
4. close: 226.7800
5. volume: 32880605

Date: 2024-10-01
1. open: 229.5200
2. high: 229.6500
3. low: 223.7400
4. close: 226.2100
5. volume: 63285048

Date: 2024-09-30
1. open: 230.0400
2. high: 233.0000
3. low: 229.6500
4. close: 233.0000
5. volume: 54793391

Date: 2024-09-27
1. open: 228.4600
2. high: 229.5200
3. low: 227.3000
4. close: 227.7900
5. volume: 34025967


[*********************100%***********************]  1 of 1 completed


Normalized Stock and Trend Data for AAPL
Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume, AAPL stock, isPartial]
Index: []

Balance Sheet:
                              fiscalDateEnding reportedCurrency   totalAssets  \
date                                                                            
1970-01-01 00:00:00.000000000       2023-09-30              USD  352583000000   
1970-01-01 00:00:00.000000001       2022-09-30              USD  352755000000   
1970-01-01 00:00:00.000000002       2021-09-30              USD  351002000000   
1970-01-01 00:00:00.000000003       2020-09-30              USD  323888000000   
1970-01-01 00:00:00.000000004       2019-09-30              USD  338516000000   

                              totalCurrentAssets  \
date                                               
1970-01-01 00:00:00.000000000       143566000000   
1970-01-01 00:00:00.000000001       135405000000   
1970-01-01 00:00:00.000000002       134836000000   
1970-01-01 00