In [34]:
import os
import time
import requests
import numpy as np
import pandas as pd
import pandas_ta as ta
import yfinance as yf
from dotenv import load_dotenv
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Read a local .env file (if any) into the process environment, then pick up
# the Polygon credential from there; api_key is None when the variable is unset.
load_dotenv()
api_key = os.environ.get("POLYGON_API_KEY")


In [35]:
# Load daily OHLCV bars for one ticker into `df`, indexed by date.
# A CSV next to the notebook acts as a cache so the API is only hit once.
symbol = "SPY"
start_date = "2020-01-01"
end_date = "2024-01-01"
# NOTE: the cache file is keyed by symbol only. After changing start_date or
# end_date, delete the file, otherwise the previously downloaded range is reused.
filename = f"{symbol}_data.csv"

if os.path.exists(filename):
    df = pd.read_csv(filename, parse_dates=["date"])
else:
    # Fail with a clear message instead of sending an unauthenticated request.
    if not api_key:
        raise RuntimeError("POLYGON_API_KEY is not set; cannot download price data.")

    url = f"https://api.polygon.io/v2/aggs/ticker/{symbol}/range/1/day/{start_date}/{end_date}"
    # The key travels in a header rather than the query string so it cannot
    # leak through URLs echoed in tracebacks or logs.
    response = requests.get(
        url,
        params={"adjusted": "true", "sort": "asc", "limit": 50000},
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=30,
    )
    response.raise_for_status()

    payload = response.json()
    data = payload.get('results')
    if not data:
        # Error payloads and empty ranges carry no 'results' list.
        raise ValueError(
            f"Polygon returned no bars for {symbol} between {start_date} and {end_date} "
            f"(status={payload.get('status')!r})"
        )

    df = pd.DataFrame(data)
    # 't' is an epoch timestamp in milliseconds.
    df['t'] = pd.to_datetime(df['t'], unit='ms')
    df = df.rename(columns={'t': 'date', 'o': 'open', 'h': 'high', 'l': 'low', 'c': 'close', 'v': 'volume'})
    df = df[['date', 'open', 'high', 'low', 'close', 'volume']]
    # Cache before indexing so the CSV keeps 'date' as a regular column,
    # which is what the read_csv branch above expects.
    df.to_csv(filename, index=False)

df = df.set_index('date')
df.head()


Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-04-10 04:00:00,406.61,409.69,405.97,409.61,63681042.0
2023-04-11 04:00:00,410.26,411.18,408.92,409.72,59297945.0
2023-04-12 04:00:00,411.87,412.17,407.44,408.05,86413179.0
2023-04-13 04:00:00,409.18,413.84,407.9922,413.47,85785954.0
2023-04-14 04:00:00,412.81,415.09,410.06,412.46,78155845.0


In [36]:
# Add technical-indicator columns computed from the price columns of `df`.
price_cols = ['open', 'high', 'low', 'close', 'volume']
indicator_cols = ['rsi', 'macd', 'sma50', 'ema20', 'atr']

df['rsi'] = ta.rsi(df['close'], length=14)
# ta.macd returns several columns; only 'MACD_12_26_9' is kept here.
df['macd'] = ta.macd(df['close'])['MACD_12_26_9']
df['sma50'] = ta.sma(df['close'], length=50)
df['ema20'] = ta.ema(df['close'], length=20)
# ATR uses the library's default window (no length is passed).
df['atr'] = ta.atr(df['high'], df['low'], df['close'])

# Drop the warm-up rows where an indicator is still NaN. The check is limited
# to the price and indicator columns: a bare dropna() would also look at any
# columns added later (e.g. all-None metadata columns) and would empty the
# frame if this cell were re-run after them.
# NOTE: re-running this cell on an already-trimmed `df` trims it again,
# because the indicators are recomputed on the shortened series.
df = df.dropna(subset=price_cols + indicator_cols)
df.head()


Unnamed: 0_level_0,open,high,low,close,volume,rsi,macd,sma50,ema20,atr
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-06-20 04:00:00,437.45,438.37,435.03,437.18,75935359.0,66.377267,6.470936,417.876,428.89285,4.519668
2023-06-21 04:00:00,436.16,436.99,434.33,434.94,76840751.0,62.218352,6.143438,418.3826,429.468769,4.3974
2023-06-22 04:00:00,433.95,436.62,433.6,436.51,70510175.0,63.924472,5.942083,418.9184,430.139362,4.296715
2023-06-23 04:00:00,432.93,435.06,432.47,433.21,91981537.0,57.996211,5.453362,419.4216,430.431804,4.277981
2023-06-26 04:00:00,432.62,434.61,431.19,431.44,72723560.0,55.047438,4.867118,419.781,430.527823,4.215466


In [37]:
# Fetch reference data for the ticker from Polygon and copy selected fields
# onto every row of `df` as constant columns.
symbol = "SPY"
url = f"https://api.polygon.io/v3/reference/tickers/{symbol}"

try:
    # The key is sent as a header (not in the URL) so it cannot leak via
    # URLs echoed in error messages; the timeout keeps the cell from hanging.
    response = requests.get(
        url,
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=30,
    )
    data = response.json()
except (requests.RequestException, ValueError):
    # Network failure or a non-JSON body: fall through to the failure branch.
    data = {}

# Check if the response is valid
if 'results' in data:
    result = data['results']
    # .get() leaves a field as None when the payload does not include it.
    fundamentals = {
        'name': result.get('name'),
        'market_cap': result.get('market_cap'),
        'share_class_shares_outstanding': result.get('share_class_shares_outstanding'),
        'weighted_shares_outstanding': result.get('weighted_shares_outstanding'),
        'total_employees': result.get('total_employees'),
        'sector': result.get('sic_description'),
    }

    # Assign to all rows in your df.
    # NOTE: fields that came back as None become all-missing columns, so a
    # later bare df.dropna() would remove every row.
    for key, value in fundamentals.items():
        df[key] = value

    print(f"Pulled fundamentals from Polygon for {symbol}:")
    print(fundamentals)

else:
    print("Failed to retrieve fundamentals from Polygon.io")


Pulled fundamentals from Polygon for SPY:
{'name': 'SPDR S&P 500 ETF Trust', 'market_cap': None, 'share_class_shares_outstanding': 1016280000, 'weighted_shares_outstanding': None, 'total_employees': None, 'sector': None}


In [38]:
# Placeholder sentiment: these values are synthetic Gaussian noise, not derived
# from any text source. The global NumPy seed is fixed so the draw is repeatable.
np.random.seed(42)
sentiment_mean = 0.02
sentiment_std = 0.1
n_rows = len(df)
df['daily_sentiment'] = np.random.normal(loc=sentiment_mean, scale=sentiment_std, size=n_rows)
df.head()


Unnamed: 0_level_0,open,high,low,close,volume,rsi,macd,sma50,ema20,atr,name,market_cap,share_class_shares_outstanding,weighted_shares_outstanding,total_employees,sector,daily_sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-06-20 04:00:00,437.45,438.37,435.03,437.18,75935359.0,66.377267,6.470936,417.876,428.89285,4.519668,SPDR S&P 500 ETF Trust,,1016280000,,,,0.069671
2023-06-21 04:00:00,436.16,436.99,434.33,434.94,76840751.0,62.218352,6.143438,418.3826,429.468769,4.3974,SPDR S&P 500 ETF Trust,,1016280000,,,,0.006174
2023-06-22 04:00:00,433.95,436.62,433.6,436.51,70510175.0,63.924472,5.942083,418.9184,430.139362,4.296715,SPDR S&P 500 ETF Trust,,1016280000,,,,0.084769
2023-06-23 04:00:00,432.93,435.06,432.47,433.21,91981537.0,57.996211,5.453362,419.4216,430.431804,4.277981,SPDR S&P 500 ETF Trust,,1016280000,,,,0.172303
2023-06-26 04:00:00,432.62,434.61,431.19,431.44,72723560.0,55.047438,4.867118,419.781,430.527823,4.215466,SPDR S&P 500 ETF Trust,,1016280000,,,,-0.003415


In [39]:
# Quick sanity check of the assembled frame: dimensions, column names, first rows.
n_rows, n_cols = df.shape
column_names = list(df.columns)
print("Shape:", (n_rows, n_cols))
print("Columns:", column_names)
df.head()


Shape: (135, 17)
Columns: ['open', 'high', 'low', 'close', 'volume', 'rsi', 'macd', 'sma50', 'ema20', 'atr', 'name', 'market_cap', 'share_class_shares_outstanding', 'weighted_shares_outstanding', 'total_employees', 'sector', 'daily_sentiment']


Unnamed: 0_level_0,open,high,low,close,volume,rsi,macd,sma50,ema20,atr,name,market_cap,share_class_shares_outstanding,weighted_shares_outstanding,total_employees,sector,daily_sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-06-20 04:00:00,437.45,438.37,435.03,437.18,75935359.0,66.377267,6.470936,417.876,428.89285,4.519668,SPDR S&P 500 ETF Trust,,1016280000,,,,0.069671
2023-06-21 04:00:00,436.16,436.99,434.33,434.94,76840751.0,62.218352,6.143438,418.3826,429.468769,4.3974,SPDR S&P 500 ETF Trust,,1016280000,,,,0.006174
2023-06-22 04:00:00,433.95,436.62,433.6,436.51,70510175.0,63.924472,5.942083,418.9184,430.139362,4.296715,SPDR S&P 500 ETF Trust,,1016280000,,,,0.084769
2023-06-23 04:00:00,432.93,435.06,432.47,433.21,91981537.0,57.996211,5.453362,419.4216,430.431804,4.277981,SPDR S&P 500 ETF Trust,,1016280000,,,,0.172303
2023-06-26 04:00:00,432.62,434.61,431.19,431.44,72723560.0,55.047438,4.867118,419.781,430.527823,4.215466,SPDR S&P 500 ETF Trust,,1016280000,,,,-0.003415
