In [42]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# Get the list of S&P 500 tickers
sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
tickers = sp500['Symbol'].tolist()

# Efficiently download the VIX historical data once
vix = yf.Ticker('^VIX')
vix_df = vix.history(period="15y")['Close']

# Initialize an empty list to store the data and variables to track issues
data = []
max_days = 0  # Track the maximum number of trading days
failed_tickers = []  # Track tickers that fail to process

# Loop through each valid ticker and download the data
for ticker in tqdm(tickers, desc="Retrieving data"):
    try:
        stock = yf.Ticker(ticker)
        df = stock.history(period="15y")
        
        # Ensure VIX data is aligned with the stock's DataFrame
        df['VIX'] = df.index.map(vix_df.reindex(df.index, method='nearest'))

        # Calculate RSI
        delta = df['Close'].diff()
        gain = delta.where(delta > 0, 0)
        loss = -delta.where(delta < 0, 0)
        avg_gain = gain.rolling(window=14).mean()
        avg_loss = loss.rolling(window=14).mean()
        rs = avg_gain / avg_loss
        df['RSI'] = 100 - (100 / (1 + rs))

        # Calculate Exponential Moving Averages
        df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
        df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()

        # For simplicity, current market cap and P/E ratio are added directly
        # Note: These are not historical values
        df['Market Cap'] = stock.info['marketCap']
        df['P/E Ratio'] = stock.info['trailingPE']

        # Restructure the data into a tensor format
        tensor_data = df[['Open', 'High', 'Low', 'Close', 'Volume', 'RSI', 'EMA_12', 'EMA_26', 'VIX', 'Market Cap', 'P/E Ratio']].values
        tensor_data = np.expand_dims(tensor_data, axis=1)

        # Update the maximum number of trading days
        max_days = max(max_days, df.shape[0])

        data.append(tensor_data)
    except Exception as e:
        failed_tickers.append(ticker)
        print(f"Failed to process {ticker}: {str(e)}")
        continue

# After processing, check for failed tickers
print("Failed tickers:", failed_tickers)

Retrieving data:   0%|          | 1/503 [00:00<04:27,  1.87it/s]

Failed to process MMM: 'trailingPE'


Retrieving data:   4%|▍         | 19/503 [00:06<02:55,  2.75it/s]

Failed to process ALL: 'trailingPE'


Retrieving data:  12%|█▏        | 61/503 [00:22<02:46,  2.65it/s]

Failed to process BAX: 'trailingPE'


Retrieving data:  12%|█▏        | 62/503 [00:22<02:49,  2.60it/s]BRK.B: No data found, symbol may be delisted
Retrieving data:  13%|█▎        | 63/503 [00:26<09:00,  1.23s/it]

Failed to process BRK.B: 'marketCap'


Retrieving data:  13%|█▎        | 65/503 [00:26<05:45,  1.27it/s]

Failed to process BIO: 'trailingPE'


Retrieving data:  14%|█▍        | 70/503 [00:28<03:06,  2.32it/s]

Failed to process BA: 'trailingPE'


Retrieving data:  16%|█▌        | 78/503 [00:31<02:50,  2.49it/s]BF.B: Period '15y' is invalid, must be one of ['1mo', '3mo', '6mo', 'ytd', '1y', '2y', '5y', '10y', 'max']
Retrieving data:  16%|█▌        | 79/503 [00:31<02:27,  2.88it/s]

Failed to process BF.B: 'marketCap'


Retrieving data:  18%|█▊        | 89/503 [00:35<02:27,  2.81it/s]

Failed to process CCL: 'trailingPE'


Retrieving data:  18%|█▊        | 91/503 [00:35<02:12,  3.12it/s]

Failed to process CTLT: 'trailingPE'


Retrieving data:  21%|██        | 104/503 [00:40<02:35,  2.56it/s]


KeyboardInterrupt: 

2799


In [None]:
# Collecting data from FRED

import pandas_datareader as pdr
import datetime

# Download the data
start = datetime.datetime(2000, 1, 1)
end = datetime.datetime(2021, 1, 1)
gdp = pdr.get_data_fred('GDP', start, end)
unemployment = pdr.get_data_fred('UNRATE', start, end)
cpi = pdr.get_data_fred('CPIAUCSL', start, end)
consumer_confidence = pdr.get_data_fred('UMCSENT', start, end)
m1 = pdr.get_data_fred('M1', start, end)
m2 = pdr.get_data_fred('M2', start, end)
ten_year = pdr.get_data_fred('GS10', start, end)
thirty_year = pdr.get_data_fred('GS30', start, end)

# Federal Reserve Bank of St. Louis: Wilshire 5000 Price Index
# Federal Reserve Bank of St. Louis: US Gross Domestic Product

In [1]:
# myapp.py
import logging
import mylib
logger = logging.getLogger(__name__)

def main():
    logging.basicConfig(filename='myapp.log', level=logging.INFO)
    logger.info('Started')
    mylib.do_something()
    logger.info('Finished')

if __name__ == '__main__':
    main()

ModuleNotFoundError: No module named 'mylib'