In [1]:
# import libraries for scraping data

import os
import numpy as np
import pandas as pd
import yfinance as yf

In [2]:
# preview the columns available

preview_symbol = "MSFT"
preview_start = "2021-01-01"
preview_end = "2021-01-10"
preview_data = yf.download(preview_symbol, start=preview_start, end=preview_end)
preview_data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-04,222.529999,223.0,214.809998,217.690002,211.224304,37130100
2021-01-05,217.259995,218.520004,215.699997,217.899994,211.42807,23823000
2021-01-06,212.169998,216.490005,211.940002,212.25,205.945892,35930700
2021-01-07,214.039993,219.339996,213.710007,218.289993,211.806488,27694500
2021-01-08,218.679993,220.580002,217.029999,219.619995,213.097,22956200


In [3]:
msft = yf.Ticker("MSFT")
msft_data = msft.history(start=preview_start, end=preview_end, actions=True)
msft_dividends = msft_data['Dividends']
msft_dividends

Date
2021-01-04 00:00:00-05:00    0.0
2021-01-05 00:00:00-05:00    0.0
2021-01-06 00:00:00-05:00    0.0
2021-01-07 00:00:00-05:00    0.0
2021-01-08 00:00:00-05:00    0.0
Name: Dividends, dtype: float64

In [4]:
# Prepare datasets of 10 stocks each across 4 sectors

# stock symbol list
stocks = {
    'tech': ['AAPL', 'MSFT', 'GOOGL', 'META', 'INTC', 'NVDA', 'AMD', 'ORCL', 'IBM', 'TSLA'],
    'consumer_goods': ['AMZN', 'PG', 'KO', 'PEP', 'NKE', 'F', 'GE', 'MMM', 'EL', 'BUD'],
    'healthcare': ['PFE', 'JNJ', 'MRK', 'ABBV', 'LLY', 'GILD', 'BMY', 'AMGN', 'REGN', 'MDT'],
    'finance': ['JPM', 'BAC', 'WFC', 'C', 'GS', 'MS', 'AXP', 'V', 'BRK-B', 'SCHW']
}

# monitoring period
start_date = '2021-01-01'
end_date = '2023-12-31'

# fetch data
for sector, symbols in stocks.items():
    # make sector-wise directories
    directory = os.path.join('Historical', sector)
    if not os.path.exists(directory):
        os.makedirs(directory)

    for symbol in symbols:
        data = yf.download(symbol, start=start_date, end=end_date)
        stock = yf.Ticker(symbol)
        ticker_data = stock.history(start=start_date, end=end_date, actions=True)

        # incorporate dividends
        try:
            dividends = ticker_data['Dividends']
            dividends = dividends.reindex(data.index, fill_value=0)
            if (dividends>0.0).any():
                print("NON-ZERO DIVIDENDS")
        except:
            dividends = 0.0

        # add helpful columns
        data['Dividends'] = dividends
        data['Symbol'] = symbol
        data['Sector'] = sector

        # timestamps
        data.reset_index(inplace=True)
        
        # save to csv
        filename = f"Historical/{sector}/{symbol}_{start_date}_{end_date}.csv"
        data.to_csv(filename, index=False)
        print(f"Data for {symbol} saved to {sector} directory")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for AAPL saved to tech directory
Data for MSFT saved to tech directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Data for GOOGL saved to tech directory
Data for META saved to tech directory



[*********************100%%**********************]  1 of 1 completed


Data for INTC saved to tech directory
Data for NVDA saved to tech directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for AMD saved to tech directory
Data for ORCL saved to tech directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for IBM saved to tech directory
Data for TSLA saved to tech directory
Data for AMZN saved to consumer_goods directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Data for PG saved to consumer_goods directory



[*********************100%%**********************]  1 of 1 completed


Data for KO saved to consumer_goods directory
Data for PEP saved to consumer_goods directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for NKE saved to consumer_goods directory


[*********************100%%**********************]  1 of 1 completed


Data for F saved to consumer_goods directory


[*********************100%%**********************]  1 of 1 completed


Data for GE saved to consumer_goods directory
Data for MMM saved to consumer_goods directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for EL saved to consumer_goods directory
Data for BUD saved to consumer_goods directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for PFE saved to healthcare directory
Data for JNJ saved to healthcare directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for MRK saved to healthcare directory
Data for ABBV saved to healthcare directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for LLY saved to healthcare directory
Data for GILD saved to healthcare directory


[*********************100%%**********************]  1 of 1 completed


Data for BMY saved to healthcare directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for AMGN saved to healthcare directory
Data for REGN saved to healthcare directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for MDT saved to healthcare directory
Data for JPM saved to finance directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for BAC saved to finance directory
Data for WFC saved to finance directory


[*********************100%%**********************]  1 of 1 completed


Data for C saved to finance directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for GS saved to finance directory
Data for MS saved to finance directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for AXP saved to finance directory
Data for V saved to finance directory


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data for BRK-B saved to finance directory
Data for SCHW saved to finance directory
