In [None]:
import wrds
import pandas as pd
import numpy as np
from typing import List
import os

# Open one WRDS session for the whole notebook; get_stock_data reads this
# module-level `conn` when it runs its query.
conn = wrds.Connection()

In [2]:
def get_stock_data(symbol: str, start_date: str, end_date: str) -> pd.DataFrame:
    """
    Fetches stock data for a given symbol within a specified date range
    from WRDS CRSP database.

    The query reads ``crsp.dsf``, joins ``crsp.stksecurityinfohdr`` on
    ``permno`` (used for the ticker filter and the industry field) and
    left-joins ``crsp.dse`` on ``permno`` and ``date`` for the dividend
    amount. It is executed through the module-level ``conn`` object; this
    function does not take a connection argument.

    Args:
        symbol (str): Ticker symbol of the stock.
        start_date (str): Start date in 'YYYY-MM-DD' format (inclusive).
        end_date (str): End date in 'YYYY-MM-DD' format (inclusive).

    Returns:
        pd.DataFrame: The result of ``conn.raw_sql`` for the query.
    """
    # The three caller-supplied values are sent as bound parameters instead of
    # being formatted into the SQL text, so a ticker containing a quote (or any
    # other SQL syntax) cannot break or alter the statement.
    # NOTE(review): if crsp.dse can hold several rows for one (permno, date),
    # the LEFT JOIN below repeats the price row once per match - confirm.
    query = """SELECT c.date AS Date,
                        c.permco AS company_code,
                        c.permno AS security_code,
                        c.openprc AS Open,
                        c.askhi AS High,
                        c.bidlo AS Low,
                        c.prc AS Close,
                        c.vol AS Volume,
                        c.cfacpr AS splits_adjustment_factor,
                        c.ret AS adjusted_returns,
                        c.retx AS returns_without_dividends,
                        i.ticker AS symbol,
                        i.uesindustry AS industry,
                        d.divamt AS dividend_amount
                    FROM crsp.dsf c
                        JOIN crsp.stksecurityinfohdr i ON c.permno = i.permno
                        LEFT JOIN crsp.dse d ON c.permno = d.permno AND c.date = d.date
                    WHERE c.date >= %(start_date)s
                        AND c.date <= %(end_date)s
                        AND i.ticker = %(symbol)s
                """
    query_params = {
        'start_date': start_date,
        'end_date': end_date,
        'symbol': symbol,
    }
    # NOTE(review): PostgreSQL folds unquoted aliases to lower case and
    # process_stock_data reads the column as 'date', so `date_cols=['Date']`
    # probably matches no column and the dates come back unparsed - confirm.
    # Left as-is on purpose: parsing dates here would change the file names
    # that process_stock_data derives from the raw date values.
    return conn.raw_sql(query, date_cols=['Date'], params=query_params)

In [3]:

def process_stock_data(symbol: str, start_date: str, end_date: str = None, fix_price: bool = False):
    """
    Fetch data for one symbol and export it as pickle and CSV under
    ``stocks_data/``.

    The data comes from ``get_stock_data``. Column names are lower-cased,
    ``date`` is converted with ``pd.to_datetime`` and ``symbol``/``industry``
    are cast to ``str``. The output files are named
    ``stocks_data/{symbol}_from_{first}_to_{last}`` (+ ``.pickle`` / ``.csv``),
    where ``first``/``last`` are the earliest and latest dates actually present
    in the returned rows, not the requested range.

    Args:
        symbol (str): Ticker symbol of the stock.
        start_date (str): Start date in 'YYYY-MM-DD' format.
        end_date (str): End date in 'YYYY-MM-DD' format. Defaults to None,
            in which case today's date is used.
        fix_price (bool): Currently unused; accepted so existing callers that
            pass it keep working.

    Returns:
        None. When no rows come back (or there is no ``date`` column) a
        message is printed and nothing is written.
    """
    if end_date is None:
        end_date = pd.Timestamp.today().strftime('%Y-%m-%d')
    stock_data = get_stock_data(symbol=symbol, start_date=start_date, end_date=end_date)

    # Explicit best-effort skip instead of catching whatever iloc[0] raises.
    if stock_data is None or stock_data.empty:
        print(f'{symbol}: no rows returned for {start_date} to {end_date}; nothing exported')
        return

    # Normalise names before any lookup so the code below does not depend on
    # the casing the database happened to return.
    stock_data.columns = stock_data.columns.str.lower()
    if 'date' not in stock_data.columns:
        print(f"{symbol}: result has no 'date' column; nothing exported")
        return

    stock_data['date'] = pd.to_datetime(stock_data['date'])
    stock_data['symbol'] = stock_data['symbol'].astype(str)
    stock_data['industry'] = stock_data['industry'].astype(str)

    # The query has no ORDER BY, so first/last row are not guaranteed to be the
    # earliest/latest date; min/max is. strftime keeps the name date-only even
    # if the column arrives as timestamps.
    first_day = stock_data['date'].min().strftime('%Y-%m-%d')
    last_day = stock_data['date'].max().strftime('%Y-%m-%d')

    # to_pickle/to_csv do not create missing parent directories.
    os.makedirs('stocks_data', exist_ok=True)
    exported_file_name = f'stocks_data/{symbol}_from_{first_day}_to_{last_day}'

    stock_data.to_pickle(f'{exported_file_name}.pickle')
    stock_data.to_csv(f'{exported_file_name}.csv')

In [4]:
def read_symbols_from_file(file_path: str) -> List[str]:
    """
    Reads stock symbols from a text file, one symbol per line.

    Leading/trailing whitespace is stripped from each line and lines that are
    empty after stripping are skipped, so stray blank lines or trailing spaces
    in the file do not turn into bogus symbols.

    Args:
        file_path (str): Path of the text file to read.

    Returns:
        List[str]: Stock symbols in file order.
    """
    with open(file_path, 'r') as file:
        raw_lines = file.read().splitlines()
    stripped = (line.strip() for line in raw_lines)
    return [symbol for symbol in stripped if symbol]

In [None]:
# Main execution: export every ticker listed in sp500.txt over one fixed window.
symbols = read_symbols_from_file('sp500.txt')
start_time, end_time = '2005-01-01', '2024-06-18'

for symbol in symbols:
    process_stock_data(symbol, start_time, end_date=end_time, fix_price=False)
    # Printed after every call: process_stock_data returns nothing, so this
    # line does not tell a successful export apart from an early return.
    print(f'{symbol} passed')

In [None]:
import os
import zipfile

def zip_folder(folder_path, output_path):
    """Pack every file found under ``folder_path`` into a zip at ``output_path``.

    The archive is opened in write mode with deflate compression. Each file is
    stored under its path relative to ``folder_path``; only files yielded by
    ``os.walk`` are written.
    """
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, filenames in os.walk(folder_path):
            for filename in filenames:
                source_path = os.path.join(current_dir, filename)
                # Name inside the archive: path relative to the zipped folder.
                member_name = os.path.relpath(source_path, start=folder_path)
                archive.write(source_path, member_name)

# Source folder and destination archive for the export step.
folder_to_zip, output_zip_file = 'stocks_data', 'stocks_data.zip'

# Build the archive, then report what was written.
zip_folder(folder_to_zip, output_zip_file)
print(f'Folder {folder_to_zip} has been zipped into {output_zip_file}')
