In [None]:
# CRSP Test

import os
import wrds
from dotenv import load_dotenv
import pandas as pd

# Load settings from the local .env file into the process environment
load_dotenv()

# Export PGPASSFILE only when PGPASS_PATH is actually configured.
# os.environ accepts strings only, so assigning the None returned by
# os.getenv() for an unset variable would raise a TypeError here.
pgpass_path = os.getenv("PGPASS_PATH")
if pgpass_path:
    os.environ['PGPASSFILE'] = pgpass_path
wrds_username = os.getenv("WRDS_username")

# Initialize a connection object using a `with` context manager
with wrds.Connection(wrds_username=wrds_username) as db:

    # Primary CRSP libraries we have access to:
    # crsp   -----> Annual updates
    # crspq  -----> Quarterly updates

    # Key CRSP tables in crspq:
    # crspq.dsf              -----> Daily Stock File (returns, prices, volume)
    # crspq.msf              -----> Monthly Stock File
    # crspq.stocknames       -----> Stock names and info
    # crspq.wrds_dsfv2_query -----> Enhanced daily stock file

    # Example 1: Get daily stock data for AAPL
    # NOTE(review): the legacy dsf table may not expose a `ticker` column
    # (tickers presumably live in crspq.stocknames) -- confirm against the
    # schema; if it is missing, this query fails and is reported by the
    # except branch below.
    sample_query = """
    SELECT date, permno, permco, ticker, cusip, prc, vol, ret, shrout
    FROM crspq.dsf
    WHERE ticker = 'AAPL'
      AND date >= '2024-01-01'
      AND date <= '2024-01-31'
    ORDER BY date
    LIMIT 50
    """

    try:
        df = db.raw_sql(sample_query)
        print("\nCRSP Daily Stock File (dsf) - AAPL Jan 2024:")
        # Only the first 10 of the (at most 50) returned rows are shown
        print(df.head(10))
    except Exception as e:
        # Best-effort smoke test: report the failure instead of raising
        print(f"\nFailed: {e}")

In [1]:
# Take a look at crsp parquet files we have downloaded
import os
from json import load
import pandas as pd
from pathlib import Path

# The CRSP parquet files sit in Data/crsp next to the current working
# directory's parent (i.e. one level up from the WRDS folder, then down).
directory = Path.cwd().parent.joinpath("Data", "crsp")

# Which yearly daily-stock-file extract to inspect
year = '2023'
file_path = directory.joinpath("crsp_dsf_" + year + ".parquet")

# Show the resolved location and whether the file is present before reading
print(f"File path: {file_path}")
print(f"Does it exist? {file_path.exists()}")

df = pd.read_parquet(file_path)

# List the column names available in the extract
print(df.columns)
#print(len(df))


File path: c:\Users\carso\Development\emerytrading\Data\crsp\crsp_dsf_2023.parquet
Does it exist? True
Index(['permno', 'hdrcusip', 'permco', 'siccd', 'nasdissuno', 'yyyymmdd',
       'sharetype', 'securitytype', 'securitysubtype', 'usincflg',
       'issuertype', 'primaryexch', 'conditionaltype', 'tradingstatusflg',
       'dlycaldt', 'dlydelflg', 'dlyprc', 'dlyprcflg', 'dlycap', 'dlycapflg',
       'dlyprevprc', 'dlyprevprcflg', 'dlyprevdt', 'dlyprevcap',
       'dlyprevcapflg', 'dlyret', 'dlyretx', 'dlyreti', 'dlyretmissflg',
       'dlyretdurflg', 'dlyorddivamt', 'dlynonorddivamt', 'dlyfacprc',
       'dlydistretflg', 'dlyvol', 'dlyclose', 'dlylow', 'dlyhigh', 'dlybid',
       'dlyask', 'dlyopen', 'dlynumtrd', 'dlymmcnt', 'dlyprcvol',
       'dlycumfacpr', 'dlycumfacshr', 'cusip', 'ticker', 'exchangetier',
       'shrout'],
      dtype='object')


In [None]:
# Looking at available fields in the fundq table 

import os
import wrds
from dotenv import load_dotenv
import pandas as pd

# Load settings from the local .env file into the process environment
load_dotenv()

# Export PGPASSFILE only when PGPASS_PATH is actually configured.
# os.environ accepts strings only, so assigning the None returned by
# os.getenv() for an unset variable would raise a TypeError here.
pgpass_path = os.getenv("PGPASS_PATH")
if pgpass_path:
    os.environ['PGPASSFILE'] = pgpass_path
wrds_username = os.getenv("WRDS_username")

# Initialize a connection object using a `with` context manager
with wrds.Connection(wrds_username=wrds_username) as db:

    try:
        # LIMIT 0 asks for no rows, so only the column layout is of interest
        df = db.raw_sql("SELECT * FROM comp.fundq LIMIT 0")
        print(df.columns.tolist())
        # Number of columns in comp.fundq
        print(len(df.columns))
    except Exception as e:
        # Best-effort inspection: report the failure instead of raising
        print(f"\nFailed: {e}")

In [None]:
# Looking at available fields in the comp.company table

import os
import wrds
from dotenv import load_dotenv
import pandas as pd

# Load settings from the local .env file into the process environment
load_dotenv()

# Export PGPASSFILE only when PGPASS_PATH is actually configured.
# os.environ accepts strings only, so assigning the None returned by
# os.getenv() for an unset variable would raise a TypeError here.
pgpass_path = os.getenv("PGPASS_PATH")
if pgpass_path:
    os.environ['PGPASSFILE'] = pgpass_path
wrds_username = os.getenv("WRDS_username")

# Initialize a connection object using a `with` context manager
with wrds.Connection(wrds_username=wrds_username) as db:

    try:
        # LIMIT 0 asks for no rows, so only the column layout is of interest
        df = db.raw_sql("SELECT * FROM comp.company LIMIT 0")
        print(df.columns.tolist())
        # Number of columns in comp.company
        print(len(df.columns))
    except Exception as e:
        # Best-effort inspection: report the failure instead of raising
        print(f"\nFailed: {e}")