In [5]:
import pandas as pd, csv
import os
import glob

In [6]:
# Create function to load schedules dealing quoting issues
def load_schedule(path):
    try:
        df = pd.read_csv(
            path,
            sep='\t',
            low_memory=False,
        ).drop(index=0).reset_index(drop=True)
        # after the file is read into df …
        if 'IDRSSD' in df.columns:
            df['IDRSSD'] = df['IDRSSD'].astype(str)   # <-- added
        return df
    except pd.errors.ParserError as err:
        print(f'ParserError in {path} -> {err}')
        # In this case, use csv.QUOTE_NONE
        df = pd.read_csv(
            path,
            sep='\t',
            quoting=csv.QUOTE_NONE,
            low_memory=False,
        ).drop(index=0).reset_index(drop=True)
        # remove quotes from df:
        df = df.replace({'"': ''}, regex=True)
        # remove quotes from column names:
        df.columns = df.columns.str.replace('"', '', regex=False)
        if 'IDRSSD' in df.columns:
            df['IDRSSD'] = df['IDRSSD'].astype(str)
        return df

In [7]:
cr_path = 'C:/Users/angel/Documents/Economics/Research/Banking Project/data/raw/FFIEC/_extracted/'

# Set path to be the directory:
os.chdir(cr_path)

# Create a list with the last 8 digits of the names of the folders:
dates = [folder[-8:] for folder in os.listdir()]  

In [8]:
# Create an empty list to store the combined DataFrames for each date:
all_schedules = []

for date in dates:
    # Change the directory to the folder with the data:
    os.chdir(os.path.join(cr_path, 'FFIEC CDR Call Bulk All Schedules ' + date))
    print(f'Loading data for {date}...')
    
    # Load the schedules:
    rca = load_schedule(f'FFIEC CDR Call Schedule RCA {date}.txt')   # no special case now
    rcg = load_schedule(f'FFIEC CDR Call Schedule RCG {date}.txt')   # no special case now
    rc   = load_schedule(f'FFIEC CDR Call Schedule RC {date}.txt')
    rcci = load_schedule(f'FFIEC CDR Call Schedule RCCI {date}.txt')
    rca  = load_schedule(f'FFIEC CDR Call Schedule RCA {date}.txt')
    rcg  = load_schedule(f'FFIEC CDR Call Schedule RCG {date}.txt')   # no special case now
    rce1 = load_schedule(f'FFIEC CDR Call Schedule RCEI {date}.txt')
    por  = load_schedule(f'FFIEC CDR Call Bulk POR {date}.txt')
    rck  = load_schedule(f'FFIEC CDR Call Schedule RCK {date}.txt')
    ri   = load_schedule(f'FFIEC CDR Call Schedule RI {date}.txt')
    

    # Define 'rcl' based on file availability
    rco_files = glob.glob(f'FFIEC CDR Call Schedule RCO {date}*.txt')
    rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
    # Define 'rcl' based on file availability
    rcb_files = glob.glob(f'FFIEC CDR Call Schedule RCB {date}*.txt')
    rcb = pd.read_csv(rcb_files[0], sep='\t')
    # drop 'RCON1773' column if it exists:
    if 'RCON1773' in rcb.columns:
        rcb = rcb.drop(columns='RCON1773')

    # Merge the data on 'IDRSSD':
    dt = pd.merge(rc, rcci, on='IDRSSD')
    dt = pd.merge(dt, rca, on='IDRSSD')
    dt = pd.merge(dt, rcg, on='IDRSSD')
    dt = pd.merge(dt, rce1, on='IDRSSD')
    dt = pd.merge(dt, por, on='IDRSSD')
    dt = pd.merge(dt, rck, on='IDRSSD')
    dt = pd.merge(dt, ri, on='IDRSSD')
    dt = pd.merge(dt, rco, on='IDRSSD')
    dt = pd.merge(dt, rcb, on='IDRSSD')
    #dt = pd.merge(dt, rcl, on='IDRSSD')
    #dt = dt.iloc[:, :-1]            # Drop last column since it is always empty
    dt['Date'] = date

Loading data for 03312001...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312002...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312003...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312004...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312005...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312006...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312007...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312008...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312009...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312010...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312011...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312012...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312013...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312014...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312015...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312016...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312017...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312018...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312019...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312020...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312021...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312022...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312023...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 03312024...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302001...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302002...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302003...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302004...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302005...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302006...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302007...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302008...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302009...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302010...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302011...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302012...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302013...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302014...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302015...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302016...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302017...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302018...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302019...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302020...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302021...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302022...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302023...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 06302024...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302001...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302002...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302003...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302004...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302005...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302006...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302007...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302008...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302009...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302010...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302011...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302012...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302013...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302014...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302015...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302016...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302017...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302018...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302019...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302020...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302021...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302022...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302023...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 09302024...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312001...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312002...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312003...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312004...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312005...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312006...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312007...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312008...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312009...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312010...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312011...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312012...


  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312013...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312014...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312015...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312016...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312017...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312018...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312019...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312020...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312021...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312022...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


Loading data for 12312023...


  rco = pd.read_csv(rco_files[0], sep='\t').drop(index=0, errors='ignore').reset_index(drop=True)
  rcb = pd.read_csv(rcb_files[0], sep='\t')


In [9]:
rcb

Unnamed: 0,IDRSSD,RCFD0211,RCFD0213,RCFD0416,RCFD1286,RCFD1287,RCFD1737,RCFD1738,RCFD1739,RCFD1741,...,RCONG320,RCONG321,RCONG322,RCONG323,RCONG348,RCONG349,RCONG350,RCONG351,RCONG352,Unnamed: 241
0,,US TREAS SECS-HLD-TO-MAT-AMRTZ COST,US TREAS SECS-HLD-TO-MAT-FAIR VALUE,PLEDGED SECURITIES-BOOK VLAUE,US TREAS SECS-AVL-FOR-SLE-AMRTZ COST,US TREAS SECS-AVL-FOR-SLE-FAIR VALUE,OTHR DOMSTC DEBT SECS-HLD-TO-MAT-AMR,OTHR DOMSTC DEBT SECS-HLD-TO-MAT-FAI,OTHR DOMSTC DEBT SECS-AVL-FR-SLE-AMR,OTHR DOMSTC DEBT SECS-AVL-FR-SLE-FAI,...,MBS OTHR OTHR RES MBS HTM AMRTZ,MBS OTHR OTHR RES MBS HTM FV,MBS OTHR OTHR RES MBS AFS AMRTZ,MBS OTHR OTHR RES MBS AFS FV,SFP TPS ISS FNC INST HTM AMRTZ COST,SFP TPS ISS FNC INST HTM FV,SFP TPS ISS FNC INST AFS AMRTZ COST,SFP TPS ISS FNC INST AFS FV,SFP TPS ISS RE INV TR HTM AMRTZ COST,
1,37.0,,,,,,,,,,...,0,0,0,0,,,,,,
2,242.0,,,,,,,,,,...,0,0,0,0,,,,,,
3,279.0,,,,,,,,,,...,0,0,28056,27690,,,,,,
4,354.0,,,,,,,,,,...,0,0,0,0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4637,5805479.0,,,,,,,,,,...,0.0,0.0,0.0,0.0,,,,,,
4638,5805488.0,,,,,,,,,,...,0.0,0.0,0.0,0.0,,,,,,
4639,5805817.0,,,,,,,,,,...,0.0,0.0,0.0,0.0,,,,,,
4640,5859511.0,,,,,,,,,,...,0.0,0.0,0.0,0.0,,,,,,
