In [1]:
import os
import pandas as pd
import numpy as np
import warnings

# Silence FutureWarning messages only (all other warning categories are still shown):
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
class CallReports:
    """
    Helper for working with a cleaned 'call_reports.csv' panel of bank data.

    Typical workflow:
      1. select_variables()      -> loads the requested columns into self.df_selected
      2. construct_definitions() -> derives new variables into self.df_constructed
      3. create_balanced_panel() -> keeps banks observed on every date (self.df_balanced)

    compare_variables() is a diagnostic that counts how often two columns are
    reported together, separately, or not at all.
    """

    def __init__(self, folder_path, essential_vars=None):
        """
        Initialize the analysis class with the folder path where 'call_reports.csv' is stored.

        No data is read here; loading happens in select_variables().

        Parameters:
          folder_path (str): Path to the folder containing 'call_reports.csv'.
          essential_vars (list, optional): List of columns that must always be included.
                Defaults to ['IDRSSD', 'Financial Institution Name', 'Date'].
        """
        self.folder_path = folder_path
        # Build full path for the call_reports.csv file.
        self.file_path = os.path.join(folder_path, "call_reports.csv")

        if essential_vars is None:
            self.essential_vars = ['IDRSSD', 'Financial Institution Name', 'Date']
        else:
            # Copy so later changes to the caller's list cannot alter this instance.
            self.essential_vars = list(essential_vars)

        # DataFrames will be loaded later, once variables to select are provided.
        self.df_selected = None
        self.df_constructed = None
        self.df_balanced = None

    @staticmethod
    def _drop_identical_suffix_duplicates(df):
        """
        Report every '<base>_x' / '<base>_y' column pair and drop '<base>_y' when it
        carries exactly the same information as '<base>_x'.

        Two columns are treated as identical only if (a) the maximum absolute gap over
        the rows where both are reported is zero AND (b) they are missing on exactly the
        same rows. Condition (b) matters because a plain difference skips rows in which
        only one column is NaN; without it, a '_y' column holding values that '_x' lacks
        would be dropped and those values lost.

        Parameters:
          df (DataFrame): Frame to inspect.

        Returns:
          DataFrame without the redundant '_y' columns (the input is not modified).
        """
        redundant = []
        for col in df.columns:
            if not col.endswith("_x"):
                continue
            base = col[:-2]  # Remove the '_x' suffix
            col_y = base + "_y"
            if col_y not in df.columns:
                continue

            x_missing = df[col].isna()
            y_missing = df[col_y].isna()
            # Rows where exactly one of the two columns is reported.
            one_sided = int((x_missing ^ y_missing).sum())

            try:
                # Maximum absolute difference ("gap") over rows where both are reported.
                gap = (df[col] - df[col_y]).abs().max()
            except Exception:
                # Subtraction fails for non-numeric data: fall back to equality,
                # counting rows that are missing in both columns as equal.
                differs = (df[col] != df[col_y]) & ~(x_missing & y_missing)
                gap = 0 if not differs.any() else "Mismatch"

            print(f"Duplicate variable {base}: max gap between {col} and {col_y} is {gap}")

            if gap == 0 and one_sided == 0:
                print(f"Dropping duplicate column {col_y} as it is identical to {col}.")
                redundant.append(col_y)
            elif gap == 0:
                print(f"Keeping {col_y}: it matches {col} where both are reported, but "
                      f"{one_sided} row(s) are missing in only one of the two columns.")

        return df.drop(columns=redundant)

    def select_variables(self, variables=None):
        """
        Select a subset of columns for analysis and load only those columns from the CSV file.
        Essential variables are always included.

        Also, check for duplicate columns that come in pairs ending with '_x' and '_y'. For each
        pair, the maximum gap (absolute difference) between the entries is printed. The '_y'
        column is dropped (keeping '_x') only when the gap is zero and both columns are missing
        on exactly the same rows, so no reported value is discarded.

        Finally, reorder the columns so that the essential variables (self.essential_vars)
        and the 'Year' column (if it exists) are the first columns in the DataFrame.

        Parameters:
          variables (list, optional): Additional variable names to include besides essential ones.
                                      A single name may be passed as a string.
                                      If None, only essential variables will be selected.

        Returns:
          DataFrame with the selected (and cleaned) columns, with essential_vars and 'Year'
          ordered first. The same frame is stored in self.df_selected.

        Raises:
          IOError: If the CSV header cannot be read.
        """
        if isinstance(variables, str):
            variables = [variables]

        requested = list(self.essential_vars)
        if variables is not None:
            requested.extend(variables)
        # dict.fromkeys removes repeats while keeping a deterministic, first-seen order.
        vars_to_select = list(dict.fromkeys(requested))

        # Read only the header of the CSV to know which columns exist.
        try:
            df_header = pd.read_csv(self.file_path, nrows=0)
        except Exception as e:
            raise IOError(f"Error reading file header from {self.file_path}: {e}") from e

        available_in_file = set(df_header.columns)

        # Warn if some requested variables are not in the file.
        missing_vars = [v for v in vars_to_select if v not in available_in_file]
        if missing_vars:
            print("Warning: The following variables are not in the data and will be skipped:", missing_vars)

        # Load only the requested columns that actually exist in the CSV.
        available_vars = [v for v in vars_to_select if v in available_in_file]
        df = pd.read_csv(self.file_path, usecols=available_vars)

        # Ensure the 'Date' column is converted to datetime if present
        # (unparseable entries become NaT).
        if 'Date' in df.columns:
            df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

        df = self._drop_identical_suffix_duplicates(df)

        # Essential variables first, then 'Year', then the rest in their loaded order.
        # Each leading column is listed once, even if essential_vars repeats a name
        # or already contains 'Year'.
        lead_cols = [c for c in dict.fromkeys(self.essential_vars) if c in df.columns]
        if 'Year' in df.columns and 'Year' not in lead_cols:
            lead_cols.append('Year')
        other_cols = [c for c in df.columns if c not in lead_cols]

        self.df_selected = df[lead_cols + other_cols]
        return self.df_selected

    def compare_variables(self, var_RCFD, var_RCON):
        """
        Compare the reporting pattern of two columns (e.g., a RCFD column and a RCON column).

        Returns:
          A dictionary of plain-int counts for:
            - both_valid: Observations where both are not NaN.
            - RCFD_only: Observations where only var_RCFD is not NaN.
            - RCON_only: Observations where only var_RCON is not NaN.
            - both_NaN: Observations where both are NaN.

        Raises:
          ValueError: If select_variables() has not been run, or a column is not available.
        """
        if self.df_selected is None:
            raise ValueError("Data has not been subset. Please run select_variables() first.")

        for var in [var_RCFD, var_RCON]:
            if var not in self.df_selected.columns:
                raise ValueError(f"Column {var} is not available in the selected DataFrame.")

        rcfd_reported = self.df_selected[var_RCFD].notna()
        rcon_reported = self.df_selected[var_RCON].notna()

        return {
            "both_valid": int((rcfd_reported & rcon_reported).sum()),
            "RCFD_only": int((rcfd_reported & ~rcon_reported).sum()),
            "RCON_only": int((rcon_reported & ~rcfd_reported).sum()),
            "both_NaN": int((~rcfd_reported & ~rcon_reported).sum()),
        }

    @staticmethod
    def _combine_columns(df, first_col, second_col, method):
        """Combine two columns of df row by row according to method; returns a Series."""
        if method == "secondary":
            # second_col wins; first_col only fills its gaps.
            return df[second_col].combine_first(df[first_col])
        if method == "first":
            # first_col wins; second_col only fills its gaps.
            return df[first_col].combine_first(df[second_col])
        if method == "sum":
            # Plain addition: the result is NaN whenever either input is NaN.
            return df[first_col] + df[second_col]
        if method in ("min", "max", "mean"):
            # Row-wise reduction; NaNs are skipped, so a single reported value is returned as is.
            return getattr(df[[first_col, second_col]], method)(axis=1)
        raise ValueError(f"Unknown method provided: {method}")

    def construct_definitions(self, mappings):
        """
        Construct new variables from pairs of existing columns based on provided mappings.

        Mappings are applied in order on a copy of self.df_selected, so a mapping may use
        a variable created by an earlier mapping in the same list.

        Parameters:
          mappings (list): A list of dictionaries. Each dictionary should specify:
              - "first_col": Name of the first column.
              - "second_col": Name of the second column.
              - "new_var": Desired name for the new variable.
              Optional keys:
              - "mask_zeros": (bool) If True, replace zeros with NaN.
              - "apply_diff": (bool) If True, replace the variable by its change between
                              consecutive dates within each 'IDRSSD'. The returned frame
                              is then sorted by 'Date'.
              - "method": (str) Specifies how to combine the two columns.
                          "secondary": second_col, falling back to first_col where it is NaN.
                          "first":     first_col, falling back to second_col where it is NaN.
                          "min" / "max" / "mean": row-wise statistic, ignoring NaN.
                          "sum":       first_col + second_col (NaN if either is NaN).
                          Default is "secondary".

        Returns:
          A new DataFrame with the constructed variables appended (also stored in
          self.df_constructed).

        Raises:
          ValueError: If select_variables() has not been run, a mapping lacks "new_var",
                      a referenced column does not exist, the method is unknown, or
                      "apply_diff" is requested without 'IDRSSD' and 'Date' columns.
        """
        if self.df_selected is None:
            raise ValueError("Data has not been subset. Please run select_variables() first.")

        new_df = self.df_selected.copy()

        for mapping_item in mappings:
            first_col = mapping_item.get("first_col")
            second_col = mapping_item.get("second_col")
            new_var = mapping_item.get("new_var")

            # Without this check a missing name would create a column literally called None.
            if new_var is None:
                raise ValueError(f"Mapping {mapping_item} does not specify 'new_var'.")

            # Ensure the specified columns exist.
            for col in [first_col, second_col]:
                if col not in new_df.columns:
                    raise ValueError(f"Column {col} is not in the selected DataFrame.")

            method = mapping_item.get("method", "secondary")
            new_df[new_var] = self._combine_columns(new_df, first_col, second_col, method)

            # Optionally mask zeros.
            if mapping_item.get("mask_zeros", False):
                new_df[new_var] = new_df[new_var].mask(new_df[new_var] == 0, np.nan)

            # Optionally apply differencing over time.
            if mapping_item.get("apply_diff", False):
                if "IDRSSD" not in new_df.columns or "Date" not in new_df.columns:
                    raise ValueError("Both 'IDRSSD' and 'Date' columns are required to compute differences.")
                # diff() follows row order, so rows must be in date order within each bank.
                new_df = new_df.sort_values("Date")
                new_df[new_var] = new_df.groupby("IDRSSD")[new_var].diff()

        self.df_constructed = new_df

        return new_df

    def create_balanced_panel(self, df_input=None):
        """
        Transform the given dataset (or the constructed dataset) into a balanced panel by retaining
        only banks (identified by 'IDRSSD') that appear in all dates.

        Parameters:
          df_input (DataFrame, optional): The DataFrame to convert. Defaults to using self.df_constructed.

        Returns:
          A balanced panel DataFrame (a copy; also stored in self.df_balanced).

        Raises:
          ValueError: If no DataFrame is available, or 'IDRSSD' / 'Date' is missing from it.
        """
        if df_input is None:
            if self.df_constructed is None:
                raise ValueError("No constructed DataFrame available. Please run construct_definitions() first.")
            df_input = self.df_constructed

        if "IDRSSD" not in df_input.columns or "Date" not in df_input.columns:
            raise ValueError("Both 'IDRSSD' and 'Date' columns must be in the DataFrame for creating a balanced panel.")

        # A bank is kept when its number of distinct dates equals the number of
        # distinct dates in the whole frame (missing dates are not counted by nunique).
        n_dates = df_input["Date"].nunique()
        dates_per_bank = df_input.groupby("IDRSSD")["Date"].nunique()
        valid_banks = dates_per_bank[dates_per_bank == n_dates].index

        balanced_df = df_input[df_input["IDRSSD"].isin(valid_banks)].copy()
        self.df_balanced = balanced_df
        return balanced_df

In [3]:
# Folder that holds 'call_reports.csv'. Set the CALL_REPORTS_DIR environment variable to run
# the notebook on another machine; when it is unset, the original local path is used.
path = os.environ.get(
    "CALL_REPORTS_DIR",
    'C:/Users/angel/Documents/Economics/Research/Banking Project/data/clean',
)

In [4]:
# Create the helper; nothing is read from disk until select_variables() is called.
cr = CallReports(folder_path=path)

In [5]:
# Maturity variables. Each series is a run of six consecutive codes.

# Loans
loans_mat_vars = (
    [f"RCONA{code}" for code in range(564, 570)]      # RCONA564-RCONA569: used
    # RCFDA564-RCFDA569: to be tested
    + [f"RCFDA{code}" for code in range(570, 576)]    # RCFDA570-RCFDA575: used
    # RCONA570-RCONA575: to be tested
)

# Securities
securities_mat_vars = (
    # Treasuries
    [f"RCFDA{code}" for code in range(549, 555)]      # RCFDA549-RCFDA554: used
    + [f"RCONA{code}" for code in range(549, 555)]    # RCONA549-RCONA554: to be tested
    # MBS
    + [f"RCFDA{code}" for code in range(555, 561)]    # RCFDA555-RCFDA560: used
    + [f"RCONA{code}" for code in range(555, 561)]    # RCONA555-RCONA560: to be tested
)

# All other variables that will be used, grouped by concept.
vars = [
    # Total Assets
    'RCON2170', 'RCFD2170',
    # Total Loans
    'RCON2122', 'RCFD2122',
    # Total Deposits
    'RCON2200',
    # HTM Securities Amortized Cost
    'RCON1754', 'RCFD1754',
    'RCFD1754_x', 'RCFD1754_y', 'RCON1754_x', 'RCON1754_y',
    # AFS Securities Amortized Cost
    'RCON1772',
    # AFS Securities Fair Value
    'RCFD1773_x', 'RCFD1773_y', 'RCON1773',
    # Cash and balances due from depository institutions
    'RCON0010', 'RCFD0010',
    'RCON0071', 'RCON0081',
    'RCFD0071', 'RCFD0081',
    # Income Variables
    'RIAD4073', 'RIAD4200', 'RIAD4185', 'RIAD4180', 'RIAD4172',
]

# Full request list: 'vars' first, then the loan and the securities maturity variables.
all_vars = [*vars, *loans_mat_vars, *securities_mat_vars]

In [6]:
# Load the requested columns (plus the essential ones); the same frame is kept in cr.df_selected.
main = cr.select_variables(variables=all_vars)

Duplicate variable RCFD1754: max gap between RCFD1754_x and RCFD1754_y is 0.0
Dropping duplicate column RCFD1754_y as it is identical to RCFD1754_x.
Duplicate variable RCFD1773: max gap between RCFD1773_x and RCFD1773_y is 0.0
Dropping duplicate column RCFD1773_y as it is identical to RCFD1773_x.
Duplicate variable RCON1754: max gap between RCON1754_x and RCON1754_y is 0.0
Dropping duplicate column RCON1754_y as it is identical to RCON1754_x.


In [7]:
#cr.df_selected.head(10)
# Matches the other file! 

In [8]:
#last_digits = sorted(list(set([var[-3:] for var in securities_mat_vars])))

#for x in last_digits:
#    print('---------------------------------------------------------------------------------')
#    print('For x = ', x)
#    vars = [
#    'RCFDA' + str(int(x)), 'RCONA' + str(int(x))
#    ]
#    print(cr.compare_variables(vars[0], vars[1]))
# Matches the other file!

In [9]:
#last_digits = sorted(list(set([var[-3:] for var in loans_mat_vars])))
#last_digits = [int(x) for x in last_digits]
#last_digits = [x for x in last_digits if x < 570]

#for x in last_digits:
#    print('---------------------------------------------------------------------------------')
#    vars = [
#    'RCFDA' + str(x+6), 'RCONA' + str(x)
#    ]
#    print(vars[0], vars[1])
#    print(cr.compare_variables(vars[0], vars[1]))

# Matches the other file!   

In [10]:
#print(' ---------------------------------------------- AMMORTIZED COST SECURITIES ----------------------------------------------')

#print('Min Date in which RCFD1754 is not null:', main[main['RCFD1754'].notnull()]['Date'].min())
#print('Max Date in which RCFD1754 is not null:', main[main['RCFD1754'].notnull()]['Date'].max())

#print(' ---------------------------------------------- FAIR VALUE SECURITIES ----------------------------------------------')
# print the amount of obs for which RCFD1773_x and RCON1773 are both reported and different:
#print('For how many non-null obs. RCFD1773_x and RCON1773 are both reported and different:',
#      len(main[main['RCFD1773_x'].notnull() & 
#     main['RCON1773'].notnull() & 
#     (main['RCFD1773_x']-main['RCON1773'] != 0)]))
# print the amount of obs for which RCFD1773_x and RCON1773 are both reported and are the same:
#print('For how many non-null obs. RCFD1773_x and RCON1773 are both reported and the same:',
#      len(main[main['RCFD1773_x'].notnull() & 
#     main['RCON1773'].notnull() & 
#     (main['RCFD1773_x']-main['RCON1773'] == 0)]))

# print the amount of obs for which RCFD1773_x is reported and RCON1773 is not:
#print('For how many non-null obs. RCFD1773_x is reported and RCON1773 is not:',
#      len(main[main['RCFD1773_x'].notnull() & main['RCON1773'].isnull()]))

# print the amount of obs for which RCFD1773_x is not reported and RCON1773 is:
#print('For how many non-null obs. RCFD1773_x is not reported and RCON1773 is:',
#      len(main[main['RCFD1773_x'].isnull() & main['RCON1773'].notnull()]))


#print(' ---------------------------------------------- Cash ----------------------------------------------')

# print the obs in which 'RCON0071' is reported and 'RCON0081' is not:
#print('For how many non-null obs. RCON0071 is reported and RCON0081 is not:',
#      len(main[main['RCON0071'].notnull() & main['RCON0081'].isnull()]))

# print the obs in which 'RCON0071' and 'RCON0081' are both reported:
#print('For how many non-null obs. RCON0071 and RCON0081 are both reported:',
#      len(main[main['RCON0071'].notnull() & main['RCON0081'].notnull()]))

# print the obs in which 'RCON0010', 'RCON0071', and 'RCON0081' are all reported:
#print('For how many non-null obs. RCON0010, RCON0071, and RCON0081 are all reported:',
#      len(main[main['RCON0010'].notnull() & main['RCON0071'].notnull() & main['RCON0081'].notnull()]))

# print the obs in which 'RCON0010' is reported and 'RCON0071' or 'RCON0081' are not:
#print('For how many non-null obs. RCON0010 is reported and RCON0071 or RCON0081 are not:',
#      len(main[main['RCON0010'].notnull() & (main['RCON0071'].isnull() | main['RCON0081'].isnull())]))

# print the obs in which 'RCON0010' is not reported and 'RCON0071' and 'RCON0081' are:
#print('For how many non-null obs. RCON0010 is not reported and RCON0071 and RCON0081 are:',
#      len(main[main['RCON0010'].isnull() & main['RCON0071'].notnull() & main['RCON0081'].notnull()]))

# print the obs in which none is reported:
#print('For how many non-null obs. none of RCON0010, RCON0071, and RCON0081 are reported:',
#      len(main[main['RCON0010'].isnull() & main['RCON0071'].isnull() & main['RCON0081'].isnull()]))

#print('------------------------------------------------------------------------------------------------')
# Matches the other file!

In [11]:
# Define the mappings for all of the easy-to-create variables.
#
# NOTE: the list below is wrapped in a triple-quoted string, so it is NOT executed and
# `mappings` is not defined by this cell. Because the string is the only expression in
# the cell, Jupyter echoes it as the cell output. Remove the surrounding ''' lines (and
# uncomment the usage lines further down) to re-enable the check.
#
# Each dictionary follows the format expected by CallReports.construct_definitions().
# The entries are applied in order, so the "Securities (...)" sums rely on the
# "Treasuries (...)" and "MBS (...)" variables created by the entries before them.
'''
mappings = [
    # 1) Loans: Total Loans = RCON2122.combine_first(RCFD2122), then mask zeros.
    {
        "first_col": "RCFD2122",
        "second_col": "RCON2122",
        "new_var": "Total Loans",
        "method": "secondary",   # Gives RCON2122 if available, else RCFD2122.
        "mask_zeros": True
    },
    # 2) Deposits: Create Total Deposits from RCON2200 (a simple copy) and mask zeros.
    {
        "first_col": "RCON2200",
        "second_col": "RCON2200",
        "new_var": "Total Deposits",
        "method": "secondary",   # Since both are the same, this simply copies RCON2200.
        "mask_zeros": True
    },
    # 3) Maturity variables – Treasuries:
    {
        "first_col": "RCFDA549",
        "second_col": "RCONA549",
        "new_var": "Treasuries (3M-)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA550",
        "second_col": "RCONA550",
        "new_var": "Treasuries (3M-1Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA551",
        "second_col": "RCONA551",
        "new_var": "Treasuries (1Y-3Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA552",
        "second_col": "RCONA552",
        "new_var": "Treasuries (3Y-5Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA553",
        "second_col": "RCONA553",
        "new_var": "Treasuries (5Y-15Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA554",
        "second_col": "RCONA554",
        "new_var": "Treasuries (15Y+)",
        "method": "secondary"
    },
    # 3) Maturity variables – MBS:
    {
        "first_col": "RCFDA555",
        "second_col": "RCONA555",
        "new_var": "MBS (3M-)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA556",
        "second_col": "RCONA556",
        "new_var": "MBS (3M-1Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA557",
        "second_col": "RCONA557",
        "new_var": "MBS (1Y-3Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA558",
        "second_col": "RCONA558",
        "new_var": "MBS (3Y-5Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA559",
        "second_col": "RCONA559",
        "new_var": "MBS (5Y-15Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA560",
        "second_col": "RCONA560",
        "new_var": "MBS (15Y+)",
        "method": "secondary"
    },
    # 3) Maturity variables – Overall Securities (as the sum of Treasuries and MBS for each bucket):
    {
        "first_col": "Treasuries (3M-)",
        "second_col": "MBS (3M-)",
        "new_var": "Securities (3M-)",
        "method": "sum"
    },
    {
        "first_col": "Treasuries (3M-1Y)",
        "second_col": "MBS (3M-1Y)",
        "new_var": "Securities (3M-1Y)",
        "method": "sum"
    },
    {
        "first_col": "Treasuries (1Y-3Y)",
        "second_col": "MBS (1Y-3Y)",
        "new_var": "Securities (1Y-3Y)",
        "method": "sum"
    },
    {
        "first_col": "Treasuries (3Y-5Y)",
        "second_col": "MBS (3Y-5Y)",
        "new_var": "Securities (3Y-5Y)",
        "method": "sum"
    },
    {
        "first_col": "Treasuries (5Y-15Y)",
        "second_col": "MBS (5Y-15Y)",
        "new_var": "Securities (5Y-15Y)",
        "method": "sum"
    },
    {
        "first_col": "Treasuries (15Y+)",
        "second_col": "MBS (15Y+)",
        "new_var": "Securities (15Y+)",
        "method": "sum"
    },
    # 3) Maturity variables – Overall Loans:
    {
        "first_col": "RCFDA570",
        "second_col": "RCONA564",
        "new_var": "Loans (3M-)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA571",
        "second_col": "RCONA565",
        "new_var": "Loans (3M-1Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA572",
        "second_col": "RCONA566",
        "new_var": "Loans (1Y-3Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA573",
        "second_col": "RCONA567",
        "new_var": "Loans (3Y-5Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA574",
        "second_col": "RCONA568",
        "new_var": "Loans (5Y-15Y)",
        "method": "secondary"
    },
    {
        "first_col": "RCFDA575",
        "second_col": "RCONA569",
        "new_var": "Loans (15Y+)",
        "method": "secondary"
    }
]

'''

# Example usage (disabled; it needs the `mappings` list above to be re-enabled first):

# Create all the new variables:
#df = cr.construct_definitions(mappings=mappings)

# Summary statistics of a few constructed variables, used as a sanity check:
#print(df['Total Loans'].describe())
#print(df['Total Deposits'].describe())
#print(df['Loans (3M-)'].describe())
#print(df['Treasuries (3M-)'].describe())
#print(df['MBS (3M-)'].describe())
#print(df['Securities (3M-)'].describe())

# Matches the other file!

'\nmappings = [\n    # 1) Loans: Total Loans = RCON2122.combine_first(RCFD2122), then mask zeros.\n    {\n        "first_col": "RCFD2122",\n        "second_col": "RCON2122",\n        "new_var": "Total Loans",\n        "method": "secondary",   # Gives RCON2122 if available, else RCFD2122.\n        "mask_zeros": True\n    },\n    # 2) Deposits: Create Total Deposits from RCON2200 (a simple copy) and mask zeros.\n    {\n        "first_col": "RCON2200",\n        "second_col": "RCON2200",\n        "new_var": "Total Deposits",\n        "method": "secondary",   # Since both are the same, this simply copies RCON2200.\n        "mask_zeros": True\n    },\n    # 3) Maturity variables – Treasuries:\n    {\n        "first_col": "RCFDA549",\n        "second_col": "RCONA549",\n        "new_var": "Treasuries (3M-)",\n        "method": "secondary"\n    },\n    {\n        "first_col": "RCFDA550",\n        "second_col": "RCONA550",\n        "new_var": "Treasuries (3M-1Y)",\n        "method": "secondary"\

In [12]:
# Mappings that rebuild the 1754 series from its duplicated columns.
#
# NOTE: the list below is wrapped in a triple-quoted string, so it is NOT executed and
# `mappings` is not defined by this cell. Because the string is the only expression in
# the cell, Jupyter echoes it as the cell output. Remove the surrounding ''' lines (and
# uncomment the usage lines further down) to re-enable the check.
#
# The entries are applied in order by CallReports.construct_definitions(): mapping 3
# uses the two '*_right' variables created by mappings 1 and 2.
'''
mappings = [
    # Mapping 1: Create RCON1754_right = RCON1754_x.combine_first(RCON1754)
    {
        "first_col": "RCON1754",      # fallback column
        "second_col": "RCON1754_x",   # primary column
        "new_var": "RCON1754_right",
        "method": "secondary"
    },
    # Mapping 2: Create RCFD1754_right = RCFD1754_x.combine_first(RCFD1754)
    {
        "first_col": "RCFD1754",      # fallback column
        "second_col": "RCFD1754_x",   # primary column
        "new_var": "RCFD1754_right",
        "method": "secondary"
    },
    # Mapping 3: Create 1754_right from RCFD1754_right and RCON1754_right
    # This takes the row-wise minimum:
    {
        "first_col": "RCFD1754_right",
        "second_col": "RCON1754_right",
        "new_var": "1754_right",
        "method": "min"
    }
]
'''
# Example usage (disabled; it needs the `mappings` list above to be re-enabled first).
# Create all the new variables:
#df = cr.construct_definitions(mappings=mappings)

# Summary statistics of the constructed variables, used as a sanity check:
#print(df['RCON1754_right'].describe())
#print(df['RCFD1754_right'].describe())
#print(df['1754_right'].describe())

# Matches the other file!

'\nmappings = [\n    # Mapping 1: Create RCON1754_right = RCON1754_x.combine_first(RCON1754)\n    {\n        "first_col": "RCON1754",      # fallback column\n        "second_col": "RCON1754_x",   # primary column\n        "new_var": "RCON1754_right",\n        "method": "secondary"\n    },\n    # Mapping 2: Create RCFD1754_right = RCFD1754_x.combine_first(RCFD1754)\n    {\n        "first_col": "RCFD1754",      # fallback column\n        "second_col": "RCFD1754_x",   # primary column\n        "new_var": "RCFD1754_right",\n        "method": "secondary"\n    },\n    # Mapping 3: Create 1754_right from RCFD1754_right and RCON1754_right\n    # This takes the row-wise minimum:\n    {\n        "first_col": "RCFD1754_right",\n        "second_col": "RCON1754_right",\n        "new_var": "1754_right",\n        "method": "min"\n    }\n]\n'