In [None]:
class Projects(Job):
    """ 
        for project specific data transformations
    """
    
    def __init__(self, focus_last_FY:bool = False, is_dev:bool=False):
        """ 
            set up the gold output folders, lookup tables and conversion constants shared by the project transformations

            focus_last_FY: when True, the computed current fiscal year is moved back by one
            is_dev: stored on the instance unchanged for the project methods to consult
        """
        super().__init__()
        # gold layer output folders, one entry per project
        gold_root = self.base_dir / "Gold"
        finance_root = gold_root / "FinanceProject"
        hr_root = gold_root / "HRProject"
        self.gold_path = {
            "weekly_banking": finance_root / "WeeklyBanking",
            "inventory": gold_root / "InventoryProject",
            "payroll": hr_root / "PayrollProject",
            "finance_operational": gold_root / "FinanceOperationalProject",
            "budget": gold_root / "BudgetProject",
            "QBOTime": hr_root / "QBOTimeProject",
            "hr_combined": hr_root / "CombinedSummary",
            "pillar_dashboard": gold_root / "DirectorDashboards",
            "APReporting": finance_root / "APReporting",
        }
        # account dimension table, loaded once and reused by the projects
        account_csv = self.silver_path["QBO"]["Dimension_time"] / "Account.csv"
        self.silver_acc = pd.read_csv(account_csv)
        # commodity names grouped by pillar
        self.commodities = {
            "Produce": [
                "Strawberry", "Watermelon", "Cantaloupe", "Market Garden", "Broccoli", "Pumpkin", "Sweet Corn",
                "Cauliflower", "Squash", "Honeydew Melon", "Potato", "Carrot", "Cabbage", "Lettuce",
                "Brussel Sprouts", "Prairie Pathways", "Beet", "Corn Maze", "CSA",
            ],
            "Grain": [
                "Blackeye Pea", "Winter Wheat", "Durum", "Cotton", "Chickpea", "Barley", "Green Lentil",
                "Red Lentil", "Canola", "Wheat", "Field Pea", "Corn", "Oat", "Soybean", "Bean",
            ],
            "Cattle": [
                "Weaned Calves", "Cull Bull", "Cull Cow", "Bred Heifer", "Purebred Yealing Bull",
                "Purebred Heifer", "Purebred Cow", "Purebred Bull", "Cow", "Bull", "Steer", "Heifer",
                "Yearling", "Calf",
            ],
        }
        # standardized location names grouped by pillar
        self.locations = {
            "Produce": [
                "BritishColumbia (produce)", "Outlook", "Arizona (produce)", "Montana (produce)", "Seeds USA",
            ],
            "Cattle": [
                "Airdrie", "Eddystone (cattle)", "Ashcroft", "Home Ranch", "Diamond S", "Wolf Ranch",
                "Fraser River Ranch", "Moon Ranch", "Waldeck", "Calderbank", "BC Cattle MFL",
            ],
            "Grain": [
                "Eddystone (grain)", "Arizona (grain)", "Colorado", "Swift Current", "Regina", "Raymore",
                "Prince Albert", "The Pas", "Kamsack", "Hafford", "Yorkton", "Fly Creek", "Camp 4", "Havre",
                "Billings",
            ],
            "Seed": ["NexGen", "Seeds"],
            "Others": [
                "Eddystone (corporate)", "Arizona (corporate)", "Legacy", "BritishColumbia (corporate)",
                "Corporate",
            ],
        }
        self.bc_ranches = [
            "Ashcroft", "Fraser River Ranch", "Moon Ranch", "Wolf Ranch", "Diamond S", "Home Ranch",
        ]
        # determines whether _financial_operational has run and gold_pl is stored in self,
        # if not, any subsequent downstream projects will run _financial_operational first
        self.pl_exist = False
        # fiscal year is labelled by the calendar year it ends in: months after October count towards next year
        fiscal_year = self.today.year
        if self.today.month > 10:
            fiscal_year = self.today.year + 1
        self.currentFY = fiscal_year
        if focus_last_FY:
            self.currentFY -= 1
        self.is_dev = is_dev
        # account number reroutes (old AccNum -> AccNum to use instead)
        self.accnum_reroute = {
            "MFL405101": "MFL405110",
            "MFL405102": "MFL405120",
            "MFL405103": "MFL405130",
            "MSL585000": "MSL562505",
            "MSL402110": "MSL402112",
            "MFBC575000": "MFBC575020",
            "MFBC629000": "MFBC629010",
            "MFBC531010": "MFBC531050",
        }
        # account id reroutes (old AccID -> AccID to use instead)
        self.accid_reroute = {
            "MFBC250": "MFBC210",
            "MFBC216": "MFBC210",
            "MFBC358": "MFBC210",
            "MFBC255": "MFBC210",
            "MFBC314": "MFBC272",
            "MFBC268": "MFBC584",
            "MFBC188": "MFBC192",
            "MFBC374": "MFBC190",
            "MFBC61": "MFBC190",
            "MFBC26": "MFBC585",
            "MFBC412": "MFBC242",
            "MFBC66": "MFBC592",
            "MFBC65": "MFBC220",
            "MFBC19": "MFBC210",
            "MFBC60": "MFBC187",
            "MPUSA1150040007": "MPUSA79",
        }
        # unit conversion factors
        self.conversion_mt_to_lb = 2204.62262185
        self.conversion_bu_to_lb_canola = 55
        self.conversion_bu_to_lb_others = 60
        # customers per location, plus the flattened list of all customers (duplicates kept, dict order)
        self.customers_dict = {
            "Billings": ["Viterra Huntley"],
            "Eddystone": ["Cargill Crush Clavet"],
            "Hafford": [
                "P&H North Battleford", "Bunge North Battleford", "Grains Connect Hafford",
                "Cargill Crush Clavet", "Bunge Saskatoon",
            ],
            "Kamsack": ["Bunge Canora", "Bunge Kamsack", "LDC Crush"], # -> viterra kamsack to bunge kamsack
            "Outlook": ["Monette produce Carrots", "Viterra Saskatoon"],
            "PA": ["Cargill Clavet Crush", "Viterra White Star"],
            "Raymore": ["Viterra Raymore", "Bunge Raymore"],
            "Regina": ["Viterra Moose Jaw"],
            "SwiftCurrent": ["Paterson Grain Swift Current", "Viterra Swift Current"],
        }
        self.customers_list = [
            customer
            for location_customers in self.customers_dict.values()
            for customer in location_customers
        ]
    
    def _weekly_banking(self) -> None:
        """ 
            weekly banking project: match latest GL bank transactions with raw activities - extract accounts for those activities

            steps:
                1. build a TxnId -> AccID mapping from linked invoices/bills, purchases, journal entries, deposits and sales receipts
                2. keep GL lines posted to bank accounts, dated on or after the first day of the month six months before today
                3. left-join the non-transfer lines to the mapping on (TxnId, TxnType); transfers take their activity account from the GL split account
                4. attach activity account attributes, apply the manual category overrides, write BankingActivity.xlsx

            assumptions: a raw entry (e.g., invoice) can have multiple lines - multiple associated accounts, only considering the first one 
        """
        print("\nStarting Weekly Banking Project Transformation\n")
        # determine minimal date to keep for GL
        if self.today.month > 6:
            year = self.today.year
            month = self.today.month - 6
        else:
            year = self.today.year - 1
            month = self.today.month + 12 - 6
        min_date = dt.datetime(year, month, 1)
        # load and prepare data
        ## account
        account = self.silver_acc.copy(deep=True)
        ## change some accounts to Transfer category
        acc_list = ["MFL264", "MSL250"]
        is_transfer_acc = account["AccID"].isin(acc_list)
        account.loc[is_transfer_acc, "ProfitType"] = "Asset"
        account.loc[is_transfer_acc, "Category"] = "Transfer"
        account_bank = account[account["AccountType"]=="Bank"]
        bank_ids = account_bank["AccID"].unique()
        ## LinkedTxn for invoice and bill
        linked_dir = self.silver_path["QBO"]["Raw"] / "LinkedTxn"
        invoice_linked = pd.read_csv(linked_dir / "LinkedTxn_Mapping_Invoice.csv")
        bill_linked = pd.read_csv(linked_dir / "LinkedTxn_Mapping_Bill.csv")
        linked = pd.concat([invoice_linked, bill_linked])
        linked = linked.drop(columns=["Corp"])
        # define customized function for processing other raw table
        def _process_facts(df_type:str, txn_type:str) -> pd.DataFrame:
            """ 
                function for processing raw tables for mapping table - TransactionID_partial to AccID, tagged with the GL transaction type
            """
            df = pd.read_csv(self.silver_path["QBO"]["Raw"]/(df_type+".csv"), usecols = ["TransactionID", "AccID"])
            # keep the part after the first "-" of the id
            df["TransactionID"] = df["TransactionID"].apply(lambda x: x.split("-")[1])
            df = df.drop_duplicates()
            df = df.rename(columns={"TransactionID":"TxnId"})
            df["TxnType"] = txn_type
            return df
        ## purchase table for expense transactions
        purchase = _process_facts("Purchase", "Expense")
        ## journal entries - exclude most entries related to bank
        journal = _process_facts("JournalEntry", "Journal Entry")
        # for journal entries, exclude most of the entries where the activity account ID is a bank ID
        # mylist = ["MFL51", "MFBC470", "MFBC471", "MFL28", "MFL27", "MFL1150040024"]
        mylist = {"MFBC470", "MFBC471"} # should include these accounts
        # filtering (instead of list.remove) does not fail when one of these is not a bank account
        exclude_list = [acc for acc in bank_ids if acc not in mylist]
        journal = journal[~journal["AccID"].isin(exclude_list)]
        ## deposit
        deposit = _process_facts("Deposit", "Deposit")
        ## salesreceipts
        sales = _process_facts("SalesReceipt", "Sales Receipt")
        # process mapping table - dedup; the concat order decides which account wins for a repeated TxnId
        mapping = pd.concat([linked, purchase, journal, deposit, sales])
        # NOTE(review): de-duplicated on TxnId only, while the merge below also keys on TxnType and the
        # id prefix / Corp column were dropped - an id shared by two transaction types (or two corps) keeps
        # only the first mapping. Confirm this is intended.
        mapping = mapping.drop_duplicates(subset=["TxnId"],keep="first")
        ## load GL transactions
        cols = ["TransactionType","TransactionID_partial","AccID","AccNum","AccName", "TransactionDate", "Amount", "SplitAcc", "SplitAccID", "Memo", "Corp", "Balance"]
        transactions = pd.read_csv(self.silver_path["QBO"]["GL"]/"GeneralLedger.csv",dtype={"TransactionID_partial":str}, usecols=cols)
        # copy so the column assignment below does not write into a view of the full GL
        transactions = transactions[transactions["AccID"].isin(bank_ids)].copy()
        transactions["TransactionDate"] = pd.to_datetime(transactions["TransactionDate"])
        transactions = transactions[transactions["TransactionDate"]>=min_date]
        transactions = transactions.rename(columns={"TransactionType":"TxnType","TransactionID_partial":"TxnId",
                                                    "AccID":"BankAccID","AccNum":"BankAccNum","AccName":"BankAccName",
                                                    "TransactionDate":"BankActivityDate","Amount":"BankAmount"})
        # missing amounts compare False against 0 and are therefore labelled "Negative"
        transactions["Sign"] = transactions["BankAmount"].ge(0).map({True: "Positive", False: "Negative"})
        # merge to get CurrencyID for bank_acc
        transactions = pd.merge(transactions, account_bank.loc[:,["AccID","CurrencyID"]], left_on=["BankAccID"], right_on=["AccID"], how="left")
        transactions = transactions.drop(columns=["AccID"])
        # separating transfers - don't merge with mapping table
        is_transfer = transactions["TxnType"] == "Transfer"
        transfers = transactions[is_transfer].copy(deep=True)
        transactions = transactions[~is_transfer].drop(columns=["SplitAcc", "SplitAccID"])
        # GL labels cheques differently from the raw Purchase table
        transactions["TxnType"] = transactions["TxnType"].replace({"Cheque Expense":"Expense", "Check": "Expense"})
        # merge with mapping table
        transactions_mapped = pd.merge(transactions,mapping,on=["TxnId","TxnType"],how="left")
        non_match = transactions_mapped[transactions_mapped["AccID"].isna()]
        print("None Match Transaction Types")
        print(non_match.TxnType.value_counts())
        print(f"Non matches - {len(non_match)}")
        # function to determine transfer type
        def _determine_transfer_type(entry) -> str:
            """ 
                determine whether the transfer is for visa, bank, or other transfer
                    a missing (non-text) split account is treated as an other transfer
            """
            if not isinstance(entry, str):
                return "Other Transfer"
            label = entry.lower()
            if "visa" in label:
                return "Visa Payment"
            if "due" in label:
                return "Bank Transfer"
            return "Other Transfer"
        # allocate transfer type 
        transfers["TransferType"] = transfers["SplitAcc"].apply(_determine_transfer_type)
        transfers = transfers.rename(columns={"SplitAccID":"AccID"})
        transfers = transfers.drop(columns=["SplitAcc"])
        transactions_mapped = pd.concat([transactions_mapped,transfers], ignore_index=True)
        # clean up the dataframe
        transactions_mapped = transactions_mapped.rename(columns={"CurrencyID":"BankCurrencyID"})
        transactions_mapped = pd.merge(transactions_mapped, account.loc[:,["AccID","AccName","AccNum","Category","ProfitType","CurrencyID"]], on="AccID", how="left")
        transactions_mapped.loc[transactions_mapped["TransferType"]=="Bank Transfer","Category"] = "Bank Transfer"
        transactions_mapped.loc[((transactions_mapped["BankAccNum"].str.startswith("MSL"))&(transactions_mapped["AccNum"]=="MSL120001")), "Category"] = "Seed Processing Revenue"
        transactions_mapped = transactions_mapped.rename(columns={"AccNum":"ActivityAccNum", "AccName":"ActivityAccName"})
        # manual classification of sales tax payments: refunds count as income, payments as overhead
        is_sales_tax = transactions_mapped["TxnType"]=="Sales Tax Payment"
        tax_received = is_sales_tax & (transactions_mapped["Sign"]=="Positive")
        tax_paid = is_sales_tax & (transactions_mapped["Sign"]=="Negative")
        transactions_mapped.loc[tax_received, "ProfitType"] = "Other Operating Revenue"
        transactions_mapped.loc[tax_received, "Category"] = "Miscellaneous income"
        transactions_mapped.loc[tax_paid, "ProfitType"] = "Operating Overheads"
        transactions_mapped.loc[tax_paid, "Category"] = "Office and miscellaneous"
        transactions_mapped.loc[is_sales_tax, "ActivityAccNum"] = "Manual Adjustment"
        transactions_mapped.loc[is_sales_tax, "ActivityAccName"] = "Manual Adjustment"
        # csv from sharepoint is unstable, and produced unpredictable readings from Power BI
        self.check_file(self.gold_path["weekly_banking"])
        transactions_mapped.to_excel(self.gold_path["weekly_banking"]/"BankingActivity.xlsx", sheet_name="transactions", index=False)

    def _extract_accnum_accid(self) -> None:
        """ 
            this function creates a accnum to accID mapping table to avoid repeated merges
        """
        self.acc_map = self.operation_acc.set_index(["AccNum"])["AccID"]

    def _perform_manual_adjust_GL_inventory(self, df: pd.DataFrame) -> pd.DataFrame:
        """ 
            This function reads inventory account balances at the beginning of this fiscal year, and apply the amount to PL accounts for Fert/Chem/Seed
        """
        # fixed column values 
        date = '2024-11-01'
        transaction_type = 'Manual Adjustments'
        memo = 'Adjustments from Trial Balance from beginning of this fiscal year'
        FY = 2025
        month = 'November'
        # read adjustments csv file
        adjustments = pd.read_csv(self.gold_path["finance_operational"]/"ManualAdjustments"/"2025.csv",dtype={"Amount":float})
        location_adj = {"Camp 4": "Billings", "Fly Creek":"Billings"}
        adjustments["Location"] = adjustments["Location"].replace(location_adj)
        # compute various Amount columns to match the other PL entries
        adjustments["AmountAdj"] = -adjustments["Amount"]
        adjustments["AmountCAD"] = adjustments.apply(lambda x: x["AmountAdj"] * self.fx if x["Currency"]=="USD" else x["AmountAdj"],axis=1)
        adjustments["AmountDisplay"] = -adjustments["AmountCAD"]
        adjustments["AccNum"] = adjustments.apply(lambda x: "".join(x["DisplayName"].split(" ")[:2]),axis=1)
        # create additional entries
        addition_df = df.head(0).copy(deep=True)
        row = {"TransactionDate":date, "TransactionType":transaction_type, "Memo":memo, "FiscalYear":FY, "Month":month, "FXRate": self.fx}
        for i in range(len(adjustments)):
            entry = row | {"Amount": adjustments.loc[i,"Amount"], "AccID": adjustments.loc[i,"AccID"], "AmountAdj": adjustments.loc[i,"AmountAdj"], 
                        "AmountCAD": adjustments.loc[i,"AmountCAD"], "AmountDisplay":adjustments.loc[i,"AmountDisplay"],
                        "Location":adjustments.loc[i,"Location"], "Pillar": adjustments.loc[i,"Pillar"],
                        "AccNum":adjustments.loc[i,"AccNum"] }
            addition_df.loc[len(addition_df)] = entry
        print(f"\nManual GL Inventory Accounts Adjustments created {len(addition_df)} entries\n")
        df = pd.concat([df,addition_df],ignore_index=True)
        return df

    def _finance_operational(self) -> None:
        """ 
            transform PL data into operational-ready
                1. reclassify accounts
                2. standardize location, classify pillar
                3. revising signs

            side effects:
                - sets self.fx, self.operation_acc, self.gold_pl, self.gold_acc and switches self.pl_exist to True
                - writes commodities_acc.csv (inventory project), AccNumTOAccID.csv, PL.csv/.xlsx and
                  Account_table.csv/.xlsx (finance operational project), and one PL.xlsx per pillar (director dashboards)
            raises AssertionError when the PL file does not carry exactly one distinct FXRate
        """
        print("\nStarting Finance Operational Project Transformation\n")
        # load data from silver space
        data = pd.read_csv(self.silver_path["QBO"]["PL"]/"ProfitAndLoss.csv", dtype={"Class":str, "ClassID":str})
        assert data["FXRate"].nunique() == 1, "different FXRate detected"
        self.fx = data.loc[0,"FXRate"]
        data["TransactionDate"] = pd.to_datetime(data["TransactionDate"])
        # fiscal year is labelled by the year it ends in: November and December belong to the next year
        data["FiscalYear"] = data.TransactionDate.apply(lambda x: x.year + 1 if x.month >= 11 else x.year)
        # add month to the PL
        data["Month"] = data["TransactionDate"].dt.month_name()
        ## add location for seed operation
        data.loc[data["Corp"]=="MSL","Location"] = "Seeds"
        data.loc[data["Corp"]=="NexGen","Location"] = "NexGen"
        data.loc[data["Corp"]=="MSUSA","Location"] = "Seeds USA"
        # clean location - the untouched value is kept in LocationRaw
        data = data.rename(columns={"Location":"LocationRaw"})
        data["Location"] = data["LocationRaw"]
        data = data.fillna(value={"Location":"Missing"})
        # switch seeds usa to AZ produce
        data.loc[data["Corp"]=="MSUSA","Location"] = "Arizona (produce)"
        ## clean location
        clean_location = {"Airdrie - Grain":"Airdrie", "Airdrie - Cattle":"Airdrie", "Airdrie - General":"Airdrie", "Airdrie":"Airdrie", 
                        "Eddystone - Grain": "Eddystone (grain)", "Eddystone - Cattle": "Eddystone (cattle)", "Eddystone - General":"Eddystone (corporate)",
                        "Outlook (JV)":"Outlook", "AZ Produce":"Arizona (produce)", "Corporate":"Arizona (corporate)", "BC Produce":"BritishColumbia (produce)",
                        "Grain":"Arizona (grain)", "Ashcroft (CC, Fischer, Loon)":"Ashcroft", 
                        "Outlook (Capital)":"Outlook", "Colorado (MF)":"Colorado", "Colorado (JV)":"Colorado", "Cattle - General":"BritishColumbia (corporate)",
                        "Home (70 M, LF/W, 105 M)":"Home Ranch", "Diamond S (BR)":"Diamond S", "-Corporate":"Corporate",
                        "MT Produce": "Montana (produce)", "Fly Creek": "Billings", "Camp 4":"Billings", "BC Cattle":"BC Cattle MFL"}
        # NOTE(review): this legacy mapping is defined but never applied to the data - confirm whether
        # these retired locations are meant to be renamed to "Legacy" together with clean_location
        others = {"North Farm (deleted)":"Legacy", "Cache/Fischer/Loon - DNU":"Legacy"}
        data["Location"] = data["Location"].replace(clean_location)
        locations = self.locations["Produce"] + self.locations["Grain"] + self.locations["Cattle"] + self.locations["Others"] + self.locations["Seed"]
        unaccounted_location = list(set(data["Location"].unique()) - set(locations))
        print(f"location unaccounted for - {unaccounted_location}")
        # classify pillar
        data["Pillar"] = data.apply(lambda x: self._pillar_classification(x),axis=1)
        # reorganize corp
        ## MPUSA missing location = Arizona (produce); Location holds no NaN at this point (filled with "Missing" above)
        is_mpusa = data["Corp"] == "MPUSA"
        data.loc[(is_mpusa&(data["Location"]=="Missing")), "Location"] = "Arizona (produce)"
        data.loc[(is_mpusa&(data["Location"] == "Arizona (produce)")), "Pillar"] = "Produce"
        ## AZ Produce --> MPUSA
        data.loc[data["Location"] == "Arizona (produce)", "Corp"] = "MPUSA"
        ## move everything for AZ from fiscal year 2024 on to produce (mask taken once, before Location is overwritten)
        az_recent = (data["FiscalYear"] >= 2024) & (data["Location"].str.contains("Arizona",case=False))
        data.loc[az_recent,"Pillar"] = "Produce"
        data.loc[az_recent,"Location"] = "Arizona (produce)"
        ## BC Produce --> MPL
        data.loc[data["Location"] == "BritishColumbia (produce)", "Corp"] = "MPL"
        ## Outlook --> MPL
        data.loc[data["Location"]=="Outlook", "Corp"] = "MPL"
        # reroute accid
        data["AccID"] = data["AccID"].replace(self.accid_reroute)
        # Reclassify accounts for Operational Purpose
        ## read & process operational classification: three nested levels, each leaf is a list of AccIDs
        with open(self.silver_path["QBO"]["Dimension"]/"acc_classification.yaml", "r", encoding="utf-8") as f:
            raw_acc = yaml.safe_load(f)
        rows = [(l1, l2, l3, v) 
                for l1, l1_inner in raw_acc.items() 
                for l2, l2_inner in l1_inner.items() 
                for l3, l3_inner in l2_inner.items() 
                for v in l3_inner]
        acc_operation = pd.DataFrame(rows, columns=["OperationProfType", "OperationCategory", "OperationSubCategory", "AccID"])
        ## read accounts table and apply new classification
        accounts = self.silver_acc
        accounts = pd.merge(accounts, acc_operation, on = "AccID", how = "left")
        accounts["Commodity"] = accounts.apply(lambda x: self._identify_product(x), axis=1)
        commodities = pd.DataFrame(data={"Commodity": accounts["Commodity"].unique()})
        commodities.to_csv(self.gold_path["inventory"]/"Tables"/"commodities_acc.csv", index=False)
        # prepare account table for mapping AccID from AccNum
        self.operation_acc = accounts[accounts["AccNum"].notna()]   # AccNum must be non-missing
        self.operation_acc = self.operation_acc[self.operation_acc["Active"]] # avoid non-active accounts that share the same AccNum with active accounts
        # run the folder check before the first file is written into the finance operational folder
        # (it used to run only after AccNumTOAccID.csv had already been written there)
        self.check_file(self.gold_path["finance_operational"])
        self.operation_acc.to_csv(self.gold_path["finance_operational"]/"AccNumTOAccID.csv", index=False)
        # Revising Signs according to Operational Classification
        print("Revising Signs ...")
        # expense_accounts = accounts[(accounts["OperationCategory"] == "Expense") | (accounts["OperationCategory"] =="Inventory Consumption")] # for my classification
        expense_accounts = accounts[accounts["ProfitType"].isin(["Cost of Goods Sold", "Direct Operating Expenses", "Operating Overheads", "Other Expense"])]
        # expense accounts are displayed with flipped sign; one vectorized membership test instead of a per-row lookup
        expense_ids = expense_accounts["AccID"].dropna().unique()
        is_expense = data["AccID"].isin(expense_ids)
        data["AmountDisplay"] = data["AmountCAD"].where(~is_expense, -data["AmountCAD"])
        # data = self._perform_manual_adjust_GL_inventory(data)
        self.gold_pl = data
        self.gold_acc = accounts
        # save files
        print("Saving ...")
        data.to_csv(self.gold_path["finance_operational"]/"PL.csv", index=False)
        accounts.to_csv(self.gold_path["finance_operational"]/"Account_table.csv", index=False)
        accounts.to_excel(self.gold_path["finance_operational"]/"Account_table.xlsx", sheet_name = "Account", index=False)
        data.to_excel(self.gold_path["finance_operational"]/"PL.xlsx", sheet_name="Transactions", index=False)
        for pillar in ["Grain", "Cattle", "Seed", "Produce"]:
            data[data["Pillar"]==pillar].to_excel(self.gold_path["pillar_dashboard"]/pillar/"PL.xlsx", sheet_name="Transactions", index=False)
        self.pl_exist = True
    
    def _process_pp(self, data:pd.DataFrame) -> pd.DataFrame:
        """ 
            This function takes original dataframe, apply the payperiod number classification based on transactions date, process payperiod columns, and return the new dataframe,
                save the pp table for consolidated tables
        """
        date_col = "TransactionDate" if "TransactionDate" in data.columns else "date"
        # load payperiods
        payperiods = pd.read_csv(self.gold_path["payroll"]/"Payperiods.csv")
        payperiods["START"] = pd.to_datetime(payperiods["START"])
        payperiods["END"] = pd.to_datetime(payperiods["END"])
        payperiods = payperiods.loc[:,["PP","START","END","Cycle","FiscalYear"]]
        payperiods = payperiods.rename(columns={"PP":"PPNum"})
        payperiods["PPName"] = payperiods["Cycle"].astype(str).str.slice(2) + "-" + "PP" + payperiods["PPNum"].astype(str).str.zfill(2)
        # shift transaction dates - AZ: left 12 days, others: left 5 days
        offset_days = {"Arizona (produce)": 12, "Outlook": 12}
        data["days_offset"] = data["Location"].map(offset_days).fillna(5)
        data["date_shifted"] = data[date_col] - pd.to_timedelta(data["days_offset"],unit="days")
        data = data[data["date_shifted"]>=dt.datetime(2021,12,20)].copy(deep=True)
        # construct interval index object for all periods
        idx = pd.IntervalIndex.from_arrays(
            left = payperiods["START"],
            right = payperiods["END"],
            closed = "both"
        )
        # determine which payperiod a transaction date belongs to by identifying the positional index inside the interval index object
        pos = idx.get_indexer(data["date_shifted"])
        # extract payperiod info based on positional indices
        ppnum = payperiods["PPNum"].to_numpy()
        ppname = payperiods["PPName"].to_numpy()
        cycle = payperiods["Cycle"].to_numpy()
        data["PPNum"] = ppnum[pos]
        data["PPName"] = ppname[pos]
        data["Cycle"] = cycle[pos]
        # create mapping for max fiscal year per payperiod to determine which fiscal year a payperiod should bleong to
        mapping_table = data.groupby(["days_offset","PPName"]).agg({"FiscalYear":"max"}).reset_index(drop=False)
        data = data.drop(columns=["FiscalYear"])
        data = pd.merge(data, mapping_table, on=["days_offset", "PPName"], how="left")
        # drop intermediate columns
        data = data.drop(columns=["days_offset", "date_shifted"])
        # data.loc[:,["PPName", "PPNum", "Cycle", "FiscalYear"]].drop_duplicates().to_csv(self.gold_path["payroll"].parent/ "OtherTables" / "PayPeriods.csv", index=False)
            # take care of the duplicated problem - e.g., 23-PP22 has Fiscal year 2023 and 2024 - rank by PPName and FiscalYear, than drop the earlier FiscalYear record
        return data

    def _process_units(self) -> None:
        """ 
            read Unit.csv from the payroll gold folder (unit numbers per location), standardize the location names,
                add the pillar of each row, and save the result as OtherTables/Unit_PowerBI.csv next to the payroll folder
        """
        payroll_dir = self.gold_path["payroll"]
        units = pd.read_csv(payroll_dir/"Unit.csv",dtype={"Location":str, "Unit":float})
        # names used in the unit document -> standardized location names
        doc_rename = {
            "Airdrie Grain": "Airdrie (grain)",
            "Aridrie Cattle": "Airdrie",
            "Arizona All": "Arizona (produce)",
            "BC Cattle (head days 365)": "BritishColumbia (cattle)",
            "BC Produce": "BritishColumbia (produce)",
            "Box Elder": "Havre",
            "Eddystone Cattle": "Eddystone (cattle)",
            "Eddystone Grain": "Eddystone (grain)",
            "Monette Seeds CDN (avg met. ton)": "Seeds",
            "Monette Seeds USA": "Seeds USA",
            "NexGen (avg met. ton)": "NexGen",
            "Waldeck": "Waldeck",
            "Calderbank": "Calderbank",
        }
        # trim surrounding whitespace first so the exact-match rename can hit
        trimmed = units["Location"].str.strip()
        units["Location"] = trimmed
        units["Location"] = units["Location"].replace(doc_rename)
        # pillar is derived row by row with the shared classifier
        units["Pillar"] = units.apply(self._pillar_classification, axis=1)
        out_file = payroll_dir.parent/ "OtherTables" /"Unit_PowerBI.csv"
        units.to_csv(out_file,index=False)

    def _payroll_project(self) -> None:
        """ 
            payroll project: wages / contract labour PL entries allocated to pay periods and summarized
                runs _finance_operational() first when the PL is not yet in memory (outside dev mode); in dev mode the saved PL.csv is read instead
            output: details + cost per unit (units per location input sheet) + average cost per unit for FY
                - Payroll.xlsx (details) in the payroll folder
                - payroll_summarized1/2/3.csv (per location per pay period, per location per fiscal year, per pillar per fiscal year) in the combined HR CSV folder
        """
        other_tables = self.gold_path["payroll"].parent/ "OtherTables"
        self.check_file(other_tables)
        print("\nStarting Payroll Project Transformation\n")

        # load and filter accounts for wages and contract labor
        wage_categories = ["Wages and benefits - direct","Wages and benefits - overhead"]
        contract_accnums = ["MFAZ595001","MFBC536030"]
        is_payroll_acc = self.silver_acc["Category"].isin(wage_categories) | self.silver_acc["AccNum"].isin(contract_accnums)
        account = self.silver_acc[is_payroll_acc]
        # load only with transaction date later than 2021-12-20, and without "Accrual" in the memo
        if self.is_dev:
            data = pd.read_csv(self.gold_path["finance_operational"]/"PL.csv")
        else:
            if not self.pl_exist:
                self._finance_operational()
            data = self.gold_pl.copy(deep=True)
        payroll_ids = account.AccID.unique()
        data = data[data["AccID"].isin(payroll_ids)]
        data["TransactionDate"] = pd.to_datetime(data["TransactionDate"])
        cutoff = dt.datetime(2021,12,20)
        data = data[data["TransactionDate"]>=cutoff].reset_index(drop=True)
        is_accrual = data["Memo"].str.contains("Accrual",case=False,na=False)
        data = data[~is_accrual]
        # allocating payperiods
        data = self._process_pp(data=data)
        # standardizing location (the order of these steps matters)
        # data.loc[data["Location"]=="Airdrie (corporate)", "Pillar"] = "Cattle"                # deprecated
        # data.loc[data["Location"]=="Airdrie (corporate)", "Location"] = "Airdrie (cattle)"    # deprecated
        bc_corporate = "BritishColumbia (corporate)"
        data.loc[data["Location"]=="Eddystone (corporate)", "Pillar"] = "Unclassified"
        data.loc[data["Location"]=="Eddystone (corporate)", "Location"] = "Unassigned"
        data.loc[data["Location"]=="Legacy", "Location"] = "Unassigned"
        ## every remaining corporate location except BC becomes plain Corporate
        other_corporate = data["Location"].str.contains("corporate",case=False,na=False) & (data["Location"]!=bc_corporate)
        data.loc[other_corporate,"Location"] = "Corporate"
        ## move BC ranches into BC Cattle
        bc_sources = self.bc_ranches+[bc_corporate]
        data.loc[data["Location"].isin(bc_sources), "Location"] = "BritishColumbia (cattle)"
        data.loc[data["Location"] == "BritishColumbia (cattle)", "Pillar"] = "Cattle-CowCalf"
        # summarizing data
        ## by Location per PP
        per_pp_keys = ["Location","PPName","Pillar","FiscalYear","Cycle","PPNum"]
        data_summarized = data.groupby(per_pp_keys).agg({"AmountDisplay":"sum"}).reset_index(drop=False)
        # a (Location, PPName) pair must not map to more than one Pillar / FiscalYear / Cycle / PPNum combination
        per_location_pp = data.groupby(["Location","PPName"]).agg({"AmountDisplay":"sum"})
        assert len(data_summarized) == len(per_location_pp), "Duplicated value detected for per Location per PP calculation"
        ## join acres data for CostPerUnit compute
        print("Summarizing ...")
        acres = pd.read_csv(other_tables /"Unit_PowerBI.csv",dtype={"Location":str, "Unit":float})
        acres = acres.loc[:,["Location", "Unit"]]
        ### create BC cattle total units - each source location is expected exactly once in the unit table
        total_bc = sum(acres.loc[acres["Location"]==ranch, "Unit"].item() for ranch in bc_sources)
        acres.loc[acres["Location"]=="BritishColumbia (cattle)", "Unit"] = total_bc
        # a unit count of 0 is replaced by 1 so the division below stays defined
        acres["Unit"] = acres["Unit"].replace({0: 1})
        acres_locations = set(acres.Location.unique())
        payroll_locations = set(data_summarized.Location.unique())
        print(f"Unaccounted location for Acres Doc: {acres_locations - payroll_locations}")
        print(f"Unaccounted location for QBO Payroll: {payroll_locations - acres_locations}")
        data_summarized = pd.merge(data_summarized, acres, on="Location", how="left")
        # pay period cost per unit scaled by 26
        data_summarized["CostPerUnit"] = data_summarized["AmountDisplay"] / data_summarized["Unit"] * 26
        data_summarized["Count"] = 1
        ## by Location
        data_summarized2 = data_summarized.groupby(by=["Location","FiscalYear","Pillar"]).agg({"CostPerUnit":"mean", "Count":"sum"}).reset_index(drop=False)
        data_summarized2 = data_summarized2.rename(columns={"CostPerUnit":"Avg CostPerUnit"})
        per_location_fy = data_summarized.groupby(by=["Location","FiscalYear"]).agg({"CostPerUnit":"mean"})
        assert len(data_summarized2) == len(per_location_fy), "Duplicated value detected for per Location calculation"
        ## by pillar
        data_summarized3 = data_summarized2.groupby(by=["FiscalYear","Pillar"]).agg({"Avg CostPerUnit":"mean", "Count":"sum"}).reset_index(drop=False)
        per_pillar_fy = data_summarized.groupby(by=["Pillar","FiscalYear"]).agg({"CostPerUnit":"mean"})
        assert len(data_summarized3) == len(per_pillar_fy), "Duplicated value detected for per Pillar calculation"
        # saving
        print("Saving ...")
        self.check_file(self.gold_path["payroll"])
        data.to_excel(self.gold_path["payroll"]/"Payroll.xlsx", sheet_name="Payroll", index=False)
        summary_dir = self.gold_path["hr_combined"] / "CSV"
        self.check_file(summary_dir)
        data_summarized.to_csv(summary_dir / "payroll_summarized1.csv", index=False)
        data_summarized2.to_csv(summary_dir / "payroll_summarized2.csv", index=False)
        data_summarized3.to_csv(summary_dir / "payroll_summarized3.csv", index=False)

    def _QBOTime_project(self) -> None:
        """ 
            Build the QBO Time gold outputs: standardize group locations, join user and jobcode details onto
            the timesheets, tag fiscal year / pay period / pillar, then save the detailed table together with
            three levels of hours-per-unit summaries (per pay period, per location, per pillar)
        """
        print("\nStarting QBO Time Project Transformation\n")
        # load the silver QBO Time tables
        time_dir = self.silver_path["QBO"]["Time"]
        timesheets = pd.read_csv(time_dir/"timesheets.csv")
        jobcode = pd.read_csv(time_dir/"jobcodes.csv")
        users = pd.read_csv(time_dir/"users.csv")
        group = pd.read_csv(time_dir/"group.csv")
        print(f"Read {len(timesheets)} timesheet records, {len(jobcode)} jobcodes, {len(users)} users, {len(group)} groups")
        # row counts before the joins, checked again after them to prove nothing fanned out
        n_timesheets, n_users = len(timesheets), len(users)

        # standardize the location of every group; rules are applied top to bottom
        corp, site = group["corp_short"], group["location_name"]

        def at(corp_code: str, site_name: str) -> pd.Series:
            """mask of the groups with this corp code and this exact location name"""
            return (corp == corp_code) & (site == site_name)

        location_rules = [
            ## Arizona - all produce
            (at("A", "Monette Farms AZ"), "Arizona (produce)"),
            (at("A", "Monette Produce USA"), "Arizona (produce)"),
            (at("A", "Monette Seeds USA"), "Arizona (produce)"),
            ## BC
            (at("BC", "Ashcroft Ranch"), "Ashcroft"),
            (at("BC", "Cache/Fischer/Loon"), "BritishColumbia (cattle)"),
            ((corp == "BC") & (site.str.contains("silage", case=False)), "BritishColumbia (cattle)"),
            (at("BC", "Diamond S Ranch"), "Diamond S"),
            (at("BC", "Fraser River Ranch"), "Fraser River Ranch"),
            (at("BC", "Home Ranch (70 Mile, LF/W, BR)"), "Home Ranch"),
            (at("BC", "Moon Ranch"), "Moon Ranch"),
            (at("BC", "Produce"), "BritishColumbia (produce)"),
            (at("BC", "Wolf Ranch"), "Wolf Ranch"),
            (at("BC", "SAWP"), "BritishColumbia (produce)"),
            (at("BC", "SAWP Produce"), "BritishColumbia (produce)"),
            ## Outlook - every group of the corp
            (corp == "O", "Outlook"),
            ## others
            (at("CM", "Yorkton"), "Yorkton"),
            (at("CM", "Airdrie"), "Airdrie"),
            (at("CM", "BC"), "Unassigned"),
            (at("CM", "Calderbank"), "Calderbank"),
            (at("CM", "Eddystone"), "Eddystone (unspecified)"),
            (at("CM", "Hafford"), "Hafford"),
            (at("CM", "Kamsack"), "Kamsack"),
            (at("CM", "MFUSA Billings"), "Billings"),
            (at("CM", "MFUSA Box Elder"), "Havre"),
            (at("CM", "Nexgen Seeds"), "NexGen"),
            (at("CM", "Prince Albert"), "Prince Albert"),
            (at("CM", "Raymore"), "Raymore"),
            (at("CM", "Regina"), "Regina"),
            (at("CM", "Russel Approvals"), "Unassigned"),
            (at("CM", "Seeds"), "Seeds"),
            (at("CM", "Swift Current"), "Swift Current"),
            (at("CM", "The Pas"), "The Pas"),
            (at("CM", "Waldeck"), "Waldeck"),
        ]
        for rule_mask, label in location_rules:
            group.loc[rule_mask, "Location"] = label
        # report any group that no rule covered
        unclassified = group.loc[group["Location"].isna(), "location_name"].unique()
        if len(unclassified) > 0:
            print(f"\nUnclassified location - {unclassified}\n")

        # keep the fine-grained name in "Location (detail)"; "Location" rolls the BC ranches into one
        group = group.rename(columns={"Location": "Location (detail)"})
        group["Location"] = group["Location (detail)"]
        rolled_up = group["Location (detail)"].isin(self.bc_ranches+["BritishColumbia (corporate)"])
        group.loc[rolled_up, "Location"] = "BritishColumbia (cattle)"

        # join everything onto the timesheets
        ## group location -> users
        group_lookup = group.loc[:, ["group_id", "location_name", "Location", "Location (detail)"]].drop_duplicates()
        users = users.merge(group_lookup, on="group_id", how="left")
        ## users -> timesheets
        user_cols = ["user_id", "group_id", "username", "full_name", "location_name",
                     "Location", "Location (detail)", "first_name", "last_name"]
        timesheets = timesheets.merge(users.loc[:, user_cols], on="user_id", how="left")
        ## jobcodes -> timesheets
        job_lookup = jobcode.loc[:, ["jobcode_id", "job_name", "type"]].rename(columns={"type": "job_type"})
        timesheets = timesheets.merge(job_lookup, on="jobcode_id", how="left")
        assert (len(users) == n_users) and (len(timesheets) == n_timesheets), f"duplicated records found, timesheets - {n_timesheets} vs {len(timesheets)}; users - {n_users} vs {len(users)}"

        # fiscal year: calendar year, with November and December counted towards the following year
        timesheets["date"] = pd.to_datetime(timesheets["date"])
        timesheets["Month"] = timesheets["date"].dt.month_name()
        timesheets["FiscalYear"] = timesheets["date"].dt.year
        rolls_forward = timesheets["Month"].isin(["November", "December"])
        timesheets.loc[rolls_forward, "FiscalYear"] += 1
        # classify payperiods
        timesheets = self._process_pp(data=timesheets)
        # manual override: this single user is reported as "Unassigned"
        timesheets.loc[timesheets["user_id"] == "BC6107856", "Location"] = "Unassigned"
        # classify pillars; "Missing" is reported as "Unclassified"
        timesheets["Pillar"] = timesheets.apply(self._pillar_classification, axis=1)
        timesheets.loc[timesheets["Pillar"] == "Missing", "Pillar"] = "Unclassified"

        # level 1 summary: hours by Location per pay period
        pp_keys = ["Location", "PPName", "FiscalYear", "Cycle", "PPNum", "Pillar"]
        summarized = timesheets.groupby(pp_keys).agg({"duration": "sum"}).reset_index()
        assert len(summarized) == len(timesheets.groupby(["Location","PPName"]).agg({"duration":"sum"})), "duplicated value detected for timsheet per Location per PP summarization"
        ## units (denominator) per location
        acres = pd.read_csv(self.gold_path["payroll"].parent/ "OtherTables" /"Unit_PowerBI.csv",dtype={"Location":str, "Unit":float})
        acres = acres.loc[:, ["Location", "Unit"]]
        unit_sites, hour_sites = set(acres.Location.unique()), set(summarized.Location.unique())
        print(f"Unaccounted location for Acres Doc: {unit_sites - hour_sites}")
        print(f"Unaccounted location for timesheets: {hour_sites - unit_sites}")
        ### BC cattle units = sum over the individual ranches plus BC corporate (each must appear exactly once)
        total_bc = sum(
            acres.loc[acres["Location"] == ranch, "Unit"].item()
            for ranch in self.bc_ranches+["BritishColumbia (corporate)"]
        )
        acres.loc[acres["Location"]=="BritishColumbia (cattle)", "Unit"] = total_bc
        # acres.loc[acres["Location"]=="Billings", "Unit"] = acres[acres["Location"].isin(["Fly Creek", "Camp 4"])].Unit.sum()
        acres["Unit"] = acres["Unit"].replace({0: 1})      # a zero unit would break the division below
        ## attach units and compute hours per unit (pay-period hours scaled by 26)
        summarized = summarized.merge(acres, on="Location", how="left")
        summarized["HoursPerUnit"] = summarized["duration"] / summarized["Unit"] * 26
        summarized["Count"] = 1

        # level 2 summary: average per location and fiscal year
        summarized2 = summarized.groupby(by=["Location", "FiscalYear", "Pillar"]).agg({"HoursPerUnit": "mean", "Count": "sum"}).reset_index()
        summarized2 = summarized2.rename(columns={"HoursPerUnit": "Avg HoursPerUnit"})
        assert len(summarized2) == len(timesheets.groupby(["Location","FiscalYear"]).agg({"duration":"sum"})), "duplicated value detected for timsheet per Location summarization"
        # level 3 summary: average per pillar and fiscal year
        summarized3 = summarized2.groupby(by=["FiscalYear", "Pillar"]).agg({"Avg HoursPerUnit": "mean", "Count": "sum"}).reset_index()
        assert len(summarized3) == len(timesheets[timesheets["Pillar"]!="Missing"].groupby(["Pillar","FiscalYear"]).agg({"duration":"sum"})), "duplicated value detected for timsheet per Pillar summarization"

        # saving
        print("Saving ...\n")
        self.check_file(self.gold_path["QBOTime"])
        timesheets.to_excel(self.gold_path["QBOTime"]/"QBOTime.xlsx", sheet_name = "QBOTime", index=False)
        csv_dir = self.gold_path["hr_combined"]/ "CSV"
        self.check_file(csv_dir)
        for level, frame in enumerate((summarized, summarized2, summarized3), start=1):
            frame.to_csv(csv_dir / f"time_summarized{level}.csv", index=False)

    def _hr_summary(self) -> None:
        """ 
            Stack the payroll and QBO Time summaries (levels 1 to 3) into one table per level, with a "Mode"
            column telling the two sources apart, and save every level as its own Excel workbook
        """
        csv_dir = self.gold_path["hr_combined"] / "CSV"
        # (Mode label, csv prefix, renames that bring the source onto the shared column names)
        sources = [
            ("Payroll", "payroll", {"AmountDisplay": "TotalAmount", "CostPerUnit": "AmountPerUnit", "Avg CostPerUnit": "Avg AmountPerUnit"}),
            ("Hours", "time", {"duration": "TotalAmount", "HoursPerUnit": "AmountPerUnit", "Avg HoursPerUnit": "Avg AmountPerUnit"}),
        ]
        # read and combine all levels first, write afterwards
        combined = []
        for level in (1, 2, 3):
            parts = []
            for mode, prefix, shared_names in sources:
                part = pd.read_csv(csv_dir / f"{prefix}_summarized{level}.csv").rename(columns=shared_names)
                part["Mode"] = mode
                parts.append(part)
            combined.append(pd.concat(parts, ignore_index=True))
        for level, frame in enumerate(combined, start=1):
            # level 1 carries no numeric suffix in its file / sheet name
            label = "Summarized" if level == 1 else f"Summarized{level}"
            frame.to_excel(self.gold_path["hr_combined"]/f"{label}.xlsx", sheet_name=label, index=False)

    def _inventory_settlement(self) -> None:
        """ 
            Prepare the grain cash-settlement fact table for the inventory project from the raw QBO tables.

            Invoice and SalesReceipt lines of MFL / MFUSA and JournalEntry lines are kept when they are dated
            on or after 2023-11-01 and hit an income account of those two corps. The three sources are stacked,
            enriched with account, farm and customer attributes, narrowed to Subcategory
            "Grain - cash settlements" and written to <gold inventory>/Settlement/QBO_Grain_Settlements.xlsx
        """
        print("\nStarting Inventory Project Transformation ...\n")
        corps = ["MFL", "MFUSA"]
        cols = ["TransactionDate", "TransactionType", "TransactionID", "Corp", "Qty", "AccID", "FarmID", "CustomerID",
                "DocNumber", "TransactionEntered", "Amount"]
        journal_cols = [col for col in cols if col != "Qty"]    # Qty is not read from the journal file
        first_date = dt.datetime(2023,11,1)
        raw_dir = self.silver_path["QBO"]["Raw"]
        dim_dir = self.silver_path["QBO"]["Dimension_time"]

        # read dimension tables
        print("Loading raw tables ...")
        account = pd.read_csv(self.gold_path["finance_operational"]/"Account_table.csv")
        account = account[account["Corp"].isin(corps)]
        account = account[account["AccountType"] == "Income"]
        income_acc_ids = account.AccID.unique()     # computed once, reused by every fact filter below
        farm = pd.read_csv(dim_dir/"Farm.csv")
        farm = farm[farm["Corp"].isin(corps)]
        customer = pd.read_csv(dim_dir/"Customer.csv")
        customer = customer[customer["Corp"].isin(corps)]

        def _load_sales_lines(filename: str) -> pd.DataFrame:
            """read one raw sales-type table and keep in-scope corps, dates and income accounts"""
            lines = pd.read_csv(raw_dir/filename)
            # .copy() so the date conversion below is not an assignment on a filtered slice
            lines = lines[lines["Corp"].isin(corps)].copy()
            lines["TransactionDate"] = pd.to_datetime(lines["TransactionDate"])
            lines = lines[lines["TransactionDate"]>=first_date]
            return lines[lines["AccID"].isin(income_acc_ids)]

        # read fact tables
        invoice = _load_sales_lines("Invoice.csv")
        sales = _load_sales_lines("SalesReceipt.csv")
        journal = pd.read_csv(raw_dir/"JournalEntry.csv", dtype={"FarmID":str, "ClassID":str, "CustomerID":str, "EmployeeID":str}, usecols=journal_cols)
        journal = journal[journal["AccID"].isin(income_acc_ids)].copy()
        journal["TransactionDate"] = pd.to_datetime(journal["TransactionDate"])
        journal = journal[journal["TransactionDate"]>=first_date]
        # drop the two kinds of journal entries identified by these memo texts
        for memo in ("Delivered and not settled", "Grain Inventory Receivable Adjustment"):
            journal = journal[~journal["TransactionEntered"].str.contains(memo, na=False)]

        # combining tables - every source contributes only the shared columns it actually has
        print("Combining Fact Tables ...")
        sources = [table.loc[:, [col for col in cols if col in table.columns]] for table in (invoice, sales, journal)]
        facts = pd.concat(sources, ignore_index=True)
        del invoice, sales, journal, sources
        # join facts with dimension tables
        facts = pd.merge(facts, account.loc[:,["AccID","AccNum","AccName","Category","Subcategory","Commodity"]], on=["AccID"], how="left")
        facts = pd.merge(facts, farm.loc[:,["FarmID","FarmName"]], on=["FarmID"], how="left")
        facts = pd.merge(facts, customer.loc[:,["CustomerID","CustomerName"]], on=["CustomerID"], how="left")
        facts = facts[facts["Subcategory"]=="Grain - cash settlements"]
        print(f"Total Fact Entries - {len(facts)}")

        # saving file
        print("Saving Files ...")
        settlement_dir = self.gold_path["inventory"]/"Settlement"
        self.check_file(self.gold_path["inventory"])
        # the workbook lands in the Settlement sub-folder, so run the same check on it - the other savers
        # in this class call check_file on the exact directory they write into
        self.check_file(settlement_dir)
        facts.to_excel(settlement_dir/"QBO_Grain_Settlements.xlsx", sheet_name="settlement", index=False)
        print("Finished\n")

    def _buget_process_input(self, inputdata_path:Path, processed_path:Path) -> None:
        """ 
            this function processes and saves budget totals for production, input (chem/fert/seed), produce budgets, and JD Lease
        """
        ## commodity prices - everything is CAD except Winter Wheat is in USD - convert everything to CAD
        pricing = pd.read_csv(inputdata_path/"25-Grain-Pricing.csv")
        pricing.loc[pricing["Commodity"]=="WW", "ForecastPrice"] *= self.fx
        ## production budget
        budget_production = pd.read_csv(inputdata_path/"25-Grain-Revenue.csv")
        budget_production = budget_production.melt(
            id_vars=["Location", "Currency", "Type"],
            var_name="Commodity",
            value_name = "Amount"
        )
        budget_production = budget_production.fillna(value = {"Amount": 0})
        budget_production["Commodity"] = budget_production["Commodity"].replace({"Hay/Silage":"Hay"})
        budget_production.loc[((budget_production["Location"]=="Airdrie")&(budget_production["Commodity"]=="Hay")), "Commodity"] = "Silage" # only Airdrie has silage, others have hay
        budget_production_summary = pd.DataFrame(budget_production.groupby(["Location","Currency","Commodity"]).agg({"Amount": "prod"})).reset_index(drop=False)
        budget_production_summary = budget_production_summary.rename(columns={"Amount":"TotalYield"})
        ### merge yield with commodity price to calculate forecast production value of commodities
        budget_production_summary = pd.merge(budget_production_summary,pricing,on=["Commodity"], how="left")
        ### manual adjustments to prices
        budget_production_summary.loc[((budget_production_summary["Location"] == "Airdrie") & (budget_production_summary["Commodity"] == "Hay")), "ForecastPrice"] = 85
        budget_production_summary.loc[((budget_production_summary["Location"] == "Colorado (Genoa)") & (budget_production_summary["Commodity"] == "WW")), "ForecastPrice"] = 12.5 * self.fx
        budget_production_summary.loc[budget_production_summary["Location"] == "Yorkton", "ForecastPrice"] *= 2/3
        budget_production_summary["ForecastProductionCAD"] = budget_production_summary["TotalYield"] * budget_production_summary["ForecastPrice"]
        budget_production_summary = budget_production_summary[budget_production_summary["ForecastProductionCAD"].notna()]
        budget_production_summary = budget_production_summary[budget_production_summary["ForecastProductionCAD"]!=0]
        ### convert prices back to USD for a adjusted column
        budget_production_summary["ForecastProductionAdj"] = budget_production_summary.apply(lambda x: x["ForecastProductionCAD"] / self.fx if x["Currency"] == "USD" else x["ForecastProductionCAD"],axis=1)
        ### save production budget
        budget_production_summary.to_csv(processed_path/"budget_production.csv",index=False)
        ## input budget
        input_budget = pd.read_csv(inputdata_path/"25-Input-Budget.csv")
        input_budget = input_budget.drop(columns=["Total acres"])
        input_budget = input_budget.melt(
            id_vars = ["Location", "Type"],
            var_name = "Commodity",
            value_name = "Amount"
        )
        input_budget = input_budget.fillna(value = {"Amount": 0})
        input_budget.loc[((input_budget["Location"]=="Yorkton")&(input_budget["Type"].isin(["Fertilizer","Chemical","Seed"]))), "Amount"] *= 2/3
        input_budget.to_csv(processed_path/"input_budget.csv",index=False)
        ## labour budget
        labour_budget = pd.read_csv(inputdata_path/"25-Labour-Budget.csv")
        labour_budget = labour_budget.melt(
            id_vars = ["Location","Currency"],
            var_name = "Month",
            value_name = "LabourBudgetCAD"
        )
        labour_budget["LabourBudgetAdj"] = labour_budget.apply(lambda x: x["LabourBudgetCAD"]/self.fx if x["Currency"]=="USD" else x["LabourBudgetCAD"], axis=1)
        labour_budget.to_csv(processed_path/"labour_budget.csv",index=False)
        ## outlook budget
        outlook = pd.read_csv(inputdata_path/"25-Outlook-Detail.csv")
        outlook = outlook.melt(
            id_vars=["Type", "ProfitType"],
            var_name="Commodity",
            value_name="Amount"
        )
        outlook = outlook.fillna(value={"Amount": 0})
        outlook.to_csv(processed_path/"outlook_budget.csv", index=False)
        ## AZ budget
        az = pd.read_csv(inputdata_path / "25-AZ-Detail.csv")
        az = az.melt(
            id_vars=["Type", "ProfitType"],
            var_name="CommodityRaw",
            value_name="AmountCAD"
        )
        az = az.fillna(value={"AmountCAD": 0})
        az.to_csv(processed_path/"az_budget.csv", index=False)
        ## BC produce details
        bc = pd.read_csv(inputdata_path / "25-BC-Detail.csv")
        bc = bc.melt(
            id_vars=["Type", "ProfitType"],
            var_name="CommodityRaw",
            value_name="AmountCAD"
        )
        bc = bc.fillna(value={"AmountCAD": 0})
        bc.to_csv(processed_path/"bc_budget.csv", index=False)
        ## JD lease
        jdlease = pd.read_csv(inputdata_path/"25-JD-Lease-Summary.csv")
        jdlease = jdlease[jdlease["AllocatedCost25"] != 0]
        jdlease.to_csv(processed_path/"JD_lease.csv", index=False)

    def _budget_read_outsidedata(self,processed_path:Path) -> tuple[pd.DataFrame,pd.DataFrame,pd.DataFrame,pd.DataFrame,pd.DataFrame,pd.DataFrame,pd.DataFrame]:
        """ 
            this function reads all the processed outside data and standardize the commodity and location naming
        """
        production_budget = pd.read_csv(processed_path/"budget_production.csv")
        input_budget = pd.read_csv(processed_path/"input_budget.csv")
        labour_budget = pd.read_csv(processed_path/"labour_budget.csv")
        outlook_budget = pd.read_csv(processed_path/"outlook_budget.csv")
        jdlease = pd.read_csv(processed_path/"JD_lease.csv")
        az_budget = pd.read_csv(processed_path/"az_budget.csv")
        bc_budget = pd.read_csv(processed_path/"bc_budget.csv")
        ## standardizing commodity naming
        production_rename_commodity = {"R Lentils":"Red Lentil", "G Lentils":"Green Lentil","Chickpeas":"Chickpea","Peas":"Field Pea", "WW": "Winter Wheat"}
        input_rename_commodity = {"R Lentils":"Red Lentil", "G Lentils":"Green Lentil","Chickpeas":"Chickpea", "WW": "Winter Wheat"}
        outlook_rename_commodity = {"Broccoli-cases/ac":"Broccoli", "Cabbage-lbs/ac":"Cabbage", "Carrots-lbs":"Carrot", "Cauliflower-cases/ac":"Cauliflower",
                                    "Table Potato-lbs":"Potato", "Seed Potato-lbs":"Potato", "Commercial Pumpkins-Bins/ac":"Pumpkin", "Strawberry Upick-lbs":"Strawberry",
                                    "Pumpkin Upick-pieces/ac":"Pumpkin", "Corn Maze-lbs":"Prairie Pathways", "WW": "Winter Wheat", "Corn (Sweet) Cobs":"Sweet Corn"}
        az_rename_commodity = {"Broccoli-cases/ac":"Broccoli", "Cabbage-lbs/ac":"Cabbage", "Pumpkins-Bins/ac":"Pumpkin", "WatermelonLG-bins/ac": "Watermelon",
                            "WatermelonMini-cases/ac": "Watermelon"}
        bc_rename_commodity = {"Broccoli-cases/ac":"Broccoli", "WatermelonLG-bins/ac": "Watermelon", "WatermelonMini-cases/ac": "Watermelon", "Pumpkins-Bins/ac":"Pumpkin",
                            "Squash-lbs": "Squash"}
        outlook_budget["CommodityRaw"] = outlook_budget["Commodity"]
        production_budget["Commodity"] = production_budget["Commodity"].replace(production_rename_commodity)
        input_budget["Commodity"] = input_budget["Commodity"].replace(input_rename_commodity)
        outlook_budget["Commodity"] = outlook_budget["Commodity"].replace(outlook_rename_commodity)
        az_budget["Commodity"] = az_budget["CommodityRaw"].replace(az_rename_commodity)
        bc_budget["Commodity"] = bc_budget["CommodityRaw"].replace(bc_rename_commodity)
        ## standardizing location naming - merge calderbank grain with Swift Current
        jdlease_rename_location = {"Swift Current Total":"Swift Current", "Regina Farm":"Regina", "Calderbank":"Swift Current",
                                "Airdrie":"Airdrie (grain)", "Eddystone":"Eddystone (grain)"}
        labour_rename_location = {"NexGen (avg met. ton)":"NexGen", "Cache/Fisher/Look":"Aschroft", "MF AZ":"Arizona (produce)", "Box Elder":"Havre", 
                                "BC Veg":"BritishColumbia (produce)","Monette Seeds CDN (avg met. ton)":"Monette Seeds", 
                                "BC Cattle (avg head)":"BritishColumbia (cattle)", "Eddystone Cattle (avg head)":"Eddystone (cattle)",
                                "Swift Current Cattle (avg head)":"Waldeck", "Aridrie Cattle (avg head)":"Airdrie (cattle)",
                                "Airdrie Farm":"Airdrie (grain)", "Eddystone Farm":"Eddystone (grain)","Calderbank":"Calderbank (cattle)"}
        input_rename_location =  {"Fly Creek/Camp 1":"Fly Creek", "Regina Farm":"Regina","Swift Current Total":"Swift Current", "Box Elder":"Havre", "Regina Farm":"Regina",
                                "Calderbank":"Calderbank (grain)","Airdrie":"Airdrie (grain)", "Eddystone":"Eddystone (grain)"}
        production_rename_location = {"Fly Creek/Camp 1":"Fly Creek", "Regina Farm":"Regina","Swift Current Total":"Swift Current", "Box Elder":"Havre", "Regina Farm":"Regina",
                                    "Colorado (Genoa)":"Colorado", "Calderbank":"Swift Current","Airdrie":"Airdrie (grain)", "Eddystone":"Eddystone (grain)"}
        input_budget["Location"] = input_budget["Location"].replace(input_rename_location)
        production_budget["Location"] = production_budget["Location"].replace(production_rename_location)
        labour_budget["Location"] = labour_budget["Location"].replace(labour_rename_location)
        jdlease["Location"] = jdlease["Location"].replace(jdlease_rename_location)
        ## put input budget (chem/fert/seed) into aggregated totals
        input_budget2 = input_budget.groupby(["Location","Type"]).agg({"Amount":"sum"}).reset_index(drop=False)
        input_budget2.loc[((input_budget2["Location"].isin(["Camp 4","Fly Creek", "Havre"]))&(input_budget2["Type"]!="Acres")), "Amount"] *= self.fx
        ## aggregated totals for production budget and JD Lease
        production_budget = pd.DataFrame(production_budget.groupby(["Location","Currency","Commodity","ForecastPrice"]).agg({"TotalYield":"sum", "ForecastProductionCAD":"sum", "ForecastProductionAdj":"sum"}).reset_index(drop=False))
        jdlease = pd.DataFrame(jdlease.groupby(["Location","Country","Currency","TotalCost25"]).agg({"Acres25":"sum","AllocatedCost25":"sum"}).reset_index(drop=False))
        return input_budget2, production_budget, labour_budget, jdlease, az_budget, bc_budget, outlook_budget

    def _budget_process_produce(self, budget_rules:pd.DataFrame,budget:pd.DataFrame,sheetname:str) -> pd.DataFrame:
        """ 
            this function provides a standardized way to process produce budgets
        """
        budget_rules = budget_rules[budget_rules["SheetRef"] == sheetname].copy(deep=True)
        budget_rules["Commodity"] = budget_rules.apply(lambda x: self._identify_product(x,for_budget=True), axis=1)
        budget["Type"] = budget["Type"].str.strip()
        # gross income - by commodity
        reference = budget[budget["Type"].isin(["Acres","Unit Price","YieldPerAc"])]
        reference = reference.groupby(["Commodity","ProfitType","CommodityRaw"]).agg({"AmountCAD":"prod"}).reset_index(drop=False)
        reference = reference.groupby(["Commodity"]).agg({"AmountCAD":"sum"}).reset_index(drop=False)
        reference = reference.rename(columns={"AmountCAD":"TotalAmountCAD"})
        reference["Category"] = "Produce - production"
        if "outlook" in sheetname.lower():
            for item in ["Prairie Pathways", "Market Garden / CSA"]:
                reference.loc[reference["Commodity"] == item, "Category"] = "Produce - cash settlements"
        # seed expense - by commodity
        expense = budget[budget["Type"] == "Seed"].copy(deep=True)
        expense = expense.drop(columns="CommodityRaw")
        expense = expense.groupby(["Commodity"]).agg({"AmountCAD":"sum"}).reset_index(drop=False).rename(columns={"AmountCAD":"TotalAmountCAD"})
        expense["Category"] = "Seed"
        # other expense - Fertilizer/Chemical - not by commodity
        expense2 = budget[budget["Type"].isin(["Fertilizer","Chemical"])]
        expense2 = expense2.groupby(["Type"]).agg({"AmountCAD":"sum"}).reset_index(drop=False).rename(columns={"AmountCAD":"TotalAmountCAD"})
        expense2["Commodity"] = "Others"
        expense2 = expense2.rename(columns={"Type":"Category"})
        # combine
        budget_produce = pd.merge(budget_rules, pd.concat([reference,expense, expense2]), on=["Commodity","Category"], how="left")
        budget_produce = budget_produce.fillna(value={"TotalAmountCAD":0})
        budget_produce["AmountCAD"] = budget_produce.apply(lambda x: eval(f"{x["TotalAmountCAD"]}{x["Formula"]}"),axis=1)
        return budget_produce

    def _budget_get_transactions(self) -> pd.DataFrame:
        """ 
            get actuals
        """
        if self.is_dev:
            transactions = pd.read_csv(self.gold_path["finance_operational"]/"PL.csv")
            self.operation_acc = pd.read_csv(self.gold_path["finance_operational"]/"AccNumTOAccID.csv")
        else:
            transactions = self.gold_pl.copy(deep=True)
        transactions = transactions[transactions["FiscalYear"] >= 2024]
        transactions["AccName"] = transactions["AccName"].str.strip()
        return transactions

    def _create_budget(self, process_input:bool = False) -> None:
        """ 
            In Progress: this function generates budgets
        """
        if self.is_dev:
            self.fx = 1.3988
        location_adj = {"Camp 4": "Billings", "Fly Creek":"Billings"}
        print("\nCreating Budget\n")
        if not self.is_dev:
            if not self.pl_exist: self._finance_operational()
        inputdata_path = self.gold_path["budget"] / "Outside Data"
        processed_path = self.gold_path["budget"] / "Processed Data"
        rule_path = self.gold_path["budget"] / "Budget Rules"
        copied_path = self.gold_path["budget"]/"Copied Data"

        # load actuals
        transactions = self._budget_get_transactions()
        
        # process outside data
        if process_input:
            self._buget_process_input(inputdata_path=inputdata_path, processed_path=processed_path)
        
        # read outside data
        input_budget2, production_budget, labour_budget, jdlease, az_budget, bc_budget, outlook_budget = self._budget_read_outsidedata(processed_path=processed_path)

        # calculate Budgets
        ## outside data
        ### read rules
        budget_rules = pd.read_csv(rule_path/"OutsideData.csv")
        budget_rename_category = {"Seed - farm":"Seed"}
        budget_rules["Category"] = budget_rules["Category"].replace(budget_rename_category)
        ### separate locations into individual rows when they are separated with + in the rules df
        budget_rules["Location"] = budget_rules["Location"].str.split("+")
        budget_rules = budget_rules.explode("Location").reset_index(drop=True)
        ### extract formula
        budget_rules = budget_rules.melt(
            id_vars=["Location","Category","AccFull","SheetRef"],
            var_name="Month",
            value_name="Formula"
        )
        budget_rules = budget_rules[~budget_rules["Location"].isin(["Outlook", "Arizona (produce)", "BritishColumbia (produce)"])]  # produce copied data -> perfect alignment with Excel budget
        budget_rules = budget_rules.fillna(value={"Formula":"0"})
        budget_rules["Formula"] = budget_rules["Formula"].astype(str)
        budget_rules["Formula"] = budget_rules["Formula"].replace({"0": "*0"})
        ### calculating input budget for accounts per location
        budget_rules_input = budget_rules[budget_rules["SheetRef"] == "Input Budget"].copy(deep=True)
        #### workaround input budget for Airdrie grain 
        input_budget2.loc[((input_budget2["Location"]=="Airdrie (grain)")&(input_budget2["Type"]=="Acres")),"Type"] = "Custom work"
        ### merge budget rules with budget total per location
        budget_input = pd.merge(budget_rules_input,input_budget2.rename(columns={"Type":"Category","Amount":"TotalAmountCAD"}),on=["Location","Category"],how="left")
        #### revert back from workaround
        input_budget2.loc[((input_budget2["Location"]=="Airdrie (grain)")&(input_budget2["Type"]=="Custom work")),"Type"] = "Acres"
        ### apply the formula to compute per month
        budget_input["AmountCAD"] = budget_input.apply(lambda x: eval(f"{x["TotalAmountCAD"]}{x["Formula"]}"), axis=1)

        ## production budget
        ### combine Hay and Silage
        production_budget["Commodity"] = production_budget["Commodity"].replace({"Hay":"Hay/Silage", "Silage":"Hay/Silage"})
        ### add commodity column to budget rules
        budget_rules_production = budget_rules[budget_rules["SheetRef"] == "Production Budget"].copy(deep=True)
        budget_rules_production["Commodity"] = budget_rules_production.apply(lambda x: self._identify_product(x, for_budget=True), axis=1)
        ### merge budget rules with budget totals
        budget_production = pd.merge(budget_rules_production,production_budget.loc[:,["Location","Commodity","ForecastProductionCAD"]].rename(columns={"ForecastProductionCAD":"TotalAmountCAD"}),
                                         on = ["Location", "Commodity"], how="left")
        budget_production = budget_production.fillna(value={"TotalAmountCAD":0})
        ### compute budget
        budget_production["AmountCAD"] = budget_production.apply(lambda x: eval(f"{x["TotalAmountCAD"]}{x["Formula"]}"),axis=1)

        ## labour budget
        budget_rules_labour = budget_rules[budget_rules["SheetRef"] == "Labour Budget"].copy(deep=True)
        budget_labour = pd.merge(budget_rules_labour, labour_budget.loc[:,["Location","Month","LabourBudgetCAD"]].rename(columns={"LabourBudgetCAD":"AmountCAD"}),
                                    on=["Location","Month"],how="left")
        
        # ## produce budgets        !! using Copied Data for exact alignment
        # ### BC
        # budget_bc_produce = self._budget_process_produce(budget_rules=budget_rules,budget=bc_budget,sheetname="BC Produce Details")
        # ### AZ
        # budget_az_produce = self._budget_process_produce(budget_rules=budget_rules,budget=az_budget,sheetname="AZ Details")
        # ### outlook
        # budget_outlook = self._budget_process_produce(budget_rules=budget_rules,budget=outlook_budget.rename(columns={"Amount":"AmountCAD"}),sheetname="Outlook Details")

        ## JD lease
        budget_rules_jd = budget_rules[budget_rules["SheetRef"]=="JD Lease"].copy(deep=True)
        budget_equipment = pd.merge(budget_rules_jd, jdlease.loc[:,["Location","AllocatedCost25"]].rename(columns={"AllocatedCost25":"TotalAmountCAD"}),
                                        on = "Location", how = "left")
        budget_equipment = budget_equipment.fillna(value={"TotalAmountCAD":0})
        budget_equipment["AmountCAD"] = budget_equipment.apply(lambda x: eval(f"{x["TotalAmountCAD"]}{x["Formula"]}"),axis=1)

        ## adjustment for Swift Current
        months = ["April", "July"]
        for month in months:
            budget_input.loc[((budget_input["Location"]=="Swift Current")&(budget_input["Category"]=="Fertilizer")&(budget_input["Month"]==month)),"TotalAmountCAD"] += \
            budget_input.loc[((budget_input["Location"]=="Calderbank (grain)")&(budget_input["Category"]=="Fertilizer")&(budget_input["Month"]==month)),"TotalAmountCAD"].item()
            budget_input.loc[((budget_input["Location"]=="Swift Current")&(budget_input["Category"]=="Fertilizer")&(budget_input["Month"]==month)),"AmountCAD"] += \
            budget_input.loc[((budget_input["Location"]=="Calderbank (grain)")&(budget_input["Category"]=="Fertilizer")&(budget_input["Month"]==month)),"AmountCAD"].item()
        months = ["June", "September"]
        for month in months:
            budget_input.loc[((budget_input["Location"]=="Swift Current")&(budget_input["Category"]=="Chemical")&(budget_input["Month"]==month)),"TotalAmountCAD"] += \
            budget_input.loc[((budget_input["Location"]=="Calderbank (grain)")&(budget_input["Category"]=="Chemical")&(budget_input["Month"]==month)),"TotalAmountCAD"].item()
            budget_input.loc[((budget_input["Location"]=="Swift Current")&(budget_input["Category"]=="Chemical")&(budget_input["Month"]==month)),"AmountCAD"] += \
            budget_input.loc[((budget_input["Location"]=="Calderbank (grain)")&(budget_input["Category"]=="Chemical")&(budget_input["Month"]==month)),"AmountCAD"].item()
        months = ["May", "June", "September"]
        for month in months:
            budget_input.loc[((budget_input["Location"]=="Swift Current")&(budget_input["Category"]=="Seed")&(budget_input["Month"]==month)),"TotalAmountCAD"] += \
            budget_input.loc[((budget_input["Location"]=="Calderbank (grain)")&(budget_input["Category"]=="Seed")&(budget_input["Month"]==month)),"TotalAmountCAD"].item()
            budget_input.loc[((budget_input["Location"]=="Swift Current")&(budget_input["Category"]=="Seed")&(budget_input["Month"]==month)),"AmountCAD"] += \
            budget_input.loc[((budget_input["Location"]=="Calderbank (grain)")&(budget_input["Category"]=="Seed")&(budget_input["Month"]==month)),"AmountCAD"].item()
        
        # arithmetic rules
        arithmetic = pd.read_csv(rule_path/"Arithmetic.csv")
        ## faltten location
        arithmetic["Location"] = arithmetic["Location"].str.split("+")
        arithmetic = arithmetic.explode("Location").reset_index(drop=True)
        arithmetic_rules = arithmetic.melt(
            id_vars=["Location","Category","AccFull", "AccRef", "FixedRef"],
            var_name="Month",
            value_name="FormulaFull"
        )
        arithmetic_rules = arithmetic_rules[~arithmetic_rules["Location"].isin(["Outlook", "Arizona (produce)", "BritishColumbia (produce)"])]    # produce copied data -> perfect alignment with Excel budget
        ## housekeeping
        arithmetic_rules = arithmetic_rules.fillna(value={"FormulaFull":"FY-1*0"})
        arithmetic_rules["FormulaFull"] = arithmetic_rules["FormulaFull"].astype(str)
        arithmetic_rules["FormulaFull"] = arithmetic_rules["FormulaFull"].replace({"0":"FY-1*0"})
        arithmetic_rules["ReferenceYear"] = arithmetic_rules["FormulaFull"].str.slice(0,4)
        arithmetic_rules["Formula"] = arithmetic_rules["FormulaFull"].str.slice(4)
        arithmetic_rules = arithmetic_rules.fillna(value={"Formula": "0"})
        arithmetic_rules["Formula"] = arithmetic_rules["Formula"].astype(str)
        arithmetic_rules["Formula"] = arithmetic_rules["Formula"].replace({"0":"*0"})
        ## separating Fixed records
        ### processing arithmetic_rules for billings - 1.ignore camp 4, 2. rename Fly Creek to Billings
        ###     this is to avoid applying the arithmetic twice for billings, and all accounts except on are fly creek + camp 4 (identical calculation)
        ###     the only exception is for amortization, camp 4 was fixed and fly creek was arithmetic - ignore the fixed cost for camp 4 for fixed as well
        ### adjusted in excel sheet instead
        # arithmetic_rules = arithmetic_rules[~((arithmetic_rules["Location"]=="Camp 4") & (arithmetic_rules["Category"]))]
        # arithmetic_rules["Location"] = arithmetic_rules["Location"].replace(location_adj)
        arithmetic_rules_fixed = arithmetic_rules[arithmetic_rules["AccRef"] == "Fixed"].copy(deep=True)
        arithmetic_rules = arithmetic_rules[arithmetic_rules["AccRef"]!="Fixed"].copy(deep=True)

        ## process fixed records
        arithmetic_rules_fixed = arithmetic_rules_fixed.drop(columns=["FormulaFull","ReferenceYear"]).rename(columns={"FixedRef":"TotalAmountCAD"})
        arithmetic_rules_fixed["AmountCAD"] = arithmetic_rules_fixed.apply(lambda x: eval(f"{x["TotalAmountCAD"]}{x["Formula"]}"),axis=1)

        ## Extract Account Info
        arithmetic_rules["AccNum"] = arithmetic_rules["AccRef"].apply(lambda x: "".join(x.split(" ")[0:2]))
        arithmetic_rules["AccName"] = arithmetic_rules["AccRef"].apply(lambda x: (" ".join(x.split(" ")[2:]).strip()))
        assert "Fixd" not in arithmetic_rules.ReferenceYear.unique(), "Fixd records incorrectly classified"
        ## separate FY-1 & FY+1
        arithmetic_rules_prior = arithmetic_rules[arithmetic_rules["ReferenceYear"] == "FY-1"].copy(deep=True)
        arithmetic_rules = arithmetic_rules[arithmetic_rules["ReferenceYear"] == "FY+1"].copy(deep=True)

        ## process FY-1 with actuals - deleted AccName - assuming it's not being used for actuals
        actuals = transactions.groupby(["Location", "AccNum", "FiscalYear"]).agg({"AmountDisplay":"sum"}).reset_index(drop=False)
        arithmetic_rules_prior["FiscalYear"] = self.currentFY - 1
        duplicated = actuals[actuals.duplicated(subset=["AccNum","FiscalYear","Location"],keep=False)]
        assert len(duplicated) == 0, f"Duplicated AccNum detected for FY-1 Actuals - {duplicated.AccNum.unique()}"
        budget_prior = pd.merge(arithmetic_rules_prior,actuals.rename(columns={"AmountDisplay":"TotalAmountCAD"}),
                                on = ["Location","AccNum","FiscalYear"], how="left")
        budget_prior = budget_prior.fillna(value={"TotalAmountCAD": 0})
        budget_prior["AmountCAD"] = budget_prior.apply(lambda x: eval(f"{x["TotalAmountCAD"]}{x["Formula"]}"), axis=1)

        ## processing FY+1 with current budget
        ### budget sales that is based on production budget input sheet
        arithmetic_rules_sales = arithmetic_rules[arithmetic_rules["Category"].str.contains("cash settlements")].copy(deep=True)
        # production_reference = pd.concat([budget_production.copy(deep=True), budget_outlook.copy(deep=True), budget_az_produce.copy(deep=True),budget_bc_produce.copy(deep=True)])
        production_reference = budget_production.copy(deep=True)   # excluding produce 
        production_reference = production_reference.groupby(["Location","AccFull"]).agg({"AmountCAD":"sum"}).reset_index(drop=False)
        budget_sales = pd.merge(arithmetic_rules_sales,production_reference.rename(columns={"AccFull":"AccRef"}), on=["Location","AccRef"], how="left")
        budget_sales = budget_sales.rename(columns={"AmountCAD":"TotalAmountCAD"})
        budget_sales["AmountCAD"] = budget_sales.apply(lambda x: eval(f"{x["TotalAmountCAD"]}{x["Formula"]}"),axis=1)
        budget_prior = pd.concat([budget_prior, budget_sales],ignore_index=True)

        ### budget inventory adjustment 
        arithmetic_rules_inventory = arithmetic_rules[arithmetic_rules["Category"].str.contains("inventory adjustment",case=False)].copy(deep=True)
        budget_inventory = pd.merge(arithmetic_rules_inventory, budget_prior.loc[:,["Location","AccFull","Month","AmountCAD"]].rename(columns={"AccFull":"AccRef"}),
                                on = ["Location","AccRef", "Month"], how = "left")
        budget_inventory["AmountCAD"] = -budget_inventory["AmountCAD"]
        budget_prior = pd.concat([budget_prior, budget_inventory], ignore_index=True)

        ## combine with fixed budgets
        budget_prior = pd.concat([budget_prior,arithmetic_rules_fixed],ignore_index=True)

        # copied data
        budget_copy = pd.read_csv(copied_path/"Copied Data.csv")
        copy_rename = {"Outlook (grain)": "Outlook", "Outlook (produce)":"Outlook"}
        budget_copy["Location"] = budget_copy["Location"].replace(copy_rename)
        budget_copy = budget_copy.melt(
            id_vars=["Location","Category","AccFull"],
            var_name = "Month",
            value_name = "AmountCAD"
        )
        budget_copy = budget_copy.fillna(value={"AmountCAD":0})
        budget_copy["AmountCAD"] = budget_copy["AmountCAD"].astype(float)
        budget_copy["FiscalYear"] = self.currentFY
        budget_copy["AccRef"] = "Copy"
        budget_copy["ReferenceYear"] = "NA"
        budget_copy["Formula"] = "NA"
        budget_copy["TotalAmountCAD"] = budget_copy["AmountCAD"]
        budget_copy.loc[budget_copy["Location"]=="Seeds USA", "AmountCAD"] *= self.fx
        budget_copy.loc[budget_copy["Location"]=="Arizona (produce)", "AmountCAD"] *= self.fx

        # combining all budgets
        # budget_outside = pd.concat([budget_input,budget_production,budget_labour,budget_equipment, budget_outlook, budget_az_produce, budget_bc_produce],ignore_index=True)
        budget_outside = pd.concat([budget_input,budget_production,budget_labour,budget_equipment],ignore_index=True)   # produce budget inside copied data
        budget_outside = budget_outside.drop(columns=["Commodity"])
        col_drop = ["FormulaFull","AccNum","AccName_x", "AccName_y", "AccName", "FixedRef"]
        budget_prior = budget_prior.drop(columns=[x for x in col_drop if x in budget_prior.columns])
        budget_all = pd.concat([budget_outside,budget_prior,budget_copy],ignore_index=True)
        budget_all["AccNum"] = budget_all["AccFull"].apply(lambda x: "".join(x.split(" ")[0:2]))
        budget_all["AccName"] = budget_all["AccFull"].apply(lambda x: " ".join(x.split(" ")[2:]))
        budget_all["FiscalYear"] = self.currentFY 
        budget_all["DataType"] = "Budget"
        # budget_all.loc[budget_all["Category"].str.contains("inventory adjustment",case=False), "AmountCAD"] *= -1 # turn the sign positive for inventory adjustments (my classification only)

        # enbale location to be modified, e.g., for Camp 4 + Fly Creek = Billings
        budget_all["LocationRaw"] = budget_all["Location"]
        budget_all["Location"] = budget_all["Location"].replace(location_adj)
        # return budget_all

        # save
        self.check_file(self.gold_path["budget"]/"OutputFile")
        budget_all.to_csv(self.gold_path["budget"]/"OutputFile"/"budget_all.csv", index=False)

    def _budget_update(self, force_create:bool=True, force_process_input:bool=False) -> None:
        """ 
            refresh the combined budget + actual dataset of the budget system
                force_create: rebuild budget_all.csv through _create_budget() even when the file already exists
                force_process_input: forwarded to _create_budget() as process_input
                writes actuals_all.csv and all_all.csv into the budget OutputFile folder;
                    outside dev mode also writes the BudgetActual.xlsx workbooks (overall + one per pillar)
        """
        print("\nGenerating/Updating Actuals for budget system\n")
        # dev mode reloads previously saved operational outputs, otherwise make sure the PL has been processed
        if self.is_dev:
            self.operation_acc = pd.read_csv(self.gold_path["finance_operational"]/"AccNumTOAccID.csv")
            self.gold_pl = pd.read_csv(self.gold_path["finance_operational"]/"PL.csv")
            self.fx = 1.3988
        elif not self.pl_exist:
            self._finance_operational()

        # budget side: (re)create when requested or missing, then keep only the reporting columns
        output_dir = self.gold_path["budget"]/"OutputFile"
        budget_file = output_dir/"budget_all.csv"
        if force_create or not budget_file.exists():
            self._create_budget(process_input=force_process_input)
        keep_cols = ["Location", "SheetRef", "Month", "Formula", "TotalAmountCAD", "AmountCAD", "AccRef", "ReferenceYear","FiscalYear", "AccNum", "DataType", "Category"]
        budget = pd.read_csv(budget_file)[keep_cols]
        location_alias = {"Airdrie (grain)": "Airdrie", "Airdrie (cattle)": "Airdrie", "Calderbank (cattle)": "Calderbank",
                          "Airdrie (corporate)": "Airdrie", "Seeds USA":"Arizona (produce)"}
        budget["Location"] = budget["Location"].replace(location_alias)
        # NOTE: Category is deliberately not mapped onto actuals by AccNum - old, changed AccNum got mapped to an incorrect Category

        # actual side: monthly totals per location/account, current fiscal year only
        transactions = self._budget_get_transactions()
        monthly_totals = transactions.groupby(["Location","AccNum", "FiscalYear", "Month"]).agg({"AmountDisplay":"sum"}).reset_index(drop=False)
        actuals_all = monthly_totals[monthly_totals["FiscalYear"] == self.currentFY]\
                        .assign(DataType="Actual")\
                        .rename(columns={"AmountDisplay": "AmountCAD"})
        actuals_all.to_csv(output_dir/"actuals_all.csv", index=False)
        budget_only_locations = set(budget["Location"].unique()) - set(actuals_all["Location"].unique())
        print(f"Location Unaccounted for in budget: {budget_only_locations}")

        # stack budget and actuals
        all_all = pd.concat([budget,actuals_all],ignore_index=True)
        all_all["FXRate"] = self.fx
        # reroute accounts whose account numbers changed
        all_all["AccNum"] = all_all["AccNum"].replace(self.accnum_reroute)
        # operational classification - the AccNum to AccID mapping comes from _finance_operational()
        repeated_accnum = self.operation_acc.duplicated(subset=["AccNum"],keep=False)
        assert not repeated_accnum.any(), "Duplicated AccNum Detected - Operational Accounts Classification"
        self._extract_accnum_accid()
        all_all["AccID"] = all_all["AccNum"].map(self.acc_map)
        # report the money sitting on account numbers that could not be mapped to an AccID
        mismatch = all_all[all_all["AccID"].isna() & (all_all["AmountCAD"] != 0)]
        print(f"Total amount unaccounted for because of accnum mismatching - ${mismatch.AmountCAD.sum()}")
        print(f"AccIDs with non-zero amount: {mismatch.AccNum.unique()}")
        # classify pillars row by row
        all_all["Pillar"] = all_all.apply(self._pillar_classification, axis=1)

        # save
        all_all.to_csv(output_dir/"all_all.csv", index=False)
        self.check_file(self.gold_path["budget"]/"OutputPowerBI")
        if self.is_dev:
            # dev runs stop at the csv outputs and never touch the Power BI workbooks
            return
        print("Saving ...")
        all_all.to_excel(self.gold_path["budget"]/"OutputPowerBI"/"BudgetActual.xlsx", sheet_name="Budget", index=False)
        for pillar in ["Grain", "Cattle", "Seed", "Produce"]:
            pillar_rows = all_all[all_all["Pillar"]==pillar]
            pillar_rows.to_excel(self.gold_path["pillar_dashboard"]/pillar/"BudgetActual.xlsx", sheet_name="Budget", index=False)
            
    def _create_additional_financial(self, summary:pd.DataFrame) -> pd.DataFrame:
        """ 
            this function create Gross Margin, Contribution Margin, EBITDA, Net Income financial terms for the summary table
                summary: must be broken down to fiscal year, month, location, with one row per ProfitType
                    (columns used: FiscalYear, Month, Location, Pillar, ProfitType, AmountDisplay)
                returns: a new table with the same columns as summary holding four derived rows per
                    (FiscalYear, Month, Location) combination present in summary; the input is not modified
                raises: ValueError (from .item()) when a combination has more than one row for the same ProfitType or more than one Pillar
        """
        items = ["Sales Revenue", "Cost of Goods Sold", "Direct Operating Expenses", "Other Operating Revenue", "Operating Overheads", "Other Income", "Other Expense"]
        # derived rows are collected in a plain list and turned into a DataFrame once at the end;
        # enlarging a DataFrame one row at a time re-allocates it on every insert
        derived_rows = []
        # loop-invariant: evaluate the distinct keys once instead of once per outer iteration
        years = summary["FiscalYear"].unique()
        months = summary["Month"].unique()
        locations = summary["Location"].unique()
        for y in years:
            subset_year = summary[summary["FiscalYear"] == y]
            for m in months:
                subset_month = subset_year[subset_year["Month"] == m]
                if len(subset_month) == 0:
                    # no location can match inside an empty month slice
                    continue
                for l in locations:
                    subset_location = subset_month[subset_month["Location"] == l]
                    if len(subset_location) == 0:
                        continue
                    # profit types missing for this combination count as 0
                    values = dict.fromkeys(items, 0)
                    available = set(subset_location["ProfitType"].unique())
                    for i in items:
                        if i in available:
                            values[i] = subset_location.loc[subset_location["ProfitType"]==i, "AmountDisplay"].item()
                    gross_margin = values["Sales Revenue"] - values["Cost of Goods Sold"]
                    contribution_margin = gross_margin - values["Direct Operating Expenses"] + values["Other Operating Revenue"]
                    ebitda = contribution_margin - values["Operating Overheads"]
                    net_income = ebitda + values["Other Income"] - values["Other Expense"]
                    # a combination is expected to belong to exactly one pillar - .item() raises otherwise
                    pillar = subset_location["Pillar"].unique().item()
                    row = {"FiscalYear": y, "Month": m, "Location": l, "Pillar":pillar}
                    derived_rows.append(row | {"ProfitType": "Gross Margin", "AmountDisplay": gross_margin})
                    derived_rows.append(row | {"ProfitType": "Contribution Margin", "AmountDisplay": contribution_margin})
                    derived_rows.append(row | {"ProfitType": "EBITDA", "AmountDisplay": ebitda})
                    derived_rows.append(row | {"ProfitType": "Net Income", "AmountDisplay": net_income})
        if not derived_rows:
            # keep the column layout (and dtypes) of the input when there is nothing to derive
            return summary.head(0).copy(deep=True)
        return pd.DataFrame(derived_rows, columns=summary.columns)

    def _finance_summary(self, create_allocation_reference = True) -> None:
        """ 
            assemble the income-statement style summary (ProfitType totals plus Gross Margin, Contribution Margin, EBITDA, Net Income)
                per fiscal year, month, location and pillar, once for actuals and once for budget, stacked with a DataType column
                actuals come from self.gold_pl (FiscalYear >= 2024), budget from the processed all_all.csv of the budget project
                outputs ProfitTypeSummary.csv / .xlsx in the finance operational folder and one ProfitTypeSummary.xlsx per pillar dashboard
                create_allocation_reference is accepted but not referenced inside this function
        """
        if self.is_dev:
            self.operation_acc = pd.read_csv(self.gold_path["finance_operational"]/"AccNumTOAccID.csv")
            self.gold_pl = pd.read_csv(self.gold_path["finance_operational"]/"PL.csv")
            self.fx = 1.3844
        elif not self.pl_exist:
            self._finance_operational()

        keys_with_pillar = ["FiscalYear", "Month", "Location", "Pillar", "ProfitType"]
        keys_without_pillar = ["FiscalYear", "Month", "Location", "ProfitType"]
        total_amount = {"AmountDisplay":"sum"}

        # ---- actuals ----
        data = self.gold_pl[self.gold_pl["FiscalYear"] >=  2024].copy(deep=True)
        income_accounts = self.gold_acc[self.gold_acc["AccountingType"] == "Income Statement"]
        # AccID -> ProfitType lookup, reused for the budget below
        id_prof_map = income_accounts.set_index("AccID")["ProfitType"]
        data["ProfitType"] = data["AccID"].map(id_prof_map)
        summary = data.groupby(keys_with_pillar).agg(total_amount).reset_index(drop=False)
        # grouping without Pillar must give the same number of rows, otherwise a location sits in several pillars
        rows_without_pillar = len(data.groupby(keys_without_pillar).agg(total_amount))
        assert len(summary) == rows_without_pillar, "duplicated location-pillar detected"
        # append the derived financial lines (Gross Margin, Net Income, ...) and tag before the budget is added
        summary = pd.concat([summary, self._create_additional_financial(summary)],ignore_index=True)
        summary["DataType"] = "Actual"

        # ---- budget ----
        budget = pd.read_csv(self.gold_path["budget"]/"OutputFile"/"all_all.csv")
        budget = budget[budget["DataType"] == "Budget"]
        budget = budget.loc[:,["Location", "Month", "FiscalYear", "AmountCAD", "DataType", "AccID", "Pillar","AccNum"]]\
                       .rename(columns={"AmountCAD":"AmountDisplay"})
        # rows without an AccID are tolerated only when they carry no amount
        budget = budget[(budget["AccID"].notna())|(budget["AmountDisplay"] != 0)].reset_index(drop=True)
        unmapped = budget[budget["AccID"].isna()]
        assert len(unmapped) == 0, f"Unaccounted accounts - {unmapped.AccNum.unique()}"
        budget = budget.drop(columns=["AccNum"])
        budget["ProfitType"] = budget["AccID"].map(id_prof_map)
        summary_budget = budget.groupby(keys_with_pillar).agg(total_amount).reset_index(drop=False)
        budget_rows_without_pillar = len(budget.groupby(keys_without_pillar).agg(total_amount))
        assert len(summary_budget) == budget_rows_without_pillar, "repeat Pillar detected when summarizing budget"
        summary_budget = pd.concat([summary_budget, self._create_additional_financial(summary_budget)],ignore_index=True)
        summary_budget["DataType"] = "Budget"

        # ---- coverage check between the two sides ----
        actual_locations = set(summary.Location.unique())
        budget_locations = set(summary_budget.Location.unique())
        print(f"Location missing from budget - {actual_locations - budget_locations}")
        print(f"Location missing from actual - {budget_locations - actual_locations}")

        # ---- save ----
        summary_all = pd.concat([summary, summary_budget],ignore_index=True)
        operational_dir = self.gold_path["finance_operational"]
        summary_all.to_csv(operational_dir/"ProfitTypeSummary.csv", index=False) # for reclassifying accounts
        summary_all.to_excel(operational_dir/"ProfitTypeSummary.xlsx", sheet_name="ProfitTypeSummary", index=False)
        for pillar in ["Grain", "Cattle", "Seed", "Produce"]:
            pillar_rows = summary_all[summary_all["Pillar"]==pillar]
            pillar_rows.to_excel(self.gold_path["pillar_dashboard"]/pillar/"ProfitTypeSummary.xlsx", sheet_name="ProfitTypeSummary", index=False)

    def _APAR_concat_memo(self, df:pd.DataFrame) -> pd.DataFrame:
        """ 
            Collapse the line-level 'TransactionEntered' texts into a single string per TransactionID_partial
                df: needs the columns TransactionID_partial, TransactionEntered, Line_Id, PrivateNote, TransactionType
                returns: one row per (TransactionID_partial, PrivateNote, TransactionType) with the numbered line texts joined by a space
                asserts that no TransactionID_partial ends up on more than one row
        """
        memo_cols = ["TransactionID_partial", "TransactionEntered", "Line_Id", "PrivateNote", "TransactionType"]
        group_cols = ["TransactionID_partial","PrivateNote","TransactionType"]
        # missing texts become the literal "Missing" so every value is a string that can be joined / grouped on
        lines = df[memo_cols].fillna(value={"TransactionEntered":"Missing", "PrivateNote": "Missing"})
        # prefix each text with its line number to keep the joined memo readable
        lines["TransactionEntered"] = lines["Line_Id"].astype(str) + ". " + lines["TransactionEntered"]
        # order the lines inside each transaction before joining them
        ordered = lines.sort_values(by=["TransactionID_partial", "Line_Id"],ignore_index=True)
        memo = ordered.groupby(group_cols)["TransactionEntered"].agg(" ".join).reset_index(drop=False)
        repeated = memo.duplicated(subset="TransactionID_partial")
        assert not repeated.any(), "duplicated transactionID spotted when creating TransactionEntered Concatenation"
        return memo
    
    def _APRporting_project(self, date:set[int] = None) -> None:
        """ 
            This function connects APAgingDetails report from QBO API, and combined with raw tables such as Bill to form a comprehensive report that meets finance team's need
                This function supports date input for processing the report at that exact date
        """
        print("\nProcessing AP Rerpot\n")
        if date is not None:
            assert len(date) == 3, f"please pass the date as (YYYY, M, D), passed {date}"
            year, month, day = date
        else:
            year, month, day = self.today.year, self.today.month, self.today.day
        # read APAging report from silver space
        try:
            report = pd.read_csv(self.silver_path["QBO"]["APAR"] / "AgedPayableDetail" / str(year) / str(month) / f"{day}.csv")
        except:
            print(f'csv file not found at {self.silver_path["QBO"]["APAR"] / "AgedPayableDetail" / str(year) / str(month) / f"{day}.csv"}')
        report = report.rename(columns={"TransactionTypeID":"TransactionID_partial"})
        report_transactiontype_rename = {"Bill Payment (Cheque)":"BillPaymentCheck", "Bill Payment (Credit Card)":"BillPaymentCheck", "Bill Payment (Check)":"BillPaymentCheck",
                                        "Cheque Expense":"Purchase", "Supplier Credit": "Vendor Credit"}
        # standardize the Transaction Types
        report["TransactionType"] = report["TransactionType"].replace(report_transactiontype_rename)
        # create mapping table from facts
        cols = ["TransactionDate", "TotalAmt", "PrivateNote", "APAccID", "DocNumber", "TransactionEntered", "Amount", "TransactionID", "VendorID", "FarmID", "TransactionID_partial", "Line_Id",
                "AccID"]
        ## get bill table ready for mapping - only taking bill transactions that are relevant to AP report
        bill_col = cols + ["TermID"]
        bill = pd.read_csv(self.silver_path["QBO"]["Raw"]/"Bill.csv",usecols=bill_col)
        bill = bill[bill["TransactionID_partial"].isin(report[report["TransactionType"]=="Bill"]["TransactionID_partial"].unique())]
        bill["TransactionType"] = "Bill"
        bill_map = self._APAR_concat_memo(bill) # results should only have 4 columns:  TransactionID_partial, PrivateNote, TransactionType, TransactionEntered (concatenated)
        ## vendorcredit
        vc = pd.read_csv(self.silver_path["QBO"]["Raw"] / "VendorCredit.csv", usecols=cols)
        vc = vc[vc["TransactionID_partial"].isin(report[report["TransactionType"]=="Vendor Credit"]["TransactionID_partial"].unique())]
        vc["TransactionType"] = "Vendor Credit"
        vc_map = self._APAR_concat_memo(vc)
        ## journal entry
        journal_cols = cols + ["JEType"]
        journal = pd.read_csv(self.silver_path["QBO"]["Raw"] / "JournalEntry.csv", usecols=[x for x in journal_cols if x not in ['TotalAmt', 'APAccID']])
        journal = journal[journal["TransactionID_partial"].isin(report[report["TransactionType"]=="Journal Entry"]["TransactionID_partial"].unique())]
        journal["TransactionType"] = "Journal Entry"
        journal_map = self._APAR_concat_memo(journal)
        # merging and save facts
        fact = pd.concat([bill, vc, journal], ignore_index=True)
        fact.to_excel(self.gold_path["APReporting"]/"Facts.xlsx", index=False, sheet_name = "Facts")
        # use map table to concate PrivateNote and TransactionEntered into AP Report
        fact_map = pd.concat([bill_map, vc_map, journal_map],ignore_index=True)
        report2 = pd.merge(report, fact_map, on=["TransactionID_partial", "TransactionType"], how="left")
        assert len(report) == len(report2), "After concatenating the TransactionEntered column, duplicated transactionID detected"
        report2.to_excel(self.gold_path["APReporting"]/"APReport.xlsx", index=False, sheet_name="APReport")
        # save the Vendor and Location table
        vendor = pd.read_csv(self.silver_path["QBO"]["Dimension"]/"CSV"/"Vendor.csv")
        vendor.to_excel(self.gold_path["APReporting"].parent / "Vendor.xlsx", sheet_name = "Vendor", index=False)
        farm = pd.read_csv(self.silver_path["QBO"]["Dimension"]/"CSV"/"Farm.csv")
        farm.to_excel(self.gold_path["APReporting"].parent / "Location.xlsx", sheet_name = "Location", index=False)
        print("AP Report Done \n")

    def _HP_transformation(self) -> None:
        """ 
            This function transforms the raw data from Harvest Profit in the csv format from Silver space to curated data in Gold space for Power BI
        """
        print("\nTransforming Harvest Profit Data\n")
        # file paths
        silver_path_Delivery_HP = self.silver_path["Delivery"]["HP"]
        gold_path_inventory = self.gold_path["inventory"]
        gold_HP = gold_path_inventory/"Grain"
        product_path = gold_path_inventory / "Tables" / "commodities_acc.csv"
        folder_fields = silver_path_Delivery_HP / "Fields" / "2025"
        customers_rename = {"Viterra Kamsack":"Bunge Kamsack"}
        # read raw data
        df = pd.read_csv(silver_path_Delivery_HP/f"Loads_{self.today.year}_{self.today.month}.csv")
        # standardizing df
        df_rename = {"crop": "ProductRaw", "amount":"AmountRaw", "accepted_amount":"AcceptedAmountRaw", "amount_unit":"AmountRawUnit", "date":"TransactionDate", "entity_share":"LocationRaw",
                    "contract":"ContractRaw"}
        df = df.rename(columns=df_rename)
        df["TransactionDate"] = pd.to_datetime(df["TransactionDate"], format="%m/%d/%Y %I:%M %p")
        df["TransactionDate"] = df["TransactionDate"].dt.date
        # standardize location
        df_location_map = {"MFB Billings - US": "Billings", "MFS Swift Current - CA": "SwiftCurrent", "MFH Hafford - CA": "Hafford", "MFK Kamsack - CA":"Kamsack",
                   "MFR Regina - CA": "Regina", "MFPAS The Pas - CA": "ThePas", "MFPA  Prince Albert - CA":"PA", "MFRAY Raymore - CA":"Raymore", 
                   "MFE Eddystone - CA": "Eddystone", "MFO Outlook - CA": "Outlook", "MFAIR Airdrie - CA": "Airdrie", "Nexgen Seeds": "NexGen"}
        df["Location"] = df["LocationRaw"].replace(df_location_map)
        # preliminary transformation
        df = df[df["AmountRaw"]!=0]
        df["from"] = df["from"].str.strip()
        df["to"] = df["to"].str.strip()
        df.loc[((df["Location"]=="Kamsack")&(df["to"]=="Viterra Kamsack")), "to"] = "Bunge Kamsack"
        df.loc[((df["Location"]=="Hafford")&(df["to"]=="D.C.- P&H North Battleford")), "to"] = "P&H North Battleford"
        df.loc[((df["Location"]=="Hafford")&(df["to"]=="D.C Bunge Wheat Truck Bin")), "to"] = "Bunge Hafford"
        df.loc[((df["Location"]=="Hafford")&(df["to"]=="D.C.- Maymont GrainCorp Wheat - Truck Bin")), "to"] = "Maymont Hafford"
        # compute MT
        ## convert Bu to MT for every location excpet Billings where they might have legit Bu measures
        # df.loc[((df["Location"]!="Billings")&(df["AmountRawUnit"]=="Bu")&(df["harvest_profit_id"]!=1010153)), "AmountRawUnit"] = "Mt"
        bushels = df[df["AmountRawUnit"] == "Bu"].copy(deep=True)
        ## when units are not Bu
        df = df[df["AmountRawUnit"] != "Bu"].copy(deep=True)
        ## create unit mapping table
        units = ["Lbs", "Kg", "Mt"]
        divisor = [self.conversion_mt_to_lb, 1000, 1]
        mapping_table = pd.DataFrame(data={"AmountRawUnit": units, "divisor":divisor})
        mapping_table = mapping_table.set_index("AmountRawUnit")["divisor"]
        ## map mapping table and perform arithmetics
        df["divisor"] = df["AmountRawUnit"].map(mapping_table)
        df["AcceptedAmount"] = df["AcceptedAmountRaw"] / df["divisor"]
        df["Amount"] = df["AmountRaw"] / df["divisor"]
        df = df.drop(columns=["divisor"])
        ## when units are bushels
        if len(bushels) >= 1:
            bushels["Amount"] = bushels.apply(lambda x: x["AmountRaw"] * self.conversion_bu_to_lb_canola / self.conversion_mt_to_lb if "canola" in x["ProductRaw"].lower() 
                                            else x["AmountRaw"] * self.conversion_bu_to_lb_others / self.conversion_mt_to_lb, axis=1)
            bushels["AcceptedAmount"] = bushels.apply(lambda x: x["AcceptedAmountRaw"] * self.conversion_bu_to_lb_canola / self.conversion_mt_to_lb if "canola" in x["ProductRaw"].lower() 
                                                    else x["AcceptedAmountRaw"] * self.conversion_bu_to_lb_others / self.conversion_mt_to_lb, axis=1)
            df = pd.concat([df, bushels], ignore_index=True)
        # standardizing product - matching with QBO except Uncategorized Lentil
        def _create_hp_product_mapping(products:list[str]) -> dict[str,str]:
            """ 
                This function create mapping ProductRaw -> Product for Harvest Profit, only process strings, ignores missing values
            """
            mapping = {}
            for p in products:
                if isinstance(p, str):
                    p_lower = p.lower()
                    if 'green lentil' in p_lower:
                        mapping[p] = "Green Lentil"
                    elif 'red lentil' in p_lower:
                        mapping[p] = "Red Lentil"
                    elif 'lentil' in p_lower:
                        mapping[p] = "Unclassified Lentil"
                    elif 'barley' in p_lower:
                        mapping[p] = "Barley"
                    elif 'canola' in p_lower:
                        mapping[p] = "Canola"
                    elif 'durum' in p_lower:
                        mapping[p] = "Durum"
                    elif 'winter' in p_lower and 'wheat' in p_lower:
                        mapping[p] = "Winter Wheat"
                    elif 'wheat' in p_lower:
                        mapping[p] = "Wheat"
                    elif 'chickpea' in p_lower:
                        mapping[p] = "Chickpea"
                    elif 'pea' in p_lower:
                        mapping[p] = "Field Pea"
                    elif 'carrots' in p_lower:
                        mapping[p] = "Carrots"
                    else:
                        mapping[p] = "Unrecognized"
            return mapping
        product_mapping = _create_hp_product_mapping(df.ProductRaw.unique())
        df["Product"] = df["ProductRaw"].map(product_mapping)
        ## for FY 2025 - Regina - Green Lentil, fly creek & Raymore - Red Lentil, unspecified in HP - adjust
        df.loc[((df["Product"] == "Unclassified Lentil")&(df["Location"] == "Regina")), "Product"] = "Green Lentil"
        df.loc[((df["Product"] == "Unclassified Lentil")&(df["Location"] == "Billings")), "Product"] = "Red Lentil"
        df.loc[((df["Product"] == "Unclassified Lentil")&(df["Location"] == "Raymore")), "Product"] = "Red Lentil"

        
        # determine transfer mode
        
        ## use Location + From/To name to match fields_df 
        df["FromExtended"] = df["Location"] + "-" + df["from"].str.strip()
        df["FromExtended"] = df["FromExtended"].str.split(",").str[0]   # only look at first portion of multiple inputs to determine whether it's harvest records
        df["ToExtended"] = df["Location"] + "-" + df["to"].str.strip()
        
        ## create fields df
        files = os.listdir(folder_fields)
        fields_df = pd.DataFrame()
        for f in files:
            location = f.split(".")[0].split("_")[-1]   # extract the last part (location name) after _ separater before .xlsx suffix
            temp = pd.read_excel(folder_fields/f,dtype={"Field": str, "Acres":float})
            temp["Location"] = location
            fields_df = pd.concat([fields_df, temp],ignore_index=True)
        fields_nexgen = fields_df[fields_df["Location"]=="SwiftCurrent"].copy(deep=True)
        fields_nexgen["Location"] = "NexGen"
        fields_df = pd.concat([fields_df, fields_nexgen], ignore_index=True)
        ### Location + Field to match with HP data
        fields_df["FieldRaw"] = fields_df["Field"]
        fields_df["Field"] = fields_df["Location"] + "-" + fields_df["FieldRaw"]
        fields_df.to_csv(gold_HP / "HPFields.csv", index=False)
        
        ## determine whether a load is from field
        ### determine if a load is harvest from a field
        df["FromField"] = df["FromExtended"].isin(fields_df["Field"].unique())
        df["ToField"] = df["ToExtended"].isin(fields_df["Field"].unique())
        
        ## determine whether a load is from/to bins and identify direct deliveries
        ### read bins records
        bins_df = pd.read_csv(gold_HP / "HPBins.csv", dtype={"Bins":str, "Location":str})
        bins_nexgen = bins_df[bins_df["Location"]=="SwiftCurrent"].copy(deep=True)
        bins_nexgen["Location"] = "NexGen"
        bins_df = pd.concat([bins_df, bins_nexgen],ignore_index=True)
        # bins_df["IsDirect"] = bins_df["Bins"].str.contains(r"^(?!.*(?:Cart|Bin)).*(?:(?i:direct)|FTC|D\.C\.)", regex=True)
        bins_df["IsDirect"] = False
        bins_df["Bins"] = bins_df["Location"] + "-" + bins_df["Bins"]
        ### determine load from/to bins
        df["FromBins"] = df["FromExtended"].isin(bins_df[~bins_df["IsDirect"]]["Bins"].unique())
        df["ToBins"] = df["ToExtended"].isin(bins_df[~bins_df["IsDirect"]]["Bins"].unique())
        ## transfer_mode - applicable for valid records only
        ### transfer_mode = Harvest -> FromField==True & ToBins==True
        df.loc[(df["FromField"]&df["ToBins"]), "TransferMode"] = "Harvest"
        ### transfer_mode = Sales -> FromBins==True & ToBins==False & ToField==False
        df.loc[((df["FromBins"])&(~df["ToBins"])&(~df["ToField"])), "TransferMode"] = "Sales"
        ### transfer_mode = BinTransfer -> FromBins==True & ToBins==True
        df.loc[(df["FromBins"]&df["ToBins"]), "TransferMode"] = "BinTransfer"
        ## identify direct deliveries
        # mask = (
        #     df["FromExtended"].isin(bins_df[bins_df["IsDirect"]].Bins.unique()) | 
        #     df["ToExtended"].isin(bins_df[bins_df["IsDirect"]].Bins.unique())

        # )
        # df["IsDirect"] = mask 
        df["IsDirect"] = False
        ### in addition, if FromField==True & to is in customers list, this is also direct delivery
        df.loc[((df["FromField"])&(df["to"].isin(self.customers_list))), "IsDirect"] = True
        ### transfer_mode = Sales -> Direct Delivery
        df.loc[df["IsDirect"], "TransferMode"] = "Sales"
        
        ## invalid loads
        df = df.fillna(value={"TransferMode": "Invalid"})
        df.loc[df["TransferMode"]!="Invalid", "Flag"] = "Valid Loads"
        ### flag invalid loads
        #### when both from & to is missing, flag those records
        df.loc[((df["from"].isna())&(df["to"].isna())), "Flag"] = "[from] and [to] Unassigned"
        #### when from is missing and to is not a field or a bin, assume those records are to a customer, without stating which inventory it is from
        df.loc[((df["TransferMode"]=="Invalid")&(df["from"].isna())&(~df["ToBins"])&(df["to"].notna())), "Flag"] = "Deliveries with Unknown Origin (Bins)"
        #### when the transfer mode is sales, not direct delivery, is from bins, and to is missing, flag those loads as Delivery missing Customer
        df.loc[((df["TransferMode"]=="Sales")&(~df["IsDirect"])&(df["FromBins"])&(df["to"].isna())), "Flag"] = "Deliveries with Unknown Destination (Customer)"
        #### when the load is from field (harvest) and to is missing, flag those loads
        df.loc[((df["FromField"])&(df["to"].isna())), "Flag"] = "Harvest with Unknown Destination (Bins)"
        #### when to is a bin (most likely from harvest), and from is missing
        df.loc[((df["TransferMode"]=="Invalid")&(df["from"].isna())&(df["ToBins"])), "Flag"] = "Harvest with Unknown Origin (Field)"
        exception = df[df["Flag"].isna()]
        if len(exception) != 0:
            print(f"\nThere are {len(exception)} number of exceptions unaccounted for in \n{exception.Location.value_counts()}\n")
        df = df.fillna({"Flag": "Unknown - Investigation Required"})

        
        # create LoadType label to include invalid loads
        df.loc[(df["IsDirect"]), "LoadType"] = "Direct Delivery"
        df.loc[(df["FromField"] | df["ToBins"]) & (~df["IsDirect"]), "LoadType"] = "Harvest"
        df.loc[((~(df["FromField"] | df["ToBins"])) & (~df["IsDirect"])), "LoadType"] = "Delivery from Inventory"
        df.loc[df["Flag"]=="[from] and [to] Unassigned", "LoadType"] = "Undefined Loads"
        # df.loc[((~(df["FromField"] | df["ToBins"])) & (~df["IsDirect"])&(df["to"].notna())), "LoadType"] = "Delivery from Inventory"
        # df.loc[((~(df["FromField"] | df["ToBins"])) & (~df["IsDirect"])&(df["to"].isna())), "LoadType"] = "Undefined Loads"
        df.loc[((~df["FromBins"])&(~df["FromField"])&(df["from"].notna())&(~df["IsDirect"])), "LoadType"] = \
            df.loc[((~df["FromBins"])&(~df["FromField"])&(df["from"].notna())&(~df["IsDirect"])), "TransferMode"] = "Not Load"
        df.loc[df["TransferMode"]=="BinTransfer", "LoadType"] = "BinTransfer"

        # bin inventory - create a column to store bin no matter if it is from or to a bin
        direct_others = df[df["LoadType"].isin(["Direct Delivery","Undefined Loads","Not Load","BinTransfer"])].copy(deep=True)
        harvest = df[df["LoadType"]=="Harvest"].copy(deep=True)
        harvest["Bin"] = harvest.loc[harvest["ToBins"], "to"]
        others = df[df["LoadType"]=="Delivery from Inventory"].copy(deep=True)
        others["Bin"] = others["from"]
        df = pd.concat([harvest, others, direct_others], ignore_index=True)

        # create MT monitoring mechanism
        ranges = [-np.inf, 0, 10, 36, 48, 95, 10000, np.inf]
        ranges_indexer = pd.IntervalIndex.from_arrays(left=ranges[:-1], right=ranges[1:], closed="left")
        range_names = np.array(["Error:<0","Invalid-Small:<10", "Unusual-Small:10-36", "Valid:36-48", "Invalid-Large:48-95", "Invalid-Large:>95", "Error:>10,000"], dtype=str)
        amounts_category = ranges_indexer.get_indexer(df["AcceptedAmount"])
        df["AmountCategory"] = range_names[amounts_category]
        ## for The Pas and Billings, 48 - 95 is valid
        df.loc[(df["Location"].isin(["Billings","ThePas"])& (df["AmountCategory"]=="Invalid-Large:48-95")), "AmountCategory"] = "Valid:48-95"

        # handle bin transfer math
        ## create a copy of bin transfer, one copy will be w.r.t 'to' bins, increase amount, another copy will be w.r.t 'from' bins, decrease amount
        mask = df["LoadType"].eq("BinTransfer")
        total_amount = df[~mask].AcceptedAmount.sum()
        df3 = df[mask].copy(deep=True)
        ## original df - bin transfer - is for capturing the change in bins for column 'to' - i.e., positive amount - went into this bin
        df.loc[mask, "Bin"] = df.loc[mask,"to"]
        ## df3 is for capturing the change in bins for column 'from' - i.e., negative amount - went out of this bin
        df3["Bin"] = df3["from"]
        for col in ["AmountRaw","AcceptedAmountRaw","Amount","AcceptedAmount"]:
            df3[col] = -df3[col]
        df = pd.concat([df, df3], ignore_index=True)
        total_amount2 = df.AcceptedAmount.sum()
        assert np.abs(total_amount-total_amount2) < 0.001, f"after handling bin transfer, the total amount changed from {total_amount} to {total_amount2} - with non-zero netted bin transfers amount"

        # use contract to fill in some missing products
        ## create mapping
        contract_mapping = (
            df.dropna(subset=["ContractRaw","Product"], how="any").drop_duplicates(subset=["ContractRaw"],keep="first")[["ContractRaw","Product"]].set_index("ContractRaw")["Product"]
        )
        ## create df equal-length series for all entries (Product column)
        mapping = df["ContractRaw"].map(contract_mapping)
        ## fill na for product column only if product is missing 
        df["Product"] = df["Product"].fillna(mapping)

        # saving
        print(f"Transformed {len(df)} loads, saving ...")
        df.to_csv(gold_HP / "hp.csv", index=False)
        df.to_excel(gold_HP / "hp.xlsx", sheet_name="Loads", index=False)

    def run(self, force_run_time:bool=False, force_create_budget:bool=True, force_process_budget_input:bool=False, 
            PL_only:bool=False, AP_only:bool=False, HP_only:bool=False) -> None:
        """ 
            Entry point: run the project transformations selected by the flags, then print the elapsed time in minutes.

            Flags (checked in this order of precedence):
                HP_only  - run only the inventory settlement and the HP transformation; nothing else is executed
                AP_only  - run only the AP reporting project
                PL_only  - run AP reporting plus units processing, finance operational and finance summary
                (none)   - run every project, inventory settlement and HP transformation included

            force_run_time - run the QBO Time project regardless of weekday; otherwise it only runs
                             when self.today.weekday() is 0, 2 or 6
            force_create_budget / force_process_budget_input - currently unused, the budget update
                             call they were forwarded to is disabled
        """
        start = perf_counter()

        if HP_only:
            # inventory only - AP reporting and every finance / HR step is skipped
            self._inventory_settlement()
            self._HP_transformation()
        else:
            # AP report is always produced unless HP_only is requested
            self._APRporting_project()

            include_finance = not AP_only
            include_everything_else = include_finance and not PL_only

            if include_finance:
                # financial operational related projects
                self._process_units()
                self._finance_operational()
                self._finance_summary()

            if include_everything_else:
                self._weekly_banking()
                # payroll related
                self._payroll_project()
                # weekday is only consulted when the run is not forced
                if force_run_time or self.today.weekday() in (0, 2, 6):
                    self._QBOTime_project()
                self._hr_summary()
                # inventory
                self._inventory_settlement()
                self._HP_transformation()

        end = perf_counter()
        print(f"\nProjects Transformation Finished with {(end-start)/60:.3f} minutes\n")
