In [1]:
import pandas as pd

In [3]:
# Workbook read by the cells below, and the sheet that lists the recurring charges.
excel_dbfs_path = "./david_finance.xlsx"
sheet_name = "cargos_recurrentes"

In [4]:
# Load the recurring-charge definitions. na_filter=False keeps blank cells as ""
# instead of NaN, so blank margin days are mapped to "0" before the int cast.
dfp = pd.read_excel(
    excel_dbfs_path,
    engine="openpyxl",
    sheet_name=sheet_name,
    dtype={"monto_por_evento": 'float'},
    na_filter=False,
).assign(
    dias_de_margen=lambda charges: charges["dias_de_margen"].replace("", "0").astype(int)
)

In [5]:
# Credit-card sheet of the same workbook, loaded into a notebook-level frame.
tdc_df = pd.read_excel(
    excel_dbfs_path,
    sheet_name="tarjetas_de_credito",
    engine="openpyxl",
)

In [6]:
class RecurrentsCreator:
    """Expand recurring-charge definitions into one row per individual charge.

    Each row of ``df`` describes one recurring charge: its frequency label, start
    date (``fecha_de_inicio``), number of occurrences (``cantidad_de_frecuencias``)
    and payment method. The ``create_*_recurrents`` methods select the rows of one
    frequency and return a frame with the columns in ``RECURRENT_COLUMNS``.

    :param df: frame of recurring-charge definitions (may be ``None`` when only
        :meth:`pdconcat_wise` is needed).
    :param tdc_df: credit-card frame with the columns ``credit_cards``,
        ``dia_de_corte`` and ``plazo_para_pagar``. When omitted it is read from the
        ``tarjetas_de_credito`` sheet of the workbook named by the module-level
        ``excel_dbfs_path``.
    """

    # Column layout of every frame produced by this class.
    RECURRENT_COLUMNS = [
        'fecha_de_cargo',
        'comercio',
        'descripcion',
        'categoria',
        'monto',
        'forma_de_pago',
        'realizado_por',
        'limite_de_pago',
        'deuda_a_plazos_recurrente_o_normal',
        'comentarios',
    ]

    def __init__(self, df: pd.DataFrame = None, tdc_df: pd.DataFrame = None):
        self.empty_recurrents_df = pd.DataFrame(columns=self.RECURRENT_COLUMNS)
        if tdc_df is None:
            # Default behaviour: load the credit-card sheet from the workbook.
            tdc_df = pd.read_excel(excel_dbfs_path, engine="openpyxl", sheet_name="tarjetas_de_credito")
        self.tdc_df = tdc_df
        self.df = df

    @staticmethod
    def condition_blank_comment(text):
        """Return ``text`` unchanged, or ``""`` when it is NaN (NaN != NaN)."""
        return text if text == text else ""

    def get_pay_deadlines(self, row, date_range_i_row):
        """
        Compute the payment deadline of every charge date in ``date_range_i_row``.

        :param row: the current iteration row; its ``forma_de_pago`` is looked up
            in the ``credit_cards`` column of ``self.tdc_df``.
        :param date_range_i_row: iterable of charge timestamps.
        :return: a list with one deadline per charge date when the payment method
            is a known credit card; otherwise ``date_range_i_row`` itself (the
            deadline is the charge date).

        The deadline is the card's cut-off date plus ``plazo_para_pagar`` days. A
        charge made after the cut-off day uses the following month's cut-off.
        """
        # Use the instance's own card table for both the mask and the lookup.
        card_rows = self.tdc_df.loc[self.tdc_df["credit_cards"] == row["forma_de_pago"]]
        if card_rows.empty:
            return date_range_i_row

        dia_de_corte = int(card_rows["dia_de_corte"].iloc[0])
        plazo_para_pagar = int(card_rows["plazo_para_pagar"].iloc[0])

        deadlines = []
        for date in date_range_i_row:
            # Clamp the cut-off day so e.g. day 31 stays valid in a 30-day month.
            cutoff = pd.Timestamp(
                year=date.year,
                month=date.month,
                day=min(dia_de_corte, date.days_in_month),
            )
            if date.day > dia_de_corte:
                deadlines.append(cutoff + pd.DateOffset(days=plazo_para_pagar, months=1))
            else:
                deadlines.append(cutoff + pd.DateOffset(days=plazo_para_pagar))
        return deadlines

    def create_nthly_recurrents(self, row, date_range_i_row):
        """Build the detail frame (one row per date) for a single recurring charge.

        :param row: one recurring-charge definition.
        :param date_range_i_row: the charge dates generated for that definition.
        :return: DataFrame with the columns in ``RECURRENT_COLUMNS``.
        """
        return pd.DataFrame(
            data={
                'fecha_de_cargo': date_range_i_row,
                # Definitions without a 'comercio' column get a blank merchant
                # instead of raising KeyError.
                'comercio': row.get('comercio', ""),
                'descripcion': self.condition_blank_comment(row["descripcion"]),
                'categoria': row["categoria"],
                'monto': row["monto_por_evento"],
                'forma_de_pago': row["forma_de_pago"],
                'realizado_por': row["realizado_por"],
                'limite_de_pago': self.get_pay_deadlines(row, date_range_i_row),
                'deuda_a_plazos_recurrente_o_normal': "Recurrente",
                'comentarios': self.condition_blank_comment(row["comentarios"]),
            }
        )

    # ------------------------------------------------------------------
    # Private helpers shared by every frequency
    # ------------------------------------------------------------------
    def _month_anchored_date_range(self, row, freq):
        """Dates every ``freq`` (a month-start alias) on the start date's day of month.

        The range is generated on month starts and then shifted by ``day - 1``
        days, so a start day beyond a shorter month's length spills into the
        following month.
        """
        start = row['fecha_de_inicio']
        return pd.date_range(
            start=pd.Timestamp(year=start.year, month=start.month, day=1),
            periods=int(row['cantidad_de_frecuencias']),
            freq=freq,
        ) + pd.DateOffset(days=start.day - 1)

    def _day_stepped_date_range(self, row, freq):
        """Dates every ``freq`` from the start date, shifted by ``dias_de_margen`` days."""
        return pd.date_range(
            start=row['fecha_de_inicio'],
            periods=int(row['cantidad_de_frecuencias']),
            freq=freq,
        ) + pd.DateOffset(days=int(row['dias_de_margen']))

    def _create_recurrents_for(self, frequency_desc_column, frecuency_desc_value, date_range_builder):
        """Expand every definition whose frequency column equals the given value.

        :return: the concatenated detail frame, or ``None`` when no definition
            has that frequency.
        """
        selected = self.df.loc[self.df[frequency_desc_column] == frecuency_desc_value]
        if selected.empty:
            return None

        frames = [
            self.create_nthly_recurrents(row, date_range_builder(row))
            for _, row in selected.iterrows()
        ]
        # Concatenate once; empty frames are skipped so they cannot affect dtypes.
        non_empty = [frame for frame in frames if len(frame) > 0]
        if non_empty:
            df_new_recurrents = pd.concat(non_empty, ignore_index=True)
        else:
            df_new_recurrents = frames[-1]
        print("Rows generated successfully")
        return df_new_recurrents

    # ------------------------------------------------------------------
    # Date ranges and detail frames per frequency
    # ------------------------------------------------------------------
    def create_monthly_date_range(self, row):
        """Charge dates one month apart."""
        return self._month_anchored_date_range(row, "MS")

    def create_monthly_recurrents(self, frequency_desc_column: str = "frecuencia_de_pago", frecuency_desc_value: str = "mensual"):
        """Detail rows for the monthly definitions, or ``None`` if there are none."""
        return self._create_recurrents_for(frequency_desc_column, frecuency_desc_value, self.create_monthly_date_range)

    def create_semimonthly_date_range(self, row):
        """Charge dates on the "SME" (semi-month end) frequency plus the margin days."""
        return self._day_stepped_date_range(row, "SME")

    def create_semimonthly_recurrents(self, frequency_desc_column: str = "frecuencia_de_pago", frecuency_desc_value: str = "quincenal"):
        """Detail rows for the semi-monthly definitions, or ``None`` if there are none."""
        return self._create_recurrents_for(frequency_desc_column, frecuency_desc_value, self.create_semimonthly_date_range)

    def create_bimonthly_date_range(self, row):
        """Charge dates two months apart."""
        return self._month_anchored_date_range(row, "2MS")

    def create_bimonthly_recurrents(self, frequency_desc_column: str = "frecuencia_de_pago", frecuency_desc_value: str = "bimestral"):
        """Detail rows for the bimonthly definitions, or ``None`` if there are none."""
        return self._create_recurrents_for(frequency_desc_column, frecuency_desc_value, self.create_bimonthly_date_range)

    def create_weekly_date_range(self, row):
        """Charge dates seven days apart plus the margin days."""
        return self._day_stepped_date_range(row, "7D")

    def create_weekly_recurrents(self, frequency_desc_column: str = "frecuencia_de_pago", frecuency_desc_value: str = "semanal"):
        """Detail rows for the weekly definitions, or ``None`` if there are none."""
        return self._create_recurrents_for(frequency_desc_column, frecuency_desc_value, self.create_weekly_date_range)

    def create_biweekly_date_range(self, row):
        """Charge dates fourteen days apart plus the margin days."""
        return self._day_stepped_date_range(row, "14D")

    def create_biweekly_recurrents(self, frequency_desc_column: str = "frecuencia_de_pago", frecuency_desc_value: str = "catorcenal"):
        """Detail rows for the fourteen-day definitions, or ``None`` if there are none."""
        return self._create_recurrents_for(frequency_desc_column, frecuency_desc_value, self.create_biweekly_date_range)

    def create_fourmonthly_date_range(self, row):
        """Charge dates four months apart."""
        return self._month_anchored_date_range(row, "4MS")

    def create_fourmonthly_recurrents(self, frequency_desc_column: str = "frecuencia_de_pago", frecuency_desc_value: str = "tetramestral"):
        """Detail rows for the four-monthly definitions, or ``None`` if there are none."""
        return self._create_recurrents_for(frequency_desc_column, frecuency_desc_value, self.create_fourmonthly_date_range)

    def create_quarterly_date_range(self, row):
        """Charge dates three months apart."""
        return self._month_anchored_date_range(row, "3MS")

    def create_quarterly_recurrents(self, frequency_desc_column: str = "frecuencia_de_pago", frecuency_desc_value: str = "trimestral"):
        """Detail rows for the quarterly definitions, or ``None`` if there are none."""
        return self._create_recurrents_for(frequency_desc_column, frecuency_desc_value, self.create_quarterly_date_range)

    def create_semiyearly_date_range(self, row):
        """Charge dates six months apart."""
        return self._month_anchored_date_range(row, "6MS")

    def create_semiyearly_recurrents(self, frequency_desc_column: str = "frecuencia_de_pago", frecuency_desc_value: str = "semestral"):
        """Detail rows for the half-yearly definitions, or ``None`` if there are none."""
        return self._create_recurrents_for(frequency_desc_column, frecuency_desc_value, self.create_semiyearly_date_range)

    def create_annual_date_range(self, row):
        """Charge dates one year apart, beginning on the start date itself.

        The range is anchored on 1 January of the start year. Anchoring it on the
        start month would make ``date_range`` roll a non-January start forward to
        the next year start and drop the first occurrence.
        """
        start = row['fecha_de_inicio']
        return pd.date_range(
            start=pd.Timestamp(year=start.year, month=1, day=1),
            periods=int(row['cantidad_de_frecuencias']),
            freq="YS",
        ) + pd.DateOffset(months=start.month - 1, days=start.day - 1)

    def create_annual_recurrents(self, frequency_desc_column: str = "frecuencia_de_pago", frecuency_desc_value: str = "anual"):
        """Detail rows for the annual definitions, or ``None`` if there are none."""
        return self._create_recurrents_for(frequency_desc_column, frecuency_desc_value, self.create_annual_date_range)

    # ------------------------------------------------------------------
    # Combination
    # ------------------------------------------------------------------
    def pdconcat_wise(self, base_df: pd.DataFrame, concat_df: pd.DataFrame):
        """Append ``concat_df`` to ``base_df``, tolerating a missing or empty side.

        :return: ``base_df`` when ``concat_df`` is ``None``; ``concat_df`` when
            ``base_df`` has no rows; otherwise their concatenation with a fresh index.
        """
        if concat_df is None:
            return base_df
        if len(base_df) > 0:
            return pd.concat([base_df, concat_df], ignore_index=True)
        return concat_df

    def create_all_recurrents(self):
        """Expand every supported frequency and stack the results.

        :return: one frame with all detail rows, ordered from the shortest to the
            longest frequency; an empty frame with the expected columns when no
            definition matches any frequency.
        """
        creators = (
            self.create_weekly_recurrents,
            self.create_biweekly_recurrents,
            self.create_semimonthly_recurrents,
            self.create_monthly_recurrents,
            self.create_bimonthly_recurrents,
            self.create_quarterly_recurrents,
            self.create_fourmonthly_recurrents,
            self.create_semiyearly_recurrents,
            self.create_annual_recurrents,
        )
        df_recurrents_cum = self.empty_recurrents_df
        for create_recurrents in creators:
            df_recurrents_cum = self.pdconcat_wise(df_recurrents_cum, create_recurrents())
        return df_recurrents_cum





In [7]:
# Creator bound to the recurring-charge definitions loaded into dfp.
recurrents_creator = RecurrentsCreator(df=dfp)

In [8]:
# Show the definitions frame held by the creator (the frame passed to its constructor).
recurrents_creator.df

Unnamed: 0,fecha_de_transaccion,frecuencia_de_pago,fecha_de_inicio,cantidad_de_frecuencias,dias_de_margen,monto_por_evento,forma_de_pago,descripcion,categoria,realizado_por,comentarios
0,2024-05-13,bimestral,2024-05-13,12,0,500.0,Hey Credito,Recibo Gas,Servicios,David,
1,2024-05-13,bimestral,2024-05-13,12,0,500.0,Hey Credito,Recibo luz,Servicios,David,
2,2024-05-20,mensual,2024-05-20,12,0,510.0,Hey Credito,Izzi internet,Servicios,David,
3,2024-05-17,catorcenal,2024-05-17,10,0,120.0,Efectivo,Corte Cabello Buho,Artículos de belleza,David,


In [9]:
# Expand every definition into individual charge rows; each frequency that has
# at least one definition prints "Rows generated successfully".
df_all_recurrents = recurrents_creator.create_all_recurrents()

Rows generated successfully
Rows generated successfully
Rows generated successfully


In [10]:
# Write the freshly generated rows to CSV (UTF-8 with BOM, index column omitted).
df_all_recurrents.to_csv(
    "recurrentes_detalle.csv",
    index=False,
    encoding="utf-8-sig",
)

In [11]:
# Load the detail rows already stored in the workbook; blank cells stay as "".
recurrentes_df = pd.read_excel(
    excel_dbfs_path,
    sheet_name="cargos_recurrentes_detalle",
    engine="openpyxl",
    na_filter=False,
    dtype={"monto_total_deuda": 'float'},
)

In [12]:
# Render the detail rows that were just read from the workbook.
display(recurrentes_df)

Unnamed: 0,fecha_de_cargo,descripcion,categoria,monto,forma_de_pago,realizado_por,limite_de_pago,deuda_a_plazos_recurrente_o_normal,comentarios
0,2024-05-17,Corte Cabello Buho,Artículos de belleza,120,Efectivo,David,2024-05-17,Recurrente,
1,2024-05-24,Corte Cabello Buho,Artículos de belleza,120,Efectivo,David,2024-05-24,Recurrente,
2,2024-05-31,Corte Cabello Buho,Artículos de belleza,120,Efectivo,David,2024-05-31,Recurrente,
3,2024-06-07,Corte Cabello Buho,Artículos de belleza,120,Efectivo,David,2024-06-07,Recurrente,
4,2024-06-14,Corte Cabello Buho,Artículos de belleza,120,Efectivo,David,2024-06-14,Recurrente,
5,2024-06-21,Corte Cabello Buho,Artículos de belleza,120,Efectivo,David,2024-06-21,Recurrente,
6,2024-06-28,Corte Cabello Buho,Artículos de belleza,120,Efectivo,David,2024-06-28,Recurrente,
7,2024-07-05,Corte Cabello Buho,Artículos de belleza,120,Efectivo,David,2024-07-05,Recurrente,
8,2024-07-12,Corte Cabello Buho,Artículos de belleza,120,Efectivo,David,2024-07-12,Recurrente,
9,2024-07-19,Corte Cabello Buho,Artículos de belleza,120,Efectivo,David,2024-07-19,Recurrente,


In [13]:
# Columns that together identify one charge when removing duplicates.
dups_criteria_list = [
    "fecha_de_cargo",
    "comercio",
    "descripcion",
    "categoria",
    "forma_de_pago",
    "realizado_por",
]

In [14]:
# Append the generated rows to the stored ones and drop repeated charges.
# Stored rows come first, so keep="first" preserves them over regenerated copies.
# The existing creator is reused: pdconcat_wise does not depend on instance state,
# and building a new RecurrentsCreator() would re-read the workbook for nothing.
recurrentes_df = (
    recurrents_creator
    .pdconcat_wise(recurrentes_df, df_all_recurrents)
    .drop_duplicates(subset=dups_criteria_list, keep="first")
)

In [15]:
# Write the merged, de-duplicated detail rows to CSV (UTF-8 with BOM, no index column).
recurrentes_df.to_csv(
    "cargos_recurrentes_detalle.csv",
    index=False,
    encoding="utf-8-sig",
)

In [16]:
# Persist recurrentes_df with IPython's %store so another session can restore it via `%store -r`.
%store recurrentes_df

Stored 'recurrentes_df' (DataFrame)
