In [308]:
import pandas as pd

In [309]:
# Workbook location and the sheet that holds the recurring-income definitions.
# Plain literals: neither string interpolates anything.
excel_dbfs_path = "./david_finance.xlsx"
sheet_name = "ingresos_recurrentes"

In [310]:
# Read the recurring-income definitions sheet; the per-event amount column is
# requested as float.
dfp = pd.read_excel(
    excel_dbfs_path,
    sheet_name=sheet_name,
    engine="openpyxl",
    dtype={"ingreso_por_evento": "float"},
)

In [311]:
class RecurrentIncome:
    """Expand recurring-income definitions into one dated row per income event.

    Each row of ``df`` describes one recurring income: a start date, a number
    of events and a frequency label, plus descriptive columns that are copied
    onto every generated event row.

    Only two frequencies are expanded by ``get_income_details``: the label at
    position 0 of ``valid_frequencies`` (weekly, "semanal" by default) and the
    label at position 2 (semi-monthly, "quincenal" by default). Rows carrying
    any other label are left out of the result.

    Args:
        df: Definitions table. May be omitted when only ``pdconcat_wise`` is
            needed; ``get_income_details`` requires it.
        start_date: Name of the column holding the first event date.
        periods: Name of the column holding the number of events to generate.
        frequency: Name of the column holding the frequency label.
        valid_frequencies: Ordered frequency labels. Defaults to a fresh list
            of the nine Spanish labels below.
    """

    # Kept as a tuple so the default can never be mutated through an instance.
    _DEFAULT_FREQUENCIES = (
        "semanal", "catorcenal", "quincenal", "mensual", "bimestral",
        "trimestral", "tetramestral", "semestral", "anual",
    )

    def __init__(
            self, 
            df: pd.DataFrame = None, 
            start_date: str = "fecha_de_inicio", 
            periods: str = "cantidad_de_frecuencias", 
            frequency: str = "frecuencia_de_ingreso",
            valid_frequencies = None
    ):
        self.start_date = start_date
        self.periods = periods
        self.frequency = frequency
        # A new list per instance: a list literal in the signature would be
        # shared (and mutable) across every instance created with the default.
        if valid_frequencies is None:
            valid_frequencies = list(self._DEFAULT_FREQUENCIES)
        self.valid_frequencies = valid_frequencies
        self.df = df
        # Template returned when no row produces any event.
        self.empty_recurrents_df = pd.DataFrame(
            columns=[
                'fecha_de_ingreso',
                'ingreso_por_evento',
                'cuenta_destino',
                'descripcion',
                'categoria',
                'realizado_por',
                'ingreso_recurrente_o_extraoridinario',
                'comentarios',
            ]
        )

        # NaN is the only value that differs from itself, so this maps NaN
        # (e.g. a blank cell) to "" and passes every other value through.
        self.condition_blank_comment = lambda text: text if text == text else ""

    def pdconcat_wise(self, base_df: pd.DataFrame, concat_df: pd.DataFrame) -> pd.DataFrame:
        """Append ``concat_df`` below ``base_df``, tolerating missing/empty inputs.

        Returns ``base_df`` when ``concat_df`` is None, ``concat_df`` when
        ``base_df`` is None or has no rows, and otherwise the two frames
        concatenated with a fresh 0..n-1 index.
        """
        if concat_df is None:
            return base_df
        if base_df is None or len(base_df) == 0:
            return concat_df
        return pd.concat([base_df, concat_df], ignore_index=True)

    def create_nthly_recurrents(self,row, date_range)-> pd.DataFrame:
        """Build one event row per date in ``date_range`` from a definition ``row``.

        ``freq`` numbers the events from 0; the descriptive fields are copied
        from ``row``, with NaN descriptions/comments replaced by "".
        """
        return  pd.DataFrame(
            data= {'freq': range(len(date_range)),
                    'fecha_de_ingreso': date_range,
                    'ingreso_por_evento': row["ingreso_por_evento"], 
                    'cuenta_destino': row["cuenta_destino"],
                    'descripcion': self.condition_blank_comment(row["descripcion"]), 
                    'categoria':  row["categoria"], 
                    'realizado_por': row["realizado_por"],
                    'ingreso_recurrente_o_extraoridinario': "Recurrente", 
                    'comentarios': self.condition_blank_comment(row["comentarios"])
                    }
                )

    def _event_count(self, row) -> int:
        """Return the number of events requested by ``row`` as a plain int."""
        return int(row[self.periods])

    def _semi_monthly_dates(self, row) -> pd.DatetimeIndex:
        """Return the semi-monthly event dates for ``row``.

        Two monthly series are built from the first day of the start month:
        one shifted to the start day, the other 15 days after that. They are
        interleaved and cut to exactly the requested number of events, so an
        odd count ends on the first date of its last month.
        """
        start = row[self.start_date]
        count = self._event_count(row)
        months = (count + 1) // 2  # round up so an odd count is not truncated
        month_starts = pd.date_range(
            start = pd.Timestamp(year=start.year, month=start.month, day=1),
            periods = months,
            freq = "MS"
        )
        # NOTE(review): the offsets are plain day counts from the 1st, so in a
        # short month the later date can spill into the next month (a start
        # day of 14 gives 1st + 28 days, i.e. 1 March in a non-leap February).
        first_dates = month_starts + pd.DateOffset(days=start.day - 1)
        second_dates = month_starts + pd.DateOffset(days=start.day + 14)
        interleaved = [day for pair in zip(first_dates, second_dates) for day in pair]
        return pd.to_datetime(interleaved[:count])

    def get_income_details(self) -> pd.DataFrame:
        """Expand every weekly and semi-monthly definition into event rows.

        Weekly rows come first, then semi-monthly rows, each group in the
        order it appears in ``self.df``. The result has a fresh 0..n-1 index.
        When nothing is generated, a copy of the empty template is returned.
        """
        weekly_label = self.valid_frequencies[0]        # semanal
        semi_monthly_label = self.valid_frequencies[2]  # quincenal
        labels = self.df[self.frequency]

        # Collect the per-row frames and concatenate once at the end instead
        # of re-copying the accumulated frame on every iteration.
        frames = []

        for _, row in self.df.loc[labels == weekly_label].iterrows():
            weekly_dates = pd.date_range(
                start = row[self.start_date],
                periods = self._event_count(row),
                freq = "7D"
            )
            frames.append(self.create_nthly_recurrents(row, weekly_dates))

        for _, row in self.df.loc[labels == semi_monthly_label].iterrows():
            frames.append(self.create_nthly_recurrents(row, self._semi_monthly_dates(row)))

        if not frames:
            # Copy so callers that rename the index do not alter the template.
            return self.empty_recurrents_df.copy()
        return pd.concat(frames, ignore_index=True)
    
    
        

In [312]:
# Wrap the loaded definitions; all column-name settings keep their defaults.
recurrent_incomes = RecurrentIncome(df=dfp)

In [313]:
# Expand the definitions into one row per dated income event.
new_recurrent_incomes = recurrent_incomes.get_income_details()

In [314]:
# Label the index so it is written under the "indice" header, then export the
# freshly generated events (BOM-prefixed UTF-8).
# NOTE(review): a later cell writes the merged result to this same path.
new_recurrent_incomes.index.name = "indice"
new_recurrent_incomes.to_csv(
    "ingresos_recurrentes_detalle.csv",
    encoding="utf-8-sig",
)

In [315]:
# Load the detail sheet already stored in the workbook, indexed by "indice".
# na_filter=False turns off pandas' missing-value detection for this read.
# NOTE(review): the dtype mapping names "monto_total_deuda", which is not among
# the columns shown in this sheet's displayed output — confirm whether
# "ingreso_por_evento" was intended.
actual_recurrent_incomes = pd.read_excel(
    excel_dbfs_path,
    sheet_name="ingresos_recurrentes_detalle",
    engine="openpyxl",
    index_col="indice",
    dtype={"monto_total_deuda": "float"},
    na_filter=False,
)

In [316]:
# Show the stored detail rows as loaded from the workbook.
display(actual_recurrent_incomes)

Unnamed: 0_level_0,freq,fecha_de_ingreso,ingreso_por_evento,cuenta_destino,descripcion,categoria,realizado_por,ingreso_recurrente_o_extraoridinario,comentarios
indice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1


In [317]:
# Columns that together identify an income event when looking for duplicates.
dups_criteria_list = [
    "fecha_de_ingreso",
    "descripcion",
    "categoria",
    "cuenta_destino",
    "realizado_por",
]

In [318]:
# Put the stored detail rows ahead of the newly generated ones, then drop rows
# that repeat on the duplicate-criteria columns. keep="first" retains the
# earliest occurrence, so a stored row wins over its regenerated twin.
recurrent_incomme_updated = (
    RecurrentIncome()
    .pdconcat_wise(actual_recurrent_incomes, new_recurrent_incomes)
    .drop_duplicates(subset=dups_criteria_list, keep="first")
)

In [319]:
# Label the index "indice" and export the merged, de-duplicated events
# (BOM-prefixed UTF-8).
recurrent_incomme_updated.index.name = "indice"
recurrent_incomme_updated.to_csv(
    "ingresos_recurrentes_detalle.csv",
    encoding="utf-8-sig",
)

In [320]:
# Persist the merged DataFrame with IPython's %store magic; another kernel can
# restore it with `%store -r recurrent_incomme_updated`.
%store recurrent_incomme_updated

Stored 'recurrent_incomme_updated' (DataFrame)
