In [None]:
import pandas as pd

# Contract identifiers to keep: the cleaning functions in this file compare each
# row's "contract" value (as text) against this list and drop everything else.
CONTRATOS = ["20486403", "12861603", "AHA84901"]

def clean_movements_data(df_mov, contratos=None):
    """Reduce raw movements to the deposits and withdrawals of selected contracts.

    Processing steps, in order:

    1. Drop rows that are exact duplicates across all input columns.
    2. Keep only ``contract``, ``description``, ``movement_import`` and
       ``operation_date``.
    3. Coerce ``movement_import`` to numeric; unparseable values become NaN.
    4. Keep rows whose ``contract`` (compared as text) is in ``contratos``.
    5. Keep rows whose description mentions a deposit/contribution or a
       withdrawal/outflow keyword (case-insensitive; the accent on
       "Depósito"/"Aportación" is optional so "DEPOSITO" also matches).
    6. Rename the columns to ``Contract``, ``Description``,
       ``Movement_Import`` and ``Operation_Date``.

    Args:
        df_mov: DataFrame containing at least the four columns listed in
            step 2. It is not modified.
        contratos: Iterable of contract identifiers to keep. Defaults to the
            module-level ``CONTRATOS`` list.

    Returns:
        A new DataFrame with a fresh 0..n-1 index and the renamed columns.

    Raises:
        KeyError: If any of the required columns is missing from ``df_mov``.
    """
    if contratos is None:
        contratos = CONTRATOS
    # Compare as text because the spreadsheet may hold numeric-looking ids.
    contratos_txt = {str(c) for c in contratos}

    columnas = ["contract", "description", "movement_import", "operation_date"]
    # Explicit copy: the column assignment below must write to an independent
    # frame, not to a slice of the caller's data (avoids SettingWithCopyWarning).
    df = df_mov.drop_duplicates()[columnas].copy()
    df["movement_import"] = pd.to_numeric(df["movement_import"], errors="coerce")
    df = df[df["contract"].astype(str).isin(contratos_txt)]

    descripcion = df["description"]
    # na=False treats missing descriptions as "no match" instead of NaN.
    es_deposito = descripcion.str.contains(
        r"Dep[oó]sito|Aportaci[oó]n", case=False, na=False
    )
    es_retiro = descripcion.str.contains(r"Retiro|Salida", case=False, na=False)

    return (
        df[es_deposito | es_retiro]
        .reset_index(drop=True)
        .rename(columns={
            "contract": "Contract",
            "description": "Description",
            "movement_import": "Movement_Import",
            "operation_date": "Operation_Date",
        })
    )


# Spanish month names mapped to their two-digit month number.
_MESES_ES = {
    "enero": "01",
    "febrero": "02",
    "marzo": "03",
    "abril": "04",
    "mayo": "05",
    "junio": "06",
    "julio": "07",
    "agosto": "08",
    "septiembre": "09",
    "setiembre": "09",
    "octubre": "10",
    "noviembre": "11",
    "diciembre": "12",
}


def _clave_cronologica(fechas):
    """Return a datetime Series usable as a chronological sort key for *fechas*.

    Datetime-typed input is returned unchanged. Otherwise each value is read
    as Spanish long-form text ("1 de agosto de 2023"); anything that does not
    match that shape, or is not a valid calendar date, becomes NaT.
    """
    if pd.api.types.is_datetime64_any_dtype(fechas):
        return fechas

    texto = fechas.astype(str).str.strip().str.lower()
    partes = texto.str.extract(r"^(\d{1,2})\s+de\s+([a-z]+)\s+de\s+(\d{4})$")
    partes[1] = partes[1].map(_MESES_ES)
    # Only rows where day, month and year were all recognised can be parsed.
    partes = partes.dropna()
    if partes.empty:
        return pd.Series(pd.NaT, index=fechas.index)

    iso = partes[2] + "-" + partes[1] + "-" + partes[0].str.zfill(2)
    clave = pd.to_datetime(iso, format="%Y-%m-%d", errors="coerce")
    # Re-align with the input; rows that were dropped above come back as NaT.
    return clave.reindex(fechas.index)


def clean_balance_data(df_bal, contratos=None):
    """Aggregate raw balance rows into one portfolio value per contract and date.

    Processing steps, in order:

    1. Drop rows that are exact duplicates across all input columns.
    2. Keep only ``contract``, ``balance_date`` and ``value_pos_mdo``.
    3. Coerce ``value_pos_mdo`` to numeric; unparseable values become NaN.
    4. Keep rows whose ``contract`` (compared as text) is in ``contratos``.
    5. Sum ``value_pos_mdo`` per (contract, balance_date); rows with a missing
       key are excluded from the grouping.
    6. Rename the columns to ``Contract``, ``Date`` and ``Portfolio_Value``.
    7. Sort by contract, then chronologically by date. Dates written as
       Spanish text ("1 de agosto de 2023") are ordered by the date they
       denote, not alphabetically; the ``Date`` values themselves are
       returned unchanged. Dates that cannot be interpreted are placed last
       within their contract, ordered by their raw value.

    Args:
        df_bal: DataFrame containing at least the three columns listed in
            step 2. It is not modified.
        contratos: Iterable of contract identifiers to keep. Defaults to the
            module-level ``CONTRATOS`` list.

    Returns:
        A new DataFrame with a fresh 0..n-1 index and the columns
        ``Contract``, ``Date`` and ``Portfolio_Value``.

    Raises:
        KeyError: If any of the required columns is missing from ``df_bal``.
    """
    if contratos is None:
        contratos = CONTRATOS
    # Compare as text because the spreadsheet may hold numeric-looking ids.
    contratos_txt = {str(c) for c in contratos}

    columnas = ["contract", "balance_date", "value_pos_mdo"]
    # Explicit copy: the column assignment below must write to an independent
    # frame, not to a slice of the caller's data (avoids SettingWithCopyWarning).
    df = df_bal.drop_duplicates()[columnas].copy()
    df["value_pos_mdo"] = pd.to_numeric(df["value_pos_mdo"], errors="coerce")
    df = df[df["contract"].astype(str).isin(contratos_txt)]

    df_port = (
        df.groupby(["contract", "balance_date"], as_index=False, dropna=True)["value_pos_mdo"]
        .sum()
        .rename(columns={
            "contract": "Contract",
            "balance_date": "Date",
            "value_pos_mdo": "Portfolio_Value",
        })
    )

    # Sort on a temporary parsed-date column so textual dates end up in
    # calendar order; the raw "Date" is the tie-breaker for unparseable values.
    df_port["_orden_fecha"] = _clave_cronologica(df_port["Date"])
    df_port = (
        df_port.sort_values(["Contract", "_orden_fecha", "Date"])
        .drop(columns="_orden_fecha")
        .reset_index(drop=True)
    )

    return df_port

archivo = "data_actividad.xlsx"

# Load both worksheets in a single read; the result is a dict keyed by sheet name.
hojas = pd.read_excel(archivo, sheet_name=["movements", "balances"])
df_mov_raw = hojas["movements"]
df_bal_raw = hojas["balances"]

df_mov_clean = clean_movements_data(df_mov_raw)
df_bal_clean = clean_balance_data(df_bal_raw)

# Preview the first rows of each cleaned table.
print("Movements limpios:", df_mov_clean.head(), sep="\n")
print("\nBalances limpios:", df_bal_clean.head(), sep="\n")

# Export the cleaned tables to CSV (UTF-8 with BOM, no index column).
for destino, tabla in (
    ("depositos_retiros.csv", df_mov_clean),
    ("valor_portafolio_diario.csv", df_bal_clean),
):
    tabla.to_csv(destino, index=False, encoding="utf-8-sig")



Movements limpios:
   Contract         Description  Movement_Import       Operation_Date
0  20486403  RETIRO DE EFECTIVO          26000.0   4 de enero de 2023
1  20486403  RETIRO DE EFECTIVO           1500.0  25 de enero de 2023
2  12861603  RETIRO DE EFECTIVO          27000.0   3 de abril de 2023
3  20486403  RETIRO DE EFECTIVO          23500.0  28 de abril de 2023
4  12861603  RETIRO DE EFECTIVO            459.1   25 de mayo de 2023

Balances limpios:
   Contract                    Date  Portfolio_Value
0  12861603     1 de agosto de 2023     249069.46260
1  12861603    1 de febrero de 2023     281525.53110
2  12861603      1 de junio de 2023     274399.30320
3  12861603      1 de marzo de 2023     293920.64506
4  12861603  1 de noviembre de 2023     256036.19410
