In [2]:
import pandas as pd
import numpy as np

inputs = pd.read_csv("../data/raw/inputs_prices.csv", sep=";")
commodities = pd.read_csv("../data/raw/commodities_prices.csv", sep=";")
fx = pd.read_csv("../data/raw/exchange_rate.csv", sep=";")

inputs["date"] = pd.to_datetime(inputs["date"])
commodities["date"] = pd.to_datetime(commodities["date"])
fx["date"] = pd.to_datetime(fx["date"])


In [3]:
assert inputs["currency"].isin(["USD", "ARS"]).all()
assert commodities["currency"].isin(["ARS"]).all()

In [4]:
inputs = inputs.sort_values("date")
commodities = commodities.sort_values("date")
fx = fx.sort_values("date")

In [5]:
inputs_fx = pd.merge_asof(
    inputs,
    fx,
    on="date",
    direction="backward"
)

commodities_fx = pd.merge_asof(
    commodities,
    fx,
    on="date",
    direction="backward"
)


In [6]:
inputs_fx["usd_ars_sell"].isna().mean()
commodities_fx["usd_ars_sell"].isna().mean()


np.float64(0.0)

In [7]:
inputs_fx["price_usd"] = np.where(
    inputs_fx["currency"] == "USD",
    inputs_fx["price"],
    inputs_fx["price"] / inputs_fx["usd_ars_sell"]
)

In [8]:
commodities_fx["price_usd"] = (
    commodities_fx["price"] / commodities_fx["usd_ars_sell"]
)

In [9]:
inputs_fx = inputs_fx.sort_values(["id_articulo", "date"])

inputs_fx["price_usd"] = (
    inputs_fx
        .groupby("id_articulo")["price_usd"]
        .ffill()
)

In [10]:
inputs_fx["year_month"] = inputs_fx["date"].dt.to_period("M")
commodities_fx["year_month"] = commodities_fx["date"].dt.to_period("M")

In [11]:
commodities_monthly = (
    commodities_fx
        .groupby(["commodity_id", "year_month"])
        .agg(price_usd_mean=("price_usd", "mean"))
        .reset_index()
)


In [12]:
inputs_monthly = (
    inputs_fx
        .sort_values("date")
        .groupby(["id_articulo", "year_month"])
        .agg(price_usd_last=("price_usd", "last"))
        .reset_index()
)

In [13]:
fx_monthly = (
    fx
    .assign(year_month=fx["date"].dt.to_period("M"))
    .groupby("year_month")
    .agg(
        usd_ars_mean=("usd_ars_sell", "mean"),
        usd_ars_last=("usd_ars_sell", "last")
    )
    .reset_index()
)


In [14]:
inputs_monthly.to_parquet("../data/processed/inputs_monthly.parquet")
commodities_monthly.to_parquet("../data/processed/commodities_monthly.parquet")
fx_monthly.to_parquet("../data/processed/fx_monthly.parquet")