# Configuración

In [1]:
# Notebook-wide seed: passed as `random_state` to both ClusterSampling
# strategies and to `evaluation.run` in the evaluation section.
RANDOM_STATE = 42

# Preprocesamiento

In [2]:
import pandas as pd
import glob

# Build the working frame: load every CSV export in the working directory,
# derive calendar features from the transaction date, then drop the raw
# date, title and helper datetime columns.

# glob returns paths in a file-system-dependent order; sorting keeps the row
# order (and therefore any seeded sampling done later) stable across machines.
csv_paths = sorted(glob.glob("*.csv"))
if not csv_paths:
    raise FileNotFoundError("No CSV files matching '*.csv' in the current working directory")

# ignore_index=True relabels the rows 0..n-1; without it each file contributes
# its own 0-based labels and the combined index contains duplicates.
df = pd.concat(
    [pd.read_csv(csv_path, sep=";") for csv_path in csv_paths],
    ignore_index=True,
)

# NOTE(review): the date format is inferred by pandas; if the exports use
# day-first dates an explicit `format=` / `dayfirst=True` is needed — confirm.
df['datetime'] = pd.to_datetime(df.date)

# month and weekday are stored as strings (presumably so they are treated as
# categorical labels downstream — confirm); day and hour stay numeric.
df['month'] = df['datetime'].dt.month.astype(str)
df['weekday'] = df['datetime'].dt.weekday.astype(str)
df['day'] = df['datetime'].dt.day
df['hour'] = df['datetime'].dt.hour

# Month number (as a string, matching df['month']) -> season name,
# using northern-hemisphere meteorological seasons (Dec-Feb = winter).
seasons_map = {
    '1' : 'winter',
    '2' : 'winter',
    '3' : 'spring',
    '4' : 'spring',
    '5' : 'spring',
    '6' : 'summer',
    '7' : 'summer',
    '8' : 'summer',
    '9' : 'fall',
    '10' : 'fall',
    '11' : 'fall',
    '12' : 'winter',
}
df['season'] = df.month.replace(seasons_map)

# Remove the columns that were only needed to derive features and give the
# category column an ASCII name.
df = (
    df
    .drop(columns=['date', 'title', 'datetime'])
    .rename(columns={"Categoría": "category"})
)

df.head()

Unnamed: 0,amount,category,month,weekday,day,hour,season
0,-34.13,compras,12,2,27,20,winter
1,-8.37,compras,3,6,11,0,spring
2,-27.77,compras,11,5,25,5,fall
3,2624.84,transferencias,10,0,30,17,fall
4,-31.15,transferencias,12,6,31,9,winter


# Evaluación

In [3]:
from evaluation.core import Evaluation
from evaluation.evaluations import DistributionComparisson
from evaluation.sampling import RandomSampling, StratifiedRandomSampling, ClusterSampling

# Columns handed to both clustering-based samplers. Each sampler receives its
# own copy of the list, as in the original literal-per-call form.
cluster_fields = ['amount', 'category', 'weekday', 'month', 'season', 'day']

# Strategies under comparison, built in the order they appear in the list:
# plain random, two stratified variants, and two clustered variants
# (10 and 20 clusters). alias='2' presumably distinguishes the second
# configuration of the same strategy class — confirm in evaluation.sampling.
random_sampler = RandomSampling()
stratified_all = StratifiedRandomSampling(
    strata=['category', 'weekday', 'month', 'season'],
)
stratified_pair = StratifiedRandomSampling(
    strata=['category', 'weekday'],
    alias='2',
)
cluster_10 = ClusterSampling(
    df=df,
    n_clusters=10,
    fields=list(cluster_fields),
    random_state=RANDOM_STATE,
)
cluster_20 = ClusterSampling(
    df=df,
    n_clusters=20,
    fields=list(cluster_fields),
    random_state=RANDOM_STATE,
    alias='2',
)

sampling_strategies = [
    random_sampler,
    stratified_all,
    stratified_pair,
    cluster_10,
    cluster_20,
]

# Metric used to score each strategy's samples.
evaluation_strategy = DistributionComparisson()

# Bundle the data, the candidate samplers and the metric into one evaluation.
evaluation = Evaluation(
    df=df,
    sampling_strategies=sampling_strategies,
    evaluation_strategy=evaluation_strategy,
)

In [4]:
# Run the evaluation with the notebook-wide seed and keep the return value.
# NOTE(review): `e` is not referenced again in the visible cells.
e = evaluation.run(random_state=RANDOM_STATE)

In [5]:
# Render the evaluation's plot (presumably of the results produced by the
# preceding `run` call — confirm in evaluation.core).
evaluation.plot()

# Dependencias

- Python 3.10.11

- Requirements:
    ```text
    altair==5.1.2
    asttokens==2.4.1
    attrs==23.1.0
    blinker==1.7.0
    branca==0.7.0
    cachetools==5.3.2
    certifi==2023.11.17
    charset-normalizer==3.3.2
    click==8.1.7
    colorama==0.4.6
    comm==0.2.0
    debugpy==1.8.0
    decorator==5.1.1
    exceptiongroup==1.1.3
    executing==2.0.1
    fastjsonschema==2.19.0
    folium==0.14.0
    gitdb==4.0.11
    GitPython==3.1.40
    idna==3.4
    importlib-metadata==6.8.0
    ipykernel==6.26.0
    ipython==8.17.2
    jedi==0.19.1
    Jinja2==3.1.2
    joblib==1.3.2
    jsonschema==4.20.0
    jsonschema-specifications==2023.11.1
    jupyter_client==8.6.0
    jupyter_core==5.5.0
    markdown-it-py==3.0.0
    MarkupSafe==2.1.3
    matplotlib-inline==0.1.6
    mdurl==0.1.2
    nbformat==5.9.2
    nest-asyncio==1.5.8
    numpy==1.26.2
    packaging==23.2
    pandas==2.1.3
    parso==0.8.3
    Pillow==10.1.0
    platformdirs==4.0.0
    plotly==5.18.0
    prompt-toolkit==3.0.41
    protobuf==4.25.1
    psutil==5.9.6
    pure-eval==0.2.2
    pyarrow==14.0.1
    pydeck==0.8.1b0
    Pygments==2.16.1
    python-dateutil==2.8.2
    pytz==2023.3.post1
    pywin32==306
    pyzmq==25.1.1
    referencing==0.31.0
    requests==2.31.0
    rich==13.7.0
    rpds-py==0.13.0
    scikit-learn==1.3.2
    scipy==1.11.4
    six==1.16.0
    smmap==5.0.1
    stack-data==0.6.3
    streamlit==1.28.2
    streamlit-folium==0.16.0
    tenacity==8.2.3
    threadpoolctl==3.2.0
    toml==0.10.2
    toolz==0.12.0
    tornado==6.3.3
    traitlets==5.13.0
    typing_extensions==4.8.0
    tzdata==2023.3
    tzlocal==5.2
    urllib3==2.1.0
    validators==0.22.0
    watchdog==3.0.0
    wcwidth==0.2.10
    zipp==3.17.0
    ```