# Prepare experimentation

In [7]:
import pandas as pd
import io

# Experiment identifier; passed as --experiment-id to the CLI commands in this notebook.
EXPERIMENT_ID = "exp004"
# Input CSV handed to `generate-splits` via --input-file.
TRAIN_DATA_FILE = "artifacts/data/all_train.csv"
# Value passed to `generate-splits` as --backtesting-strategy.
BACKTESTING_STRATEGY = "train-val"
# Passed to `generate-splits` as --test-size.
TEST_SIZE_RATIO = 0.2
# Passed to `generate-splits` as --backtesting-val-size.
BACKTESTING_VAL_RATIO = 0.1
# Passed to `generate-splits` as --perf-estimation-val-size.
BACKTESTING_PERF_RATIO = 0.1
# Passed to `generate-splits` as --final-model-val-size.
FINAL_VAL_RATIO = 0.1
# NOTE(review): not referenced by any cell visible in this notebook —
# presumably the label column of the training CSV; confirm or remove.
TARGET_COLUMN = "Sentiment"

## Generate splits

In [None]:
# Generate the data splits for this experiment from TRAIN_DATA_FILE,
# using the strategy and size ratios defined in the configuration cell.
!uv run cli generate-splits \
    --experiment-id $EXPERIMENT_ID \
    --input-file $TRAIN_DATA_FILE \
    --backtesting-strategy $BACKTESTING_STRATEGY \
    --test-size $TEST_SIZE_RATIO \
    --backtesting-val-size $BACKTESTING_VAL_RATIO \
    --perf-estimation-val-size $BACKTESTING_PERF_RATIO \
    --final-model-val-size $FINAL_VAL_RATIO

In [2]:
# Inspect the experiment directory and its sub-directories.
# The path is built from EXPERIMENT_ID (IPython `{expr}` interpolation) so this
# cell keeps pointing at the right directory when the experiment id changes.
!ls -lah artifacts/experiments/{EXPERIMENT_ID}
!ls -lah artifacts/experiments/{EXPERIMENT_ID}/backtesting
!ls -lah artifacts/experiments/{EXPERIMENT_ID}/performance_estimation/
!ls -lah artifacts/experiments/{EXPERIMENT_ID}/final_model

total 296
drwxr-xr-x@ 6 witjakuczunpriv  staff   192B Oct 18 12:22 [1m[36m.[m[m
drwxr-xr-x@ 7 witjakuczunpriv  staff   224B Oct 18 12:38 [1m[36m..[m[m
drwxr-xr-x@ 4 witjakuczunpriv  staff   128B Oct 18 12:42 [1m[36mbacktesting[m[m
drwxr-xr-x@ 4 witjakuczunpriv  staff   128B Oct 18 12:22 [1m[36mfinal_model[m[m
drwxr-xr-x@ 4 witjakuczunpriv  staff   128B Oct 18 12:22 [1m[36mperformance_estimation[m[m
-rw-r--r--@ 1 witjakuczunpriv  staff   146K Oct 18 12:22 test.csv
total 8
drwxr-xr-x@ 4 witjakuczunpriv  staff   128B Oct 18 12:42 [1m[36m.[m[m
drwxr-xr-x@ 6 witjakuczunpriv  staff   192B Oct 18 12:22 [1m[36m..[m[m
-rw-r--r--@ 1 witjakuczunpriv  staff   648B Oct 18 13:00 results.json
drwxr-xr-x@ 4 witjakuczunpriv  staff   128B Oct 18 12:22 [1m[36mtrain-val[m[m
total 1176
drwxr-xr-x@ 4 witjakuczunpriv  staff   128B Oct 18 12:22 [1m[36m.[m[m
drwxr-xr-x@ 6 witjakuczunpriv  staff   192B Oct 18 12:22 [1m[36m..[m[m
-rw-r--r--@ 1 witjakuczunpriv  staff   521K

# Develop models...

In [None]:
# Print the model configuration names the CLI knows about; these are the
# values used for --model-config-name in the cells below.
!uv run cli list-models

[32m2025-10-18 16:53:50.613[0m | [1mINFO    [0m | [36mmlops.app[0m:[36mlist_models[0m:[36m134[0m - [1mAvailable models:[0m
- bielik-1.5B
- svm-opt
- roberta-base
- bert-base
- svm-base
- bert-micro-base
- roberta-base-long
- bert-micro-long
- gemma-3-1b-it


# Run experiments

## SVM base

In [4]:
# Model configuration name passed to `run-backtesting` via --model-config-name.
SVM_BASE_CFG = "svm-base"

In [None]:
# Run backtesting for the configuration named by SVM_BASE_CFG.
!uv run cli run-backtesting \
    --experiment-id $EXPERIMENT_ID \
    --model-config-name $SVM_BASE_CFG

## SVM with hyperparameter optimization

In [None]:
# Model configuration name passed to `run-backtesting` via --model-config-name.
SVM_OPT_CFG = "svm-opt"

In [None]:
# Run backtesting for the configuration named by SVM_OPT_CFG.
!uv run cli run-backtesting \
    --experiment-id $EXPERIMENT_ID \
    --model-config-name $SVM_OPT_CFG

## First check - check our baseline performance

In [None]:
# Compare the models evaluated so far for this experiment.
# Uses the same `uv run cli <command>` form as every other cell; the stray
# `python main.py` tokens were removed.
!uv run cli compare-models \
    --experiment-id $EXPERIMENT_ID

In [8]:
# Metrics table for the two SVM runs, kept as text and parsed into a DataFrame.
# NOTE(review): presumably pasted from the `compare-models` output — confirm.
# The first line is the leftover "mean" sub-header and is dropped by skiprows=1.
data = """mean      mean      mean      mean
    0   svm-base  0.619658  0.627262  0.619658  0.621129
    1    svm-opt  0.692308  0.678169  0.692308  0.674845
    """
df = pd.read_csv(
    io.StringIO(data),
    skiprows=1,
    # Regex separator instead of the deprecated `delim_whitespace=True`.
    sep=r"\s+",
    header=None,
    # Each row has one more field than `names`; the leading integer is the index.
    index_col=0,
    names=['model', 'accuracy', 'precision', 'recall', 'f1_score'],
)
df

  df = pd.read_csv(


Unnamed: 0,model,accuracy,precision,recall,f1_score
0,svm-base,0.619658,0.627262,0.619658,0.621129
1,svm-opt,0.692308,0.678169,0.692308,0.674845


## Transformer models

### Micro-bert

[HuggingFace Link](https://huggingface.co/boltuix/bert-micro)

In [5]:
# Model configuration name passed to `run-backtesting` via --model-config-name.
MICRO_BERT_BASE = "bert-micro-base"

In [None]:
# Run backtesting for the configuration named by MICRO_BERT_BASE.
!uv run cli run-backtesting \
    --experiment-id $EXPERIMENT_ID \
    --model-config-name $MICRO_BERT_BASE

## Second check

In [None]:
# Compare the models evaluated so far for this experiment.
!uv run cli compare-models \
    --experiment-id $EXPERIMENT_ID

In [9]:
# Metrics table after adding bert-micro-base, kept as text and parsed into a DataFrame.
# NOTE(review): presumably pasted from the `compare-models` output — confirm.
# The first line is the leftover "mean" sub-header and is dropped by skiprows=1.
data = """mean      mean      mean      mean
0  bert-micro-base  0.536325  0.287644  0.536325  0.374458
1         svm-base  0.619658  0.627262  0.619658  0.621129
2          svm-opt  0.692308  0.678169  0.692308  0.674845
"""
df = pd.read_csv(
    io.StringIO(data),
    skiprows=1,
    # Regex separator instead of the deprecated `delim_whitespace=True`.
    sep=r"\s+",
    header=None,
    # Each row has one more field than `names`; the leading integer is the index.
    index_col=0,
    names=['model', 'accuracy', 'precision', 'recall', 'f1_score'],
)
df

  df = pd.read_csv(


Unnamed: 0,model,accuracy,precision,recall,f1_score
0,bert-micro-base,0.536325,0.287644,0.536325,0.374458
1,svm-base,0.619658,0.627262,0.619658,0.621129
2,svm-opt,0.692308,0.678169,0.692308,0.674845


Big disappointment! Let's check a more advanced model.

## Optimized micro-bert

In [None]:
# Model configuration name passed to `run-backtesting` via --model-config-name.
MICRO_BERT_LONG = "bert-micro-long"

In [None]:
# Run backtesting for the configuration named by MICRO_BERT_LONG.
# The `cli` entry point was missing here; every other cell invokes
# `uv run cli <command>`.
!uv run cli run-backtesting \
    --experiment-id $EXPERIMENT_ID \
    --model-config-name $MICRO_BERT_LONG

## Third check

In [None]:
# Compare the models evaluated so far for this experiment.
!uv run cli compare-models \
    --experiment-id $EXPERIMENT_ID

In [None]:
# Metrics table for the third check, kept as text and parsed into a DataFrame.
# NOTE(review): the table has no bert-micro-long row — presumably it was pasted
# before that run produced results; refresh it from `compare-models` and confirm.
data = """model_name  accuracy precision    recall  f1_score
                        mean      mean      mean      mean
0  bert-micro-base  0.536325  0.287644  0.536325  0.374458
1         svm-base  0.619658  0.627262  0.619658  0.621129
2          svm-opt  0.692308  0.678169  0.692308  0.674845
"""
df = pd.read_csv(
    io.StringIO(data),
    # The pasted text has TWO header lines (column names and the "mean"
    # sub-header); both must be skipped or "mean ..." is parsed as a data row.
    skiprows=2,
    # Regex separator instead of the deprecated `delim_whitespace=True`.
    sep=r"\s+",
    header=None,
    # Each row has one more field than `names`; the leading integer is the index.
    index_col=0,
    names=['model', 'accuracy', 'precision', 'recall', 'f1_score'],
)
df

## Bigger model Roberta

In [10]:
# Model configuration name passed to `run-backtesting` via --model-config-name.
ROBERTA_BASE = "roberta-base"

In [11]:
# Run backtesting for the configuration named by ROBERTA_BASE.
# A space now separates `$EXPERIMENT_ID` from the line-continuation backslash,
# matching the other cells and keeping the variable reference unambiguous.
!uv run cli run-backtesting \
    --experiment-id $EXPERIMENT_ID \
    --model-config-name $ROBERTA_BASE

[32m2025-10-18 16:55:30.603[0m | [1mINFO    [0m | [36mmlops.runner[0m:[36mrun_backtesting[0m:[36m58[0m - [1m--- Backtesting Fold 0 ---[0m
[32m2025-10-18 16:55:30.603[0m | [1mINFO    [0m | [36mmlops.runner[0m:[36m_load_model_from_config[0m:[36m28[0m - [1mLoading model: RobertaModel from models.roberta.model[0m
[32m2025-10-18 16:55:34.793[0m | [1mINFO    [0m | [36mmlops.runner[0m:[36mrun_backtesting[0m:[36m68[0m - [1mTraining model...[0m
[32m2025-10-18 16:55:34.793[0m | [1mINFO    [0m | [36mmodels.roberta.pipeline[0m:[36mrun[0m:[36m15[0m - [1mPreprocessing data for RoBERTa model...[0m
Map: 100%|████████████████████████| 4205/4205 [00:00<00:00, 11619.62 examples/s]
[32m2025-10-18 16:55:35.782[0m | [1mINFO    [0m | [36mmodels.roberta.pipeline[0m:[36mrun[0m:[36m41[0m - [1mPreprocessing validation data...[0m
Map: 100%|██████████████████████████| 468/468 [00:00<00:00, 11702.31 examples/s]
[32m2025-10-18 16:55:35.843[0m | [1mINFO  

## Fourth check

In [12]:
# Compare the models evaluated so far for this experiment.
!uv run cli compare-models \
    --experiment-id $EXPERIMENT_ID

[32m2025-10-18 17:39:50.131[0m | [1mINFO    [0m | [36mevaluate[0m:[36mcompare_models[0m:[36m79[0m - [1m--- Model Comparison for Experiment 'exp004' ---[0m
[32m2025-10-18 17:39:50.132[0m | [1mINFO    [0m | [36mevaluate[0m:[36mcompare_models[0m:[36m82[0m - [1m
--- Run Type: backtesting ---[0m
[32m2025-10-18 17:39:50.132[0m | [1mINFO    [0m | [36mevaluate[0m:[36mcompare_models[0m:[36m91[0m - [1mSummary for backtesting (train-val strategy):[0m
[32m2025-10-18 17:39:50.138[0m | [1mINFO    [0m | [36mevaluate[0m:[36mcompare_models[0m:[36m95[0m - [1m
        model_name  accuracy precision    recall  f1_score
                        mean      mean      mean      mean
0  bert-micro-base  0.536325  0.287644  0.536325  0.374458
1     roberta-base  0.816239  0.817263  0.816239  0.793307
2         svm-base  0.619658  0.627262  0.619658  0.621129
3          svm-opt  0.692308  0.678169  0.692308  0.674845[0m


In [None]:
# Metrics table copied from the `compare-models` output of the fourth check
# (same rows as the log printed by that cell), parsed into a DataFrame.
# The first line is the leftover "mean" sub-header and is dropped by skiprows=1.
data = """mean      mean      mean      mean
0  bert-micro-base  0.536325  0.287644  0.536325  0.374458
1     roberta-base  0.816239  0.817263  0.816239  0.793307
2         svm-base  0.619658  0.627262  0.619658  0.621129
3          svm-opt  0.692308  0.678169  0.692308  0.674845
"""
df = pd.read_csv(
    io.StringIO(data),
    skiprows=1,
    # Regex separator instead of the deprecated `delim_whitespace=True`.
    sep=r"\s+",
    header=None,
    # Each row has one more field than `names`; the leading integer is the index.
    index_col=0,
    names=['model', 'accuracy', 'precision', 'recall', 'f1_score'],
)
df

  df = pd.read_csv(


Unnamed: 0,model,accuracy,precision,recall,f1_score
0,bert-micro-base,0.536325,0.287644,0.536325,0.374458
1,roberta-base,0.816239,0.817263,0.816239,0.793307
2,svm-base,0.619658,0.627262,0.619658,0.621129
3,svm-opt,0.692308,0.678169,0.692308,0.674845
