# Javanese Sentiment Analysis for Hate Speech

**Pretrained Models**
- Javanese BERT: https://huggingface.co/w11wo/javanese-bert-small
- Javanese RoBERTa: https://huggingface.co/w11wo/javanese-roberta-small
- Javanese GPT-2: https://huggingface.co/w11wo/javanese-gpt2-small
- Indonesian RoBERTa: https://huggingface.co/flax-community/indonesian-roberta-base
- Indonesian GPT-2: https://huggingface.co/flax-community/gpt2-small-indonesian

In [3]:
from src.train import run_experiments_5cv
import numpy as np
import pandas as pd
import os
from transformers import logging
logging.set_verbosity(logging.ERROR)
import warnings
warnings.filterwarnings('ignore')

In [17]:
# Experiment configuration.
# Hugging Face Hub identifiers of the pretrained checkpoints to compare.
model_names = [
    'w11wo/javanese-bert-small',
    'w11wo/javanese-roberta-small',
    'w11wo/javanese-gpt2-small',
    'flax-community/indonesian-roberta-base',
    'flax-community/gpt2-small-indonesian',
]

# The values below are forwarded unchanged to run_experiments_5cv for every
# configuration. NOTE(review): the per-value descriptions are inferred from
# the names only -- confirm against src.train.
classifier_hidden_size = 64  # presumably the classifier head's hidden width
n_experiments = 1            # presumably repeated runs per configuration
n_epochs = 10                # presumably the maximum number of training epochs
patience = 2                 # presumably the early-stopping patience
batch_size = 16

# Smoke test: instantiate the tokenizer and then the base model of every
# checkpoint, so that a checkpoint that cannot be loaded fails here rather
# than part-way through the experiments. The loaded objects are discarded.
from transformers import AutoModel, AutoTokenizer
for model_name in model_names:
    for auto_class in (AutoTokenizer, AutoModel):
        auto_class.from_pretrained(model_name)

In [None]:
# Every (checkpoint, number of unfrozen layers, pooling) combination to
# evaluate. CLS pooling is skipped for the GPT checkpoints
# (presumably because GPT-2 has no [CLS] token -- confirm in src.train).
configurations = [
    (name, n_layers, pool)
    for name in model_names
    for n_layers in range(4)
    for pool in ('CLS', 'MLP')
    if pool != 'CLS' or 'gpt' not in name
]

# One entry per evaluated configuration, kept as parallel lists.
models, unfrozen_layers, poolings = [], [], []
f1s, mean_f1s, std_f1s = [], [], []

# Cross validation over all model configurations
for model_name, layers_to_unfreeze, pooling in configurations:
    # f1 holds the scores returned for this configuration; it is stored as-is
    # and summarised by its mean and (population) standard deviation.
    f1 = run_experiments_5cv(model_name, layers_to_unfreeze, pooling,
                             classifier_hidden_size, n_experiments, n_epochs,
                             patience, batch_size)
    models += [model_name]
    unfrozen_layers += [layers_to_unfreeze]
    poolings += [pooling]
    f1s += [f1]
    mean_f1s += [np.mean(f1)]
    std_f1s += [np.std(f1)]

# Persist the sweep: one row per evaluated configuration, written to
# results/results.csv without the index column.
os.makedirs('results', exist_ok=True)
result_columns = {
    'model': models,
    'unfrozen_layers': unfrozen_layers,
    'pooling': poolings,
    'f1': f1s,
    'mean_f1': mean_f1s,
    'std_f1': std_f1s,
}
df = pd.DataFrame(result_columns)
df.to_csv('results/results.csv', index=False)

In [13]:
# Build the summary table: one row per (model, pooling), one column per
# number of unfrozen layers, each cell formatted as 'mean ± std' of the F1.
df = pd.read_csv('results/results.csv')
# Keep only the checkpoint name (text after the last '/'); this also works
# for identifiers that carry no organisation prefix.
df['model'] = df['model'].str.split('/').str[-1]

# Reshape the dataframe: rows are (model, pooling), columns are
# (statistic, unfrozen_layers).
df_pivot = df.pivot_table(index=['model', 'pooling'], columns='unfrozen_layers',
                          values=['mean_f1', 'std_f1'])
# Take the layer values from the data instead of hard-coding [0, 1, 2, 3],
# so a results file covering a different set of layers still works.
layers = list(df_pivot['mean_f1'].columns)
df_pivot.reset_index(inplace=True)
# Flatten the two-level column index: ('mean_f1', 0) -> 'mean_f1_0',
# ('model', '') -> 'model_'.
df_pivot.columns = ['_'.join(map(str, col)).strip() for col in df_pivot.columns.values]

# Combine mean and standard deviation columns into a single column as 'mean ± std'
for layer in layers:
    means = df_pivot[f'mean_f1_{layer}']
    stds = df_pivot[f'std_f1_{layer}']
    df_pivot[f'f1_{layer}'] = [f'{m:.3f} ± {s:.3f}' for m, s in zip(means, stds)]

# Select only the combined columns along with model and pooling.
# .copy() makes df_combined an independent frame, so relabelling its columns
# below never touches (or warns about) df_pivot.
selected_columns = ['model_', 'pooling_'] + [f'f1_{layer}' for layer in layers]
df_combined = df_pivot[selected_columns].copy()
df_combined.columns = pd.MultiIndex.from_tuples(
    [('', 'Model'), ('', 'Pooling')]
    + [('Unfrozen layers', str(layer)) for layer in layers])
df_combined.to_latex('results/results.tex', index=False)
# Bare expression: displays the table as the notebook cell's output.
df_combined

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unfrozen layers,Unfrozen layers,Unfrozen layers,Unfrozen layers
Unnamed: 0_level_1,Model,Pooling,0,1,2,3
0,gpt2-small-indonesian,MLP,0.826 ± 0.021,0.854 ± 0.014,0.853 ± 0.025,0.852 ± 0.022
1,indonesian-roberta-base,CLS,0.692 ± 0.028,0.796 ± 0.018,0.736 ± 0.039,0.568 ± 0.001
2,indonesian-roberta-base,MLP,0.772 ± 0.014,0.803 ± 0.019,0.725 ± 0.078,0.568 ± 0.001
3,javanese-bert-small,CLS,0.691 ± 0.008,0.757 ± 0.025,0.670 ± 0.088,0.568 ± 0.001
4,javanese-bert-small,MLP,0.755 ± 0.020,0.759 ± 0.042,0.635 ± 0.085,0.568 ± 0.001
5,javanese-gpt2-small,MLP,0.800 ± 0.022,0.797 ± 0.030,0.811 ± 0.022,0.807 ± 0.026
6,javanese-roberta-small,CLS,0.684 ± 0.021,0.763 ± 0.021,0.760 ± 0.014,0.572 ± 0.008
7,javanese-roberta-small,MLP,0.748 ± 0.021,0.775 ± 0.006,0.658 ± 0.076,0.568 ± 0.001
