In [17]:
import pandas as pd
import numpy as np

## Expected folder structure
## 
```
.
├── data
│   ├── dummy_gold.txt
│   ├── dummy_pred.txt
│   └── efcamdat_100k_with_text_and_measures.csv
├── logs
│   ├── new_data_classification_report.txt
│   ├── test_classification_report.txt
│   └── train_classification_report.txt
├── notebooks
│   └── andrews-CEFR-benchmark.ipynb
├── README.md
├── requirements.txt
└── scripts
    ├── aff.py
    ├── evaluate_predictions.py
    ├── evaluation.py
    ├── predict.py
    ├── train_generic.py
    └── train.py
```

In [9]:
andrews100kdf = pd.read_csv("../data/efcamdat_100k_with_text_and_measures.csv",index_col=0)

In [11]:
andrews100kdf.head()


Unnamed: 0,writing_id,cefr_level,measures.collocations.text_level.ratio_num_token,measures.collocations.text_level.ttr,measures.counts.acl,measures.counts.acl_ratio,measures.counts.acl:relcl,measures.counts.acl:relcl_ratio,measures.counts.ADJ,measures.counts.ADJ_ratio,...,measures.taassc.L2SCA.CT_T,measures.taassc.L2SCA.DC_C,measures.taassc.L2SCA.DC_T,measures.taassc.L2SCA.MLC,measures.taassc.L2SCA.MLS,measures.taassc.L2SCA.MLT,measures.taassc.L2SCA.T_S,measures.taassc.L2SCA.VP_T,text,l1
0,115499,B1,0.108696,1.0,2,0.021739,1,0.01087,3,0.032609,...,0.125,0.0,1.0,0.0,11.5,11.5,1.0,1.5,grandmas home remedies and recipes. Do you hav...,German
1,1081381,A1,0.111111,1.0,0,0.0,0,0.0,6,0.166667,...,0.166667,0.0,0.666667,0.0,16.0,5.333333,3.0,1.0,My friend is very nice.She comes from Italy.Sh...,Italian
2,452770,B1,0.176,1.0,2,0.016,1,0.008,5,0.04,...,0.111111,0.0,1.333333,0.0,125.0,13.888889,9.0,1.777778,"First, I will study a lot and finish my degree...",Portuguese
3,412035,A1,0.05,1.0,0,0.0,0,0.0,2,0.05,...,0.0,0.0,0.75,0.0,6.666667,10.0,0.666667,1.5,"Hy, my name's Andr. I have thirty one years ol...",Portuguese
4,132380,B1,0.061224,1.0,0,0.0,1,0.010204,6,0.061224,...,0.222222,0.0,0.888889,0.0,8.636364,10.555556,0.818182,1.777778,bello! I glad to congratulate you with the bes...,Russian


In [112]:
cefr_levels =["A1","A2","B1","B2","C1","C2"]
sample_size = 10
dummy_test_set = andrews100kdf.sample(sample_size).reset_index()
hard_golden_labels=dummy_test_set["cefr_level"].to_dict()
print(hard_golden_labels)

{0: 'B2', 1: 'A1', 2: 'A1', 3: 'A2', 4: 'A1', 5: 'A2', 6: 'C1', 7: 'A1', 8: 'A2', 9: 'A2'}


{0: 'A1',
 1: 'A1',
 2: 'A2',
 3: 'B1',
 4: 'A1',
 5: 'A1',
 6: 'A2',
 7: 'A2',
 8: 'A1',
 9: 'A2'}

In [124]:
random_probas = np.random.dirichlet(np.ones(5),size=sample_size)
'''predictions = {
    {
     "A1": 0.31,
     "A2": 0.54,
     "B1": 0.4,
     "B2": 0.74,
     "C1": 0.74,
    }
}'''
soft_predictions = {
    id_: {class_:proba for class_, proba in zip(cefr_levels, cefr_vector)}
    for id_, cefr_vector in zip(range(sample_size),random_probas.tolist())
}
idx_to_class_ = lambda v: {
         0: "A1",
         1: "A2",
         2: "B1",
         3: "B2",
         4: "C1",
}.get(v, None)
hard_predictions = {k:idx_to_class_(v) for k,v in zip(range(sample_size), np.argmax(random_probas,axis=1).tolist())}

In [141]:
from sklearn.metrics import classification_report

class hard_predictions_eval:
    def __init__(self, hard_predictions_dict : dict, hard_gold_labels_dict : dict):
        self.hard_predictions_dict = hard_predictions_dict
        self.hard_gold_labels_dict = hard_gold_labels_dict
        self.y_pred = [v for v in self.hard_predictions_dict.values()], 
        self.y_true = [v for v in self.hard_gold_labels_dict.values()]
        print(self.y_pred, self.y_true)
    def accuracy(self):
        pass
    def precision(self):
        pass
    def recall(self):
        pass
    def report(self):
        # assuming dicts are aligned with all same ids
        self.report = classification_report(
            [v for v in self.hard_predictions_dict.values()], 
            [v for v in self.hard_gold_labels_dict.values()]
        )
        print(self.report)
        

In [142]:
def soft_predictions_eval(predictions, gold_labels):
    pass

In [143]:
hard_eval = hard_predictions_eval(hard_predictions,hard_golden_labels)

(['A1', 'C1', 'B2', 'B1', 'B2', 'A2', 'B1', 'A1', 'C1', 'B2'],) ['B2', 'A1', 'A1', 'A2', 'A1', 'A2', 'C1', 'A1', 'A2', 'A2']


In [144]:
hard_eval.report()

              precision    recall  f1-score   support

          A1       0.25      0.50      0.33         2
          A2       0.25      1.00      0.40         1
          B1       0.00      0.00      0.00         2
          B2       0.00      0.00      0.00         3
          C1       0.00      0.00      0.00         2

    accuracy                           0.20        10
   macro avg       0.10      0.30      0.15        10
weighted avg       0.07      0.20      0.11        10



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### What is the distribution of CEFR level ?


In [80]:
andrews100kdf["cefr_level"].value_counts()/len(andrews100kdf["cefr_level"])

cefr_level
A1    0.47215
A2    0.29701
B1    0.16172
B2    0.05497
C1    0.01415
Name: count, dtype: float64