# Ocena modeli dystrybucyjnych dla korpusu pełnego i wzorcowego

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

tqdm.pandas()

from src.settings import POLISH_ANNOTATIONS_FPATH, DATA_DIR

In [2]:
RANDOM_STATE = 42

EMB_VECTOR_SIZE = 100
EMB_WINDOW = 5
VOCAB_MIN_COUNT = 5

MIN_N_GRAM = 1
MAX_N_GRAM = 4

EPOCHS = 30

In [3]:
import string
import token
import numpy as np
from nltk.tokenize import word_tokenize


def preprocess(text, stopwords):
    text = text.lower()
    text = "".join([word for word in text if word not in string.punctuation])
    tokens = word_tokenize(text, language="polish")
    tokens = [word for word in tokens if word not in stopwords]
    if not any(tokens):
        return np.nan
    return " ".join(tokens)

with open(DATA_DIR / "polish.stopwords.txt", "r") as f:
    stop_words = set(f.readlines())

## Korpus pełny

In [4]:
korpus_pelny = pd.read_csv(DATA_DIR / "all_calls_texts_per_channel.csv")
korpus_pelny

Unnamed: 0.1,Unnamed: 0,text,channel,source_file,call
0,1,(yy) Dzień... dzień dobry pani. (yy) Ja dzwoni...,clnt,cbiz_tc_1.clnt.txt,cbiz_tc_1
1,2,"(...) Dobry, Anna Kwiatkowska, Nowy Bank. W cz...",cbiz_tc_1,cbiz_tc_1.txt,cbiz_tc_1
2,3,Kwiatkowska. Przychodnia MedMed. W czym mogę p...,agnt,cbiz_tc_10.agnt.txt,cbiz_tc_10
3,4,"Dzień dobry pani. Ja tutaj zadzwonię do pani, ...",clnt,cbiz_tc_10.clnt.txt,cbiz_tc_10
4,5,Kwiatkowska. Przychodnia MedMed. W czym mogę p...,cbiz_tc_10,cbiz_tc_10.txt,cbiz_tc_10
...,...,...,...,...,...
12024,12025,"(yy) Dzień dobry. Proszę pana, (yy) nie mogę s...",clnt,cbiz_tc_994.clnt.txt,cbiz_tc_994
12025,12026,Dzień dobry. Mikołaj Dębek. Biuro obsługi klie...,cbiz_tc_994,cbiz_tc_994.txt,cbiz_tc_994
12026,12027,"Dzień dobry, Andrzej Kwaśniak, Biuro Obsługi K...",agnt,cbiz_tc_995.agnt.txt,cbiz_tc_995
12027,12028,"(yy) Dzień dobry, Michał Flagowy z tej strony....",clnt,cbiz_tc_995.clnt.txt,cbiz_tc_995


In [5]:
korpus_pelny["text"].isna().sum()

0

In [6]:
korpus_pelny_preprocessed = korpus_pelny.copy()
korpus_pelny_preprocessed["text"] = korpus_pelny_preprocessed["text"].progress_apply(preprocess, args=(stop_words,))

korpus_pelny_preprocessed["text"][5]

  0%|          | 0/12029 [00:00<?, ?it/s]

'dzień dobry karolina adamczyk telekom sa w czym mogę pomóc oczywiście już wszystko sprawdzimy chciałam się tylko dowiedzieć yy czy to jest ten numer telefonu z którego pani dzwoni tak dobrze prosiłabym jeszcze o hasło abonenckie dobrze to w takim razie może inaczej jest pani właścicielem tego numeru tak dobrze to prosiłabym w takim razie o numer pesel i adres zameldowania sześćdziesiąt cztery tak tak siedem czy jeden przepraszam siedem dobrze tak czterysta sześćdziesiąt dobrze i jeszcze adres zameldowania ludowa tak dwadzieścia osiem tak siemianowice śląskie yy dobrze proszę jeszcze dać dać chwilkę system wczyta dane dobrze yy proszę mi jeszcze powiedzieć jak proszę mi jeszcze powiedzieć jak rozumiem w ustawieniach tryb sieci tak aha dobrze wie pani co to spróbujemy zrobić reset w takim razie i powinno pomóc w tej sytuacji prosiłabym rozłączyć internet i spróbować się połączyć ponownie za około piętnaście minut być może to już wcześniej się włączy mogę pani jeszcze jakoś pomóc dobrze 

In [7]:
pelny_tokens = korpus_pelny_preprocessed["text"].str.split().explode().tolist()
len(pelny_tokens)

6674344

## Zbiór danych do klasyfikacji (tekst - numer zadania), korpus wzorcowy

In [8]:
korpus_wzorcowy = pd.read_json(POLISH_ANNOTATIONS_FPATH, lines=True)
korpus_wzorcowy

Unnamed: 0,start,end,text,doc_filepath,video_filename,task_label
0,39040,41120,"Myślę, że mam inny pomysł, można?",/15/K66BF13-26_15_15_signsNO.eaf,K66BF13-26.mp4,15
1,41120,49680,"Chyba ten znak mówi, że jak ktoś będzie spacer...",/15/K66BF13-26_15_15_signsNO.eaf,K66BF13-26.mp4,15
2,49680,55280,Trzeba przejść łukiem obok leżącego i o tym zn...,/15/K66BF13-26_15_15_signsNO.eaf,K66BF13-26.mp4,15
3,61280,66840,"Okrągły znak pomaga nam, mówi, że są pasy na u...",/15/K66BF13-26_15_15_signsNO.eaf,K66BF13-26.mp4,15
4,66840,73200,"Jak ktoś zobaczy, ale zignoruje ten znak, to m...",/15/K66BF13-26_15_15_signsNO.eaf,K66BF13-26.mp4,15
...,...,...,...,...,...,...
40350,248960,250320,[uderzanie w coś],/13/K17BF13-26_13_13_comics.eaf,K17BF13-26.mp4,13
40351,279560,291400,Kot zobaczył w akwarium rybkę. Podszedł i dał ...,/13/K17BF13-26_13_13_comics.eaf,K17BF13-26.mp4,13
40352,291400,347120,"Zjadłam, zjadłam.",/13/K17BF13-26_13_13_comics.eaf,K17BF13-26.mp4,13
40353,347120,356440,"Zając biegnie, zobaczył wiszące pranie. Wskocz...",/13/K17BF13-26_13_13_comics.eaf,K17BF13-26.mp4,13


In [9]:
target_labels = korpus_wzorcowy["task_label"].astype(str).unique().tolist()
target_labels

['15', '24', '7', '17', '8', '13']

In [10]:
korpus_wzorcowy["text"].isna().sum(), korpus_wzorcowy["task_label"].isna().sum()

(0, 0)

In [11]:
korpus_wzorcowy_preprocessed = korpus_wzorcowy.copy()
korpus_wzorcowy_preprocessed["text"] = korpus_wzorcowy_preprocessed["text"].apply(preprocess, args=(stop_words,))

korpus_wzorcowy_preprocessed["text"][5]

'ktoś biega bez świadomości i może spowodować wypadek a znak nam pomaga przypomina żeby uważać'

In [12]:
korpus_wzorcowy_preprocessed.isna().sum()

start             0
end               0
text              2
doc_filepath      0
video_filename    0
task_label        0
dtype: int64

In [13]:
pl_data_preprocessed = korpus_wzorcowy_preprocessed.dropna()
pl_data_preprocessed.shape

(40353, 6)

In [14]:
wzorcowy_X_train, wzorcowy_X_test, wzorcowy_y_train, wzorcowy_y_test = train_test_split(
    pl_data_preprocessed['text'],
    pl_data_preprocessed['task_label'],
    test_size=0.2,
    stratify=pl_data_preprocessed['task_label'],
    random_state=RANDOM_STATE
)

In [15]:
wzorcowy_X_train.shape, wzorcowy_X_test.shape, wzorcowy_y_train.shape, wzorcowy_y_test.shape

((32282,), (8071,), (32282,), (8071,))

In [16]:
wzorcowy_train_tokens = wzorcowy_X_train.str.split().explode().tolist()

## Modele dystrybucyjne - korpus wzorcowy

In [17]:
from gensim.models import Word2Vec

# sg ({0, 1}, optional) – Training algorithm: 1 for skip-gram; otherwise CBOW.
wzorcowy_word2vec_skipgram = Word2Vec(
    sg=1,
    vector_size=EMB_VECTOR_SIZE,
    window=EMB_WINDOW,
    min_count=VOCAB_MIN_COUNT,
    workers=4
)

In [18]:
wzorcowy_word2vec_cbow = Word2Vec(
    sg=0,
    vector_size=EMB_VECTOR_SIZE,
    window=EMB_WINDOW,
    min_count=VOCAB_MIN_COUNT,
    workers=4
)

In [19]:
from gensim.models import FastText

wzorcowy_fasttext_skipgram = FastText(
    sg=1,
    vector_size=EMB_VECTOR_SIZE,
    window=EMB_WINDOW,
    min_count=VOCAB_MIN_COUNT,
    min_n=MIN_N_GRAM,
    max_n=MAX_N_GRAM,
    workers=4
)

In [20]:
wzorcowy_fasttext_cbow = FastText(
    sg=0,
    vector_size=EMB_VECTOR_SIZE,
    window=EMB_WINDOW,
    min_count=VOCAB_MIN_COUNT,
    min_n=MIN_N_GRAM,
    max_n=MAX_N_GRAM,
    workers=4
)

In [21]:
emb_models = [wzorcowy_word2vec_skipgram, wzorcowy_word2vec_cbow, wzorcowy_fasttext_skipgram, wzorcowy_fasttext_cbow]

In [22]:
for emb_model in tqdm(emb_models):
    emb_model.build_vocab(wzorcowy_train_tokens)
    print(f"{emb_model.__class__.__name__} {'Skip-gram' if emb_model.sg == 1 else 'CBOW'}", len(emb_model.wv))

  0%|          | 0/4 [00:00<?, ?it/s]

Word2Vec Skip-gram 41
Word2Vec CBOW 41
FastText Skip-gram 41
FastText CBOW 41


In [23]:
for emb_model in tqdm(emb_models):
    emb_model.train(wzorcowy_train_tokens, total_examples=len(wzorcowy_train_tokens), epochs=EPOCHS)

  0%|          | 0/4 [00:00<?, ?it/s]

In [24]:
def vectorize(sentence, emb_model):
    words = sentence.split()
    words_vecs = [emb_model.wv[word] for word in words if word in emb_model.wv]
    if len(words_vecs) == 0:
        return np.zeros(100)
    words_vecs = np.array(words_vecs)
    return words_vecs.mean(axis=0)

wzorcowy_X_train_word2vec_skipgram = np.array([vectorize(sentence, emb_model=wzorcowy_word2vec_skipgram) for sentence in wzorcowy_X_train])
wzorcowy_X_test_word2vec_skipgram = np.array([vectorize(sentence, emb_model=wzorcowy_word2vec_skipgram) for sentence in wzorcowy_X_test])

In [25]:
wzorcowy_X_train_word2vec_cbow = np.array([vectorize(sentence, emb_model=wzorcowy_word2vec_cbow) for sentence in wzorcowy_X_train])
wzorcowy_X_test_word2vec_cbow = np.array([vectorize(sentence, emb_model=wzorcowy_word2vec_cbow) for sentence in wzorcowy_X_test])

In [26]:
wzorcowy_X_train_fasttext_skipgram = np.array([vectorize(sentence, emb_model=wzorcowy_fasttext_skipgram) for sentence in wzorcowy_X_train])
wzorcowy_X_test_fasttext_skipgram = np.array([vectorize(sentence, emb_model=wzorcowy_fasttext_skipgram) for sentence in wzorcowy_X_test])

In [27]:
wzorcowy_X_train_fasttext_cbow = np.array([vectorize(sentence, emb_model=wzorcowy_fasttext_cbow) for sentence in wzorcowy_X_train])
wzorcowy_X_test_fasttext_cbow = np.array([vectorize(sentence, emb_model=wzorcowy_fasttext_cbow) for sentence in wzorcowy_X_test])

In [28]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

wzorcowy_word2vec_skipgram_clf = LogisticRegression()
wzorcowy_word2vec_skipgram_clf.fit(wzorcowy_X_train_word2vec_skipgram, wzorcowy_y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [29]:
wzorcowy_word2vec_cbow_clf = LogisticRegression()
wzorcowy_word2vec_cbow_clf.fit(wzorcowy_X_train_word2vec_cbow, wzorcowy_y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [30]:
wzorcowy_fasttext_skipgram_clf = LogisticRegression()
wzorcowy_fasttext_skipgram_clf.fit(wzorcowy_X_train_fasttext_skipgram, wzorcowy_y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [31]:
wzorcowy_fasttext_cbow_clf = LogisticRegression()
wzorcowy_fasttext_cbow_clf.fit(wzorcowy_X_train_fasttext_cbow, wzorcowy_y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [32]:
from pprint import pprint
from sklearn.metrics import classification_report

def evaluate_clf(clf, X_test, y_test, target_names=None):
    y_pred = clf.predict(X_test)
    return classification_report(y_test, y_pred, output_dict=False, target_names=target_names, zero_division=0, digits=4)

word2vec_skipgram_clf_report = evaluate_clf(wzorcowy_word2vec_skipgram_clf, wzorcowy_X_test_word2vec_skipgram, wzorcowy_y_test, target_names=target_labels)
print(word2vec_skipgram_clf_report)

              precision    recall  f1-score   support

          15     0.6414    0.0700    0.1262      1814
          24     0.0000    0.0000    0.0000      1143
           7     0.0000    0.0000    0.0000       997
          17     0.3228    0.8423    0.4668      2410
           8     0.2940    0.3681    0.3269      1266
          13     0.0000    0.0000    0.0000       441

    accuracy                         0.3250      8071
   macro avg     0.2097    0.2134    0.1533      8071
weighted avg     0.2867    0.3250    0.2190      8071



In [33]:
word2vec_cbow_clf_report = evaluate_clf(wzorcowy_word2vec_cbow_clf, wzorcowy_X_test_word2vec_cbow, wzorcowy_y_test, target_names=target_labels)
print(word2vec_cbow_clf_report)

              precision    recall  f1-score   support

          15     0.6396    0.0695    0.1253      1814
          24     0.0000    0.0000    0.0000      1143
           7     0.0000    0.0000    0.0000       997
          17     0.3228    0.8423    0.4667      2410
           8     0.2940    0.3681    0.3269      1266
          13     0.0000    0.0000    0.0000       441

    accuracy                         0.3249      8071
   macro avg     0.2094    0.2133    0.1532      8071
weighted avg     0.2863    0.3249    0.2188      8071



In [34]:
fasttext_skipgram_clf_report = evaluate_clf(wzorcowy_fasttext_skipgram_clf, wzorcowy_X_test_fasttext_skipgram, wzorcowy_y_test, target_names=target_labels)
print(fasttext_skipgram_clf_report)

              precision    recall  f1-score   support

          15     0.4825    0.5408    0.5100      1814
          24     0.3585    0.1452    0.2067      1143
           7     0.3971    0.0542    0.0953       997
          17     0.4150    0.7415    0.5322      2410
           8     0.3936    0.3523    0.3718      1266
          13     0.0000    0.0000    0.0000       441

    accuracy                         0.4255      8071
   macro avg     0.3411    0.3057    0.2860      8071
weighted avg     0.3939    0.4255    0.3729      8071



In [35]:
fasttext_cbow_clf_report = evaluate_clf(wzorcowy_fasttext_cbow_clf, wzorcowy_X_test_fasttext_cbow, wzorcowy_y_test, target_names=target_labels)
print(fasttext_cbow_clf_report)

              precision    recall  f1-score   support

          15     0.4867    0.5628    0.5220      1814
          24     0.3750    0.1864    0.2490      1143
           7     0.3622    0.0923    0.1471       997
          17     0.4449    0.7137    0.5481      2410
           8     0.3918    0.3918    0.3918      1266
          13     0.1053    0.0045    0.0087       441

    accuracy                         0.4391      8071
   macro avg     0.3610    0.3252    0.3111      8071
weighted avg     0.4073    0.4391    0.3963      8071



## Modele dystrybucyjne - korpus pełny

In [36]:
pelny_word2vec_skipgram = Word2Vec(
    sg=1,
    vector_size=EMB_VECTOR_SIZE,
    window=EMB_WINDOW,
    min_count=VOCAB_MIN_COUNT,
    workers=4
)

In [37]:
pelny_word2vec_cbow = Word2Vec(
    sg=0,
    vector_size=EMB_VECTOR_SIZE,
    window=EMB_WINDOW,
    min_count=VOCAB_MIN_COUNT,
    workers=4
)

In [38]:
pelny_fasttext_skipgram = FastText(
    sg=1,
    vector_size=EMB_VECTOR_SIZE,
    window=EMB_WINDOW,
    min_count=VOCAB_MIN_COUNT,
    min_n=MIN_N_GRAM,
    max_n=MAX_N_GRAM,
    workers=4
)

In [39]:
pelny_fasttext_cbow = FastText(
    sg=0,
    vector_size=EMB_VECTOR_SIZE,
    window=EMB_WINDOW,
    min_count=VOCAB_MIN_COUNT,
    min_n=MIN_N_GRAM,
    max_n=MAX_N_GRAM,
    workers=4
)

In [40]:
pelny_emb_models = [pelny_word2vec_skipgram, pelny_word2vec_cbow, pelny_fasttext_skipgram, pelny_fasttext_cbow]

In [41]:
for emb_model in tqdm(pelny_emb_models):
    emb_model.build_vocab(pelny_tokens)
    print(f"{emb_model.__class__.__name__} {'Skip-gram' if emb_model.sg == 1 else 'CBOW'}", len(emb_model.wv))

  0%|          | 0/4 [00:00<?, ?it/s]

Word2Vec Skip-gram 49
Word2Vec CBOW 49
FastText Skip-gram 49
FastText CBOW 49


In [42]:
for emb_model in tqdm(pelny_emb_models):
    emb_model.train(pelny_tokens, total_examples=len(pelny_tokens), epochs=EPOCHS)

  0%|          | 0/4 [00:00<?, ?it/s]

In [43]:
pelny_X_train_word2vec_skipgram = np.array([vectorize(sentence, emb_model=pelny_word2vec_skipgram) for sentence in wzorcowy_X_train])
pelny_X_test_word2vec_skipgram = np.array([vectorize(sentence, emb_model=pelny_word2vec_skipgram) for sentence in wzorcowy_X_test])

In [44]:
pelny_X_train_word2vec_cbow = np.array([vectorize(sentence, emb_model=pelny_word2vec_cbow) for sentence in wzorcowy_X_train])
pelny_X_test_word2vec_cbow = np.array([vectorize(sentence, emb_model=pelny_word2vec_cbow) for sentence in wzorcowy_X_test])

In [45]:
pelny_X_train_fasttext_skipgram = np.array([vectorize(sentence, emb_model=pelny_fasttext_skipgram) for sentence in wzorcowy_X_train])
pelny_X_test_fasttext_skipgram = np.array([vectorize(sentence, emb_model=pelny_fasttext_skipgram) for sentence in wzorcowy_X_test])

In [46]:
pelny_X_train_fasttext_cbow = np.array([vectorize(sentence, emb_model=pelny_fasttext_cbow) for sentence in wzorcowy_X_train])
pelny_X_test_fasttext_cbow = np.array([vectorize(sentence, emb_model=pelny_fasttext_cbow) for sentence in wzorcowy_X_test])

In [47]:
pelny_word2vec_skipgram_clf = LogisticRegression()
pelny_word2vec_skipgram_clf.fit(pelny_X_train_word2vec_skipgram, wzorcowy_y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [48]:
pelny_word2vec_cbow_clf = LogisticRegression()
pelny_word2vec_cbow_clf.fit(pelny_X_train_word2vec_cbow, wzorcowy_y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [49]:
pelny_fasttext_skipgram_clf = LogisticRegression()
pelny_fasttext_skipgram_clf.fit(pelny_X_train_fasttext_skipgram, wzorcowy_y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [50]:
pelny_fasttext_cbow_clf = LogisticRegression()
pelny_fasttext_cbow_clf.fit(pelny_X_train_fasttext_cbow, wzorcowy_y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [51]:
pelny_word2vec_skipgram_clf_report = evaluate_clf(pelny_word2vec_skipgram_clf, pelny_X_test_word2vec_skipgram, wzorcowy_y_test, target_names=target_labels)
print(pelny_word2vec_skipgram_clf_report)

              precision    recall  f1-score   support

          15     0.6414    0.0700    0.1262      1814
          24     0.0000    0.0000    0.0000      1143
           7     0.0000    0.0000    0.0000       997
          17     0.3238    0.8423    0.4677      2410
           8     0.2978    0.3768    0.3326      1266
          13     0.0000    0.0000    0.0000       441

    accuracy                         0.3264      8071
   macro avg     0.2105    0.2149    0.1544      8071
weighted avg     0.2875    0.3264    0.2202      8071



In [52]:
pelny_word2vec_cbow_clf_report = evaluate_clf(pelny_word2vec_cbow_clf, pelny_X_test_word2vec_cbow, wzorcowy_y_test, target_names=target_labels)
print(pelny_word2vec_cbow_clf_report)

              precision    recall  f1-score   support

          15     0.6364    0.0695    0.1252      1814
          24     0.0000    0.0000    0.0000      1143
           7     0.0000    0.0000    0.0000       997
          17     0.3236    0.8419    0.4675      2410
           8     0.2978    0.3768    0.3326      1266
          13     0.0000    0.0000    0.0000       441

    accuracy                         0.3261      8071
   macro avg     0.2096    0.2147    0.1542      8071
weighted avg     0.2864    0.3261    0.2199      8071



In [53]:
pelny_fasttext_skipgram_clf_report = evaluate_clf(pelny_fasttext_skipgram_clf, pelny_X_test_fasttext_skipgram, wzorcowy_y_test, target_names=target_labels)
print(pelny_fasttext_skipgram_clf_report)

              precision    recall  f1-score   support

          15     0.4808    0.5099    0.4949      1814
          24     0.3787    0.2213    0.2794      1143
           7     0.4889    0.0662    0.1166       997
          17     0.4341    0.7515    0.5503      2410
           8     0.4132    0.3815    0.3967      1266
          13     0.0000    0.0000    0.0000       441

    accuracy                         0.4384      8071
   macro avg     0.3659    0.3217    0.3063      8071
weighted avg     0.4165    0.4384    0.3918      8071



In [54]:
pelny_fasttext_cbow_clf_report = evaluate_clf(pelny_fasttext_cbow_clf, pelny_X_test_fasttext_cbow, wzorcowy_y_test, target_names=target_labels)
print(pelny_fasttext_cbow_clf_report)

              precision    recall  f1-score   support

          15     0.4995    0.5788    0.5363      1814
          24     0.3936    0.2248    0.2862      1143
           7     0.4279    0.0893    0.1477       997
          17     0.4613    0.7241    0.5635      2410
           8     0.4131    0.4171    0.4151      1266
          13     0.2553    0.0272    0.0492       441

    accuracy                         0.4561      8071
   macro avg     0.4085    0.3435    0.3330      8071
weighted avg     0.4374    0.4561    0.4154      8071

