In [None]:
# import libraries
from gensim.models import Doc2Vec
import gensim
from gensim.models.doc2vec import TaggedDocument
import multiprocessing
from sklearn import utils
import pandas as pd
from gensim.models.phrases import Phrases, Phraser
import numpy as np
import xgboost 
from tqdm import tqdm
from sklearn.model_selection import train_test_split
tqdm.pandas(desc="progress-bar")
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn import decomposition, ensemble
from collections import Counter
from nltk import ngrams

  from pandas import Panel


In [None]:
def classification_report(x_train, x_test, y_train, y_test):
  models = []
  models.append(('LogisticRegression', linear_model.LogisticRegression(solver='newton-cg',multi_class='multinomial')))
  models.append(('RandomForest', ensemble.RandomForestClassifier(n_estimators=100)))

  for name, model in models:
      clf=model
      clf.fit(x_train, y_train)
      y_pred=clf.predict(x_test)
      print(f"{name}:")
      print(f"accuracy: {metrics.accuracy_score(y_pred=y_pred, y_true=y_test)}")
      print(f"precision: {metrics.precision_score(y_pred=y_pred, y_true=y_test, average='macro')}")
      print(f"recall: {metrics.recall_score(y_pred=y_pred, y_true=y_test, average='macro')}")
      print(f"{metrics.classification_report(y_pred=y_pred, y_true=y_test)}")

In [None]:
def get_word_counts(data):
  words = data.tweet.to_string().split()
  return Counter(words)
  

In [None]:
# word2vec hyper parameters
# min_count = corpusta kelimenin en az bulunma sayısı eğer kelime bu kadardan az geçiyorsa anlam ifade etmediği varsayılır(default=5)
# vector_size = kelimelerin ifade edileceği vektörün boyut sayısı
# window = current ve predicted word arasındaki maksimum mesafe
# sg = 0 cbow, 1 skip-gram
# negative = eğer sıfırdan büyük olursa negative sampling kullanılır 5-20 arasında olmalı
# alpha = başlangıç learning rate
# min_alpha = eğitim aşamasında learning rate linear olarak bunu düşer
# epoch = iterasyon sayısı


In [None]:
def labelize_tweets_ug(tweets,label):
    result = []
    prefix = label
    for i, t in zip(tweets.index, tweets):
        result.append(TaggedDocument(t.split(), [prefix + '_%s' % i]))
    return result

In [None]:
def train_doc2vec(corpus, n_epoch, name_corpus, vector_size, negative, window, min_count, alpha, min_alpha):
  cores = multiprocessing.cpu_count()
  model = Doc2Vec(size=vector_size, negative=negative, window=window, min_count=min_count, workers=cores, alpha=alpha, min_alpha=min_alpha)
  model.build_vocab(corpus)

  for epoch in range(n_epoch):
    model.train(utils.shuffle(corpus), total_examples=len(corpus), epochs=1)
    model.alpha -= 0.002
    model.min_alpha = model.alpha

  model.save(f"/content/drive/MyDrive/hesaplamalı_anlambilim_ödev/trained_embeddings/Doc2Vec_{name_corpus}_size_{vector_size}_window_{window}_min_count_{min_count}.model")
  return model

In [None]:
def get_mean_vector(model, words):
    # remove out-of-vocabulary words
    words = [word for word in words if word in model.wv]
    if len(words) >= 1:
        return np.mean(model[words], axis=0)
    else:
        return np.zeros((1, model.vector_size))

In [None]:
def get_vectors(model, corpus):
  vectors = []
  for sentence in corpus:
      vec = get_mean_vector(model, sentence)
      vectors.append(vec)
  return vectors

In [None]:
def get_max_len_sentence(series):
  res = series.str.split().str.len().max()

  print(f"The maximum length in words are : {res}") 

**TWEET METINLERI İÇİN WORD2VEC BENCHMARK**

In [None]:
tweet_train = pd.read_csv("/content/drive/MyDrive/hesaplamalı_anlambilim_ödev/preprocess_train.csv")
tweet_test = pd.read_csv("/content/drive/MyDrive/hesaplamalı_anlambilim_ödev/preprocess_test.csv")

In [None]:
tweet_test.dropna(inplace=True)
tweet_test.reset_index(drop=True,inplace=True)

tweet_train.dropna(inplace=True)
tweet_train.reset_index(drop=True,inplace=True)

x_train=tweet_train.tweet
y_train=tweet_train.sentiment.map({'olumlu':1,'olumsuz':-1,'notr':0}).values
x_test=tweet_test.tweet
y_test=tweet_test.sentiment.map({'olumlu':1,'olumsuz':-1,'notr':0}).values

In [None]:
concat = pd.concat([x_train, x_test])
corpus = labelize_tweets_ug(concat, 'all')

In [None]:
corpus_train = pd.DataFrame(x_train)['tweet'].apply(lambda x: x.split())
corpus_test = pd.DataFrame(x_test)['tweet'].apply(lambda x: x.split())

In [None]:
# min_count = corpusta kelimenin en az bulunma sayısı eğer kelime bu kadardan az geçiyorsa anlam ifade etmediği varsayılır(default=5)
# vector_size = kelimelerin ifade edileceği vektörün boyut sayısı
# window = current ve predicted word arasındaki maksimum mesafe
# sg = 0 cbow, 1 skip-gram
# negative = eğer sıfırdan büyük olursa negative sampling kullanılır 5-20 arasında olmalı
# alpha = başlangıç learning rate
# min_alpha = eğitim aşamasında learning rate linear olarak bunu düşer
# epoch = iterasyon sayısı

In [None]:
get_max_len_sentence(pd.DataFrame(concat).tweet)

The maximum length in words are : 27


# epoch=25 negative=5 için 
# alpha 0.5 0.01 0.05 0.1 her biri window = 3, vector_size = 150 kullanılacak 
# window 3 5 7 her biri alpha = 0.05, vector_size = 150 kullanılacak 
# vector_size 25 50 150 200 her biri  için window = 3, alpha = 0.05

In [None]:
model_1 = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.5,
                         min_alpha = 0.065,
                         window = 3,
                         vector_size=150,
                         min_count=2)



In [None]:
vectors_train = get_vectors(model=model_1,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model_1,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

  
  


In [None]:
classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)

LogisticRegression:
accuracy: 0.5414733178654292
precision: 0.5404085333694485
recall: 0.5241725703838229
              precision    recall  f1-score   support

          -1       0.55      0.68      0.61      1373
           0       0.52      0.48      0.49      1160
           1       0.55      0.42      0.47       915

    accuracy                           0.54      3448
   macro avg       0.54      0.52      0.53      3448
weighted avg       0.54      0.54      0.54      3448

RandomForest:
accuracy: 0.5646751740139211
precision: 0.5735727991299947
recall: 0.5414843183063426
              precision    recall  f1-score   support

          -1       0.55      0.75      0.64      1373
           0       0.56      0.47      0.51      1160
           1       0.61      0.40      0.48       915

    accuracy                           0.56      3448
   macro avg       0.57      0.54      0.54      3448
weighted avg       0.57      0.56      0.55      3448



# alpha(learning rate) = 0.01

In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.01,
                         min_alpha = 0.065,
                         window = 3,
                         vector_size=150,
                         min_count=2)



In [None]:
vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

  
  


In [None]:
classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)

LogisticRegression:
accuracy: 0.44576566125290024
precision: 0.48117458352468234
recall: 0.40303354605059494
              precision    recall  f1-score   support

          -1       0.43      0.79      0.56      1373
           0       0.45      0.27      0.34      1160
           1       0.56      0.14      0.23       915

    accuracy                           0.45      3448
   macro avg       0.48      0.40      0.38      3448
weighted avg       0.47      0.45      0.40      3448

RandomForest:
accuracy: 0.4756380510440835
precision: 0.4735616294692864
recall: 0.4545301551044114
              precision    recall  f1-score   support

          -1       0.49      0.65      0.56      1373
           0       0.44      0.39      0.42      1160
           1       0.49      0.32      0.39       915

    accuracy                           0.48      3448
   macro avg       0.47      0.45      0.45      3448
weighted avg       0.47      0.48      0.47      3448



# learning rate 0.05

In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.05,
                         min_alpha = 0.065,
                         window = 3,
                         vector_size=150,
                         min_count=2)



In [None]:
vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

  
  


In [None]:
classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)

LogisticRegression:
accuracy: 0.6209396751740139
precision: 0.6183514683778863
recall: 0.6046258479080957
              precision    recall  f1-score   support

          -1       0.63      0.76      0.69      1373
           0       0.61      0.55      0.58      1160
           1       0.61      0.51      0.55       915

    accuracy                           0.62      3448
   macro avg       0.62      0.60      0.61      3448
weighted avg       0.62      0.62      0.62      3448

RandomForest:
accuracy: 0.6276102088167054
precision: 0.6329431530048656
recall: 0.6086500978772053
              precision    recall  f1-score   support

          -1       0.62      0.78      0.69      1373
           0       0.63      0.55      0.59      1160
           1       0.65      0.49      0.56       915

    accuracy                           0.63      3448
   macro avg       0.63      0.61      0.61      3448
weighted avg       0.63      0.63      0.62      3448



# learning rate 0.1

In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.1,
                         min_alpha = 0.065,
                         window = 3,
                         vector_size=150,
                         min_count=2)



In [None]:
vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

  
  


In [None]:
classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)

LogisticRegression:
accuracy: 0.627030162412993
precision: 0.6254648710300793
recall: 0.6129973167770785
              precision    recall  f1-score   support

          -1       0.63      0.75      0.68      1373
           0       0.62      0.56      0.59      1160
           1       0.62      0.53      0.57       915

    accuracy                           0.63      3448
   macro avg       0.63      0.61      0.62      3448
weighted avg       0.63      0.63      0.62      3448

RandomForest:
accuracy: 0.6209396751740139
precision: 0.6329067454002998
recall: 0.5954830325057722
              precision    recall  f1-score   support

          -1       0.60      0.82      0.69      1373
           0       0.64      0.54      0.59      1160
           1       0.66      0.43      0.52       915

    accuracy                           0.62      3448
   macro avg       0.63      0.60      0.60      3448
weighted avg       0.63      0.62      0.61      3448



# window = 3

In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.05,
                         min_alpha = 0.065,
                         window = 3,
                         vector_size=150,
                         min_count=2)



In [None]:
vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

  
  


In [None]:
classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)

LogisticRegression:
accuracy: 0.6194895591647331
precision: 0.6183849363197189
recall: 0.6027597753346332
              precision    recall  f1-score   support

          -1       0.63      0.76      0.69      1373
           0       0.61      0.55      0.58      1160
           1       0.62      0.50      0.55       915

    accuracy                           0.62      3448
   macro avg       0.62      0.60      0.61      3448
weighted avg       0.62      0.62      0.61      3448

RandomForest:
accuracy: 0.6252900232018561
precision: 0.6287557206956871
recall: 0.6047513217688594
              precision    recall  f1-score   support

          -1       0.62      0.79      0.69      1373
           0       0.63      0.55      0.58      1160
           1       0.64      0.48      0.55       915

    accuracy                           0.63      3448
   macro avg       0.63      0.60      0.61      3448
weighted avg       0.63      0.63      0.62      3448



# window size = 5

In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.05,
                         min_alpha = 0.065,
                         window = 5,
                         vector_size=150,
                         min_count=2)



In [None]:
vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)

  
  


LogisticRegression:
accuracy: 0.6203596287703016
precision: 0.6184036320174436
recall: 0.6045127896831098
              precision    recall  f1-score   support

          -1       0.63      0.76      0.69      1373
           0       0.61      0.55      0.58      1160
           1       0.62      0.51      0.56       915

    accuracy                           0.62      3448
   macro avg       0.62      0.60      0.61      3448
weighted avg       0.62      0.62      0.62      3448

RandomForest:
accuracy: 0.6281902552204176
precision: 0.6315091792195869
recall: 0.6103954450902496
              precision    recall  f1-score   support

          -1       0.62      0.78      0.69      1373
           0       0.63      0.55      0.59      1160
           1       0.65      0.50      0.57       915

    accuracy                           0.63      3448
   macro avg       0.63      0.61      0.61      3448
weighted avg       0.63      0.63      0.62      3448



# window size = 7

In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.05,
                         min_alpha = 0.065,
                         window = 7,
                         vector_size=150,
                         min_count=2)


vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)



LogisticRegression:
accuracy: 0.6316705336426914
precision: 0.6302541836655527
recall: 0.6164201003287674
              precision    recall  f1-score   support

          -1       0.64      0.76      0.69      1373
           0       0.62      0.56      0.59      1160
           1       0.63      0.53      0.57       915

    accuracy                           0.63      3448
   macro avg       0.63      0.62      0.62      3448
weighted avg       0.63      0.63      0.63      3448

RandomForest:
accuracy: 0.6310904872389791
precision: 0.6342579334545545
recall: 0.6129936021506041
              precision    recall  f1-score   support

          -1       0.62      0.78      0.69      1373
           0       0.64      0.56      0.60      1160
           1       0.64      0.50      0.56       915

    accuracy                           0.63      3448
   macro avg       0.63      0.61      0.62      3448
weighted avg       0.63      0.63      0.63      3448



# vector size = 25


In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.05,
                         min_alpha = 0.065,
                         window = 3,
                         vector_size=25,
                         min_count=2)


vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)



LogisticRegression:
accuracy: 0.580046403712297
precision: 0.574468053238078
recall: 0.5634051765881732
              precision    recall  f1-score   support

          -1       0.60      0.73      0.66      1373
           0       0.56      0.49      0.52      1160
           1       0.56      0.47      0.51       915

    accuracy                           0.58      3448
   macro avg       0.57      0.56      0.56      3448
weighted avg       0.58      0.58      0.57      3448

RandomForest:
accuracy: 0.6232598607888631
precision: 0.6252548529301244
recall: 0.6045589983555458
              precision    recall  f1-score   support

          -1       0.62      0.77      0.69      1373
           0       0.61      0.56      0.58      1160
           1       0.64      0.49      0.55       915

    accuracy                           0.62      3448
   macro avg       0.63      0.60      0.61      3448
weighted avg       0.62      0.62      0.62      3448



# vector  size = 50

In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.05,
                         min_alpha = 0.065,
                         window = 3,
                         vector_size=50,
                         min_count=2)


vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)



LogisticRegression:
accuracy: 0.597737819025522
precision: 0.5950555947585601
recall: 0.5796655536968699
              precision    recall  f1-score   support

          -1       0.61      0.75      0.67      1373
           0       0.58      0.52      0.55      1160
           1       0.59      0.47      0.52       915

    accuracy                           0.60      3448
   macro avg       0.60      0.58      0.58      3448
weighted avg       0.60      0.60      0.59      3448

RandomForest:
accuracy: 0.6241299303944315
precision: 0.6278673351151985
recall: 0.605032072277116
              precision    recall  f1-score   support

          -1       0.62      0.78      0.69      1373
           0       0.62      0.55      0.59      1160
           1       0.64      0.49      0.55       915

    accuracy                           0.62      3448
   macro avg       0.63      0.61      0.61      3448
weighted avg       0.63      0.62      0.62      3448



# vector size = 100

In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.05,
                         min_alpha = 0.065,
                         window = 3,
                         vector_size=100,
                         min_count=2)


vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)



LogisticRegression:
accuracy: 0.6142691415313225
precision: 0.6109570023197041
recall: 0.5969559573848169
              precision    recall  f1-score   support

          -1       0.63      0.75      0.68      1373
           0       0.60      0.56      0.58      1160
           1       0.60      0.49      0.54       915

    accuracy                           0.61      3448
   macro avg       0.61      0.60      0.60      3448
weighted avg       0.61      0.61      0.61      3448

RandomForest:
accuracy: 0.6235498839907193
precision: 0.6282551143730966
recall: 0.6034607578304624
              precision    recall  f1-score   support

          -1       0.61      0.79      0.69      1373
           0       0.63      0.55      0.58      1160
           1       0.64      0.48      0.55       915

    accuracy                           0.62      3448
   macro avg       0.63      0.60      0.61      3448
weighted avg       0.63      0.62      0.62      3448



# vector size = 150

In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.05,
                         min_alpha = 0.065,
                         window = 3,
                         vector_size=150,
                         min_count=2)


vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)



LogisticRegression:
accuracy: 0.6116589327146171
precision: 0.6094622155468646
recall: 0.5955526005518966
              precision    recall  f1-score   support

          -1       0.62      0.74      0.68      1373
           0       0.60      0.55      0.57      1160
           1       0.61      0.50      0.55       915

    accuracy                           0.61      3448
   macro avg       0.61      0.60      0.60      3448
weighted avg       0.61      0.61      0.61      3448

RandomForest:
accuracy: 0.6200696055684455
precision: 0.62714306918924
recall: 0.6003849784897052
              precision    recall  f1-score   support

          -1       0.61      0.79      0.68      1373
           0       0.62      0.53      0.57      1160
           1       0.65      0.49      0.56       915

    accuracy                           0.62      3448
   macro avg       0.63      0.60      0.60      3448
weighted avg       0.62      0.62      0.61      3448



# vector size = 200

In [None]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.05,
                         min_alpha = 0.065,
                         window = 3,
                         vector_size=200,
                         min_count=2)


vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)



LogisticRegression:
accuracy: 0.6203596287703016
precision: 0.6188806813077927
recall: 0.6046385940632858
              precision    recall  f1-score   support

          -1       0.63      0.75      0.68      1373
           0       0.61      0.56      0.58      1160
           1       0.62      0.51      0.56       915

    accuracy                           0.62      3448
   macro avg       0.62      0.60      0.61      3448
weighted avg       0.62      0.62      0.62      3448

RandomForest:
accuracy: 0.6354408352668214
precision: 0.6393845501177238
recall: 0.6157925457510918
              precision    recall  f1-score   support

          -1       0.62      0.80      0.70      1373
           0       0.65      0.55      0.60      1160
           1       0.64      0.50      0.56       915

    accuracy                           0.64      3448
   macro avg       0.64      0.62      0.62      3448
weighted avg       0.64      0.64      0.63      3448



# en iyiler alpha=0.1, window_size=7, vector_size=200

In [50]:
model = train_doc2vec(corpus=corpus, 
                         n_epoch=25, 
                         name_corpus="tweet", 
                         negative=5,
                         alpha = 0.1,
                         min_alpha = 0.065,
                         window = 7,
                         vector_size=200,
                         min_count=2)


vectors_train = get_vectors(model=model,
                            corpus=corpus_train)
vectors_test = get_vectors(model=model,
                            corpus=corpus_test)

X_train = np.array(vectors_train)
X_train = np.vstack(X_train)
X_test = np.array(vectors_test)
X_test = np.vstack(X_test)

classification_report(x_train=X_train,
                      x_test=X_test,
                      y_train=y_train,
                      y_test=y_test)



LogisticRegression:
accuracy: 0.630800464037123
precision: 0.6277542011171744
recall: 0.6146182240032301
              precision    recall  f1-score   support

          -1       0.64      0.77      0.70      1373
           0       0.61      0.56      0.59      1160
           1       0.62      0.52      0.57       915

    accuracy                           0.63      3448
   macro avg       0.63      0.61      0.62      3448
weighted avg       0.63      0.63      0.63      3448

RandomForest:
accuracy: 0.6412412993039444
precision: 0.6577827530078119
recall: 0.6171117585174395
              precision    recall  f1-score   support

          -1       0.61      0.83      0.70      1373
           0       0.67      0.55      0.60      1160
           1       0.69      0.47      0.56       915

    accuracy                           0.64      3448
   macro avg       0.66      0.62      0.62      3448
weighted avg       0.65      0.64      0.63      3448

