In [31]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from sklearn.cluster import KMeans

In [32]:
# Load the saved Word2Vec model from disk and keep only its word-vector
# table (`.wv`); the rest of the loaded model object is not retained.
# NOTE(review): the file name has an upper-case "C" ("word2veC") -- confirm it
# matches the file on disk, since some file systems are case-sensitive.
MODEL_PATH = "word2veC.model"
word_vectors = Word2Vec.load(MODEL_PATH).wv

In [33]:
# Fit a 2-cluster K-Means model on the word embeddings, cast to float64
# ('double') before fitting.
# `random_state` is now an explicit integer seed. The previous value `True`
# is a bool, and `True == 1`, so it was read as seed 1; writing 1 keeps the
# same seeding without looking like an on/off flag.
# NOTE: n_init=300 restarts with up to 10000 iterations each can make this
# cell slow on a large vocabulary.
model = KMeans(
    n_clusters=2,
    max_iter=10000,
    random_state=1,
    n_init=300,
).fit(X=word_vectors.vectors.astype('double'))

In [34]:
# Show the 10 vocabulary entries most similar to the centroid of cluster 1,
# as (word, similarity) pairs, so the cluster can be inspected by eye.
inspected_centroid = model.cluster_centers_[1]
word_vectors.similar_by_vector(
    inspected_centroid,
    topn=10,
    restrict_vocab=None,
)

[('الاستخدام', 0.29578840732574463),
 ('خدمات', 0.25012296438217163),
 ('العيون', 0.21679577231407166),
 ('مشكل', 0.21069826185703278),
 ('شخصيه', 0.20491507649421692),
 ('اجراء', 0.18483972549438477),
 ('المدينه', 0.18343444168567657),
 ('الاقتطاعات', 0.18326526880264282),
 ('ديالكم', 0.18043167889118195),
 ('زيدو', 0.17894932627677917)]

In [35]:
# Index of the K-Means cluster treated as "positive" in the cells below.
# The value is hard-coded; presumably it was chosen after inspecting the
# nearest words of each centroid -- re-check it whenever the model is refit.
positive_cluster_index = 1

# With exactly two clusters (indices 0 and 1), `1 - index` selects the other
# cluster, which is taken as the negative one.
positive_cluster_center, negative_cluster_center = (
    model.cluster_centers_[positive_cluster_index],
    model.cluster_centers_[1 - positive_cluster_index],
)

In [36]:
# Build one row per vocabulary word with its embedding and K-Means cluster.
#   words   -- the vocabulary key
#   vectors -- the embedding looked up for that key
#   cluster -- index of the nearest K-Means centroid
words = pd.DataFrame({'words': list(word_vectors.key_to_index.keys())})
words['vectors'] = words['words'].apply(lambda word: word_vectors[word])

# Predict every word's cluster in a single batched call instead of invoking
# `model.predict` once per row. The embeddings are stacked into an
# (n_words, dim) matrix and cast to float64, the dtype the model was fitted on.
embedding_matrix = np.vstack(words['vectors'].to_numpy()).astype('double')
words['cluster'] = model.predict(embedding_matrix)

In [37]:
# Turn cluster membership into a signed sentiment score per word.
#   cluster_value   -- +1 for the positive cluster, -1 otherwise
#   closeness_score -- 1 / (distance to the nearest centroid)
#   sentiment_coeff -- closeness_score signed by cluster_value
words['cluster_value'] = np.where(words['cluster'] == positive_cluster_index, 1, -1)

# `model.transform` returns each sample's distance to every centroid. Calling
# it once on the stacked float64 embedding matrix replaces one call per row.
centroid_distances = model.transform(
    np.vstack(words['vectors'].to_numpy()).astype('double')
)
# A word lying exactly on a centroid has distance 0 and therefore gets an
# infinite closeness score (same as the per-row formulation).
words['closeness_score'] = 1 / centroid_distances.min(axis=1)
words['sentiment_coeff'] = words['closeness_score'] * words['cluster_value']

In [38]:
# Preview the first ten rows of the scored vocabulary table.
words.head(n=10)

Unnamed: 0,words,vectors,cluster,cluster_value,closeness_score,sentiment_coeff
0,بنك,"[-0.005418994, 0.002389341, 0.051573362, 0.091...",1,1,0.999916,0.999916
1,جيد,"[-0.07970168, 0.0899191, -0.0019112608, -0.019...",1,1,1.013212,1.013212
2,تطبيق,"[0.08337512, -0.045698285, -0.010953214, 0.010...",1,1,1.013912,1.013912
3,التطبيق,"[-0.05294934, -0.06847699, -0.07986015, 0.0853...",1,1,1.010318,1.010318
4,انا,"[-0.018948032, -0.051336423, 0.09207003, -0.09...",0,-1,1.005417,-1.005417
5,خدمه,"[-0.06750204, 0.039734527, 0.02030797, 0.06984...",1,1,1.012992,1.012992
6,البنك,"[0.10026633, -0.10072809, -0.0669382, 0.028670...",1,1,1.00644,1.00644
7,للغايه,"[0.0132997045, 0.06528323, 0.099654704, 0.0904...",0,-1,1.013953,-1.013953
8,الله,"[-0.04926045, -0.012710415, 0.032492895, -0.06...",1,1,1.011944,1.011944
9,شيء,"[0.0208533, 0.056586742, -0.020881848, 0.03129...",0,-1,1.00913,-1.00913


In [39]:
# Export the word -> sentiment coefficient mapping as a CSV file without the
# index column. The 'utf-8-sig' encoding writes a UTF-8 byte-order mark at
# the start of the file.
words.to_csv(
    'sentiment_dict.csv',
    columns=['words', 'sentiment_coeff'],
    index=False,
    encoding='utf-8-sig',
)