In [4]:
from gensim.models import Word2Vec
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

In [5]:
"""
Scikit-learn compatible transformer that converts text into fixed-length vectors by averaging Word2Vec word embeddings.
"""

class Word2VectorTransformer(BaseEstimator, TransformerMixin):
    """Scikit-learn transformer that embeds text as averaged Word2Vec vectors.

    ``fit`` trains a gensim ``Word2Vec`` model on the whitespace-tokenised
    input sentences. ``transform`` maps each sentence to the mean of the
    vectors of its in-vocabulary tokens; a sentence without any
    in-vocabulary token maps to the zero vector.

    Parameters
    ----------
    vector_size : int, default=100
        Forwarded to ``Word2Vec`` as ``vector_size``.
    window : int, default=5
        Forwarded to ``Word2Vec`` as ``window``.
    min_count : int, default=1
        Forwarded to ``Word2Vec`` as ``min_count``.
    workers : int, default=4
        Forwarded to ``Word2Vec`` as ``workers``.

    Attributes
    ----------
    model : gensim.models.Word2Vec or None
        The trained model; ``None`` until ``fit`` has been called.
    """

    def __init__(self, vector_size=100, window=5, min_count=1, workers=4):
        self.vector_size = vector_size
        self.window = window
        self.min_count = min_count
        self.workers = workers
        self.model = None

    def fit(self, X, y=None):
        """Train the Word2Vec model on ``X``.

        Parameters
        ----------
        X : iterable of str
            Sentences; each one is tokenised with ``str.split()``.
        y : ignored
            Present only for scikit-learn pipeline compatibility.

        Returns
        -------
        self
        """
        sentences = [sentence.split() for sentence in X]
        self.model = Word2Vec(
            sentences,
            vector_size=self.vector_size,
            window=self.window,
            min_count=self.min_count,
            workers=self.workers,
        )
        return self

    def transform(self, X):
        """Return one averaged word vector per sentence in ``X``.

        Parameters
        ----------
        X : iterable of str
            Sentences; each one is tokenised with ``str.split()``.

        Returns
        -------
        numpy.ndarray
            2-D array with one row per sentence and one column per
            embedding dimension of the fitted model.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        wv = self._fitted_vectors()
        rows = [self._get_feature_vector(sentence) for sentence in X]
        if not rows:
            # np.array([]) would be 1-D with shape (0,); keep the result 2-D
            # so downstream estimators still see the right number of columns.
            return np.empty((0, wv.vector_size), dtype=wv.vectors.dtype)
        return np.array(rows)

    def _fitted_vectors(self):
        """Return the trained keyed vectors, or raise if ``fit`` was not run."""
        if self.model is None:
            # Imported here so the check does not depend on the file's
            # top-level import list. NotFittedError subclasses both ValueError
            # and AttributeError, so callers that caught the previous
            # AttributeError keep working.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                f"This {type(self).__name__} instance is not fitted yet. "
                "Call 'fit' before using 'transform'."
            )
        return self.model.wv

    def _get_feature_vector(self, sentence):
        """Return the mean vector of the in-vocabulary words of ``sentence``.

        A sentence with no in-vocabulary word yields a zero vector.
        """
        wv = self._fitted_vectors()
        known_words = [word for word in sentence.split() if word in wv]
        if not known_words:
            # Size and dtype come from the fitted model rather than from
            # ``self.vector_size``: the hyper-parameter may have been changed
            # via ``set_params`` after fitting, and matching the model's dtype
            # stops a single out-of-vocabulary sentence from upcasting the
            # whole output array.
            return np.zeros(wv.vector_size, dtype=wv.vectors.dtype)
        return np.mean([wv[word] for word in known_words], axis=0)
    