In [1]:
from joblib import load
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk

In [2]:
# Locations used by the cells below; both are relative paths.
path = "./"         # prefix for data.csv
mpath = "models/"   # prefix for the persisted .joblib objects

In [3]:
# NLTK helpers. `stemmer` is the module-level stemmer that
# StemmedTfidfVectorizer.build_analyzer applies to every token.
stemmer = nltk.stem.snowball.EnglishStemmer()
# English stop-word list from the NLTK 'stopwords' corpus.
stop_words = nltk.corpus.stopwords.words('english')

class StemmedTfidfVectorizer(TfidfVectorizer):
    """TfidfVectorizer variant whose analyzer stems every token.

    Each token produced by the inherited analyzer is passed through the
    module-level ``stemmer`` before it reaches the vectorizer.

    NOTE(review): this class is defined right before ``tfidf.joblib`` is
    loaded; presumably the persisted vectorizer is an instance of it and
    needs the class importable under this exact name -- confirm before
    renaming or moving it.
    """

    def build_analyzer(self):
        """Return a callable mapping a document to a generator of stemmed tokens."""
        # Zero-argument super() starts the lookup at this class's parent.
        # Writing super(TfidfVectorizer, self) instead starts it *after*
        # TfidfVectorizer, which would silently bypass any build_analyzer
        # defined on TfidfVectorizer itself.
        base_analyzer = super().build_analyzer()

        def stemmed_analyzer(doc):
            # Lazily stem each token emitted by the inherited analyzer.
            return (stemmer.stem(token) for token in base_analyzer(doc))

        return stemmed_analyzer
    
# Restore the three persisted objects from `mpath`, in this order:
# classifier, TF-IDF vectorizer, SVD object.
# NOTE(review): joblib's load unpickles arbitrary Python objects -- only
# point it at files you trust.
movieclf, tfidf, svd = (
    load(mpath + artifact)
    for artifact in ('movieclassifier.joblib', 'tfidf.joblib', 'svd.joblib')
)

In [4]:
# Read the movie table (title / overview / genre columns are used below)
# back from data.csv under `path`.
data_file = path + 'data.csv'
data = pd.read_csv(data_file)

In [5]:
# Draw one row at random (no seed is set, so each run picks a different
# movie) and pull out the fields the following cells use.
sample = data.sample(n=1)
title = sample['title'].iloc[0]        # scalar value of the single row
overview = sample['overview'].values   # left as a length-1 array; later cells index it with [0]
genre = sample.loc[:, 'genre']         # left as a one-row Series (index + value)

In [6]:
# Show the sampled title, overview and recorded genre side by side.
(title, overview, genre)

("A Woman's Face",
 array(['A woman with a disfigured face, which is reflected in her life view: she is bitter and mean-spirited, is given the chance to lead a new life thanks to plastic surgery. Will her old life and acquaintances allow her to completely change?'],
       dtype=object),
 19775    Drama
 Name: genre, dtype: object)

In [8]:
# Build the single input document explicitly. `overview` is a length-1
# array, so take its only element (the same way `overview[0]` is used when
# the result object is assembled) rather than relying on NumPy broadcasting
# `str + ndarray` to produce the iterable of documents.
document = title + ' ' + overview[0]

# `transform` expects an iterable of documents, hence the one-element list;
# the result is a sparse matrix with one row.
x = tfidf.transform([document])
x

<1x55162 sparse matrix of type '<class 'numpy.float64'>'
	with 21 stored elements in Compressed Sparse Row format>

In [10]:
# Pass the TF-IDF row `x` through the loaded `svd` object; the result `X`
# is what the classifier is given in the next cell.
# NOTE(review): presumably a fitted dimensionality reducer (the name
# suggests truncated SVD) -- confirm against the training notebook.
X = svd.transform(x)

In [12]:
# Predict the genre for the transformed sample; `pred` is an array with one
# label per input row (a single row here).
pred = movieclf.predict(X)
pred

array(['Drama'], dtype=object)

In [16]:
# Bundle the sampled movie's text with the predicted (not the recorded) genre.
outobject = dict(
    title=title,
    overview=overview[0],
    genre=pred[0],
)

In [17]:
# Display the assembled result dictionary.
outobject

{'title': "A Woman's Face",
 'overview': 'A woman with a disfigured face, which is reflected in her life view: she is bitter and mean-spirited, is given the chance to lead a new life thanks to plastic surgery. Will her old life and acquaintances allow her to completely change?',
 'genre': 'Drama'}