In [85]:
import pandas as pd
from sklearn.externals import joblib
import category_encoders as ce
import sys
import os
from nltk.stem import WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer
import re  

# BinEncoder = joblib.load('BinEncoder.pkl') 
clf = joblib.load('RF_generos2.pkl')

YearBinaryEnco = joblib.load('YearBinaryEnco.pkl')
tfidf_plot = joblib.load('tfidf_plot.pkl')
tfidf_title = joblib.load('tfidf_title.pkl')


def clasff_movie(title, plot, year):
    
    
    #Preprocess
    title_clean = text_clean(title, remove_stop_words=False)
    plot_clean  = text_clean(plot)
    
    #Transform
    YearBinary = YearBinaryEnco.transform(pd.DataFrame([year],columns=["year"]))
    title_tfidf_dtm = tfidf_title.transform(pd.Series([title_clean]))
    title_feat_tfidf = pd.DataFrame(title_tfidf_dtm.toarray(), columns=tfidf_title.get_feature_names())

    plot_tfidf_dtm = tfidf_plot.transform(pd.Series([plot_clean]))
    plot_feat_tfidf = pd.DataFrame(plot_tfidf_dtm.toarray(), columns=tfidf_plot.get_feature_names())

    #Create a dataframe 
    df_ = pd.concat([title_feat_tfidf, 
                  plot_feat_tfidf, 
                  YearBinary], axis=1) 
    
    #Predict
    predict_ = clf.predict_proba(df_)

    return predict_


# This function transform the text in order get ready data, remove stop words, stimming, Lemmatisation and n_grams
def text_clean(text, remove_stop_words=True):
    wordnet_lemmatizer = WordNetLemmatizer()
    stemmer = SnowballStemmer('english')
    document = text
    
    # Remove all the special characters
    document = re.sub(r'\W', ' ', document)

    # remove all single characters
    document = re.sub(r'\s+[a-zA-Z]\s+', ' ', document)

    # Remove single characters from the start
    document = re.sub(r'\^[a-zA-Z]\s+', ' ', document) 

    # Substituting multiple spaces with single space
    document = re.sub(r'\s+', ' ', document, flags=re.I)

    # Removing prefixed 'b'
    document = re.sub(r'^b\s+', '', document)
    
    #Removing punctuation
    document = re.sub(r'[^\w\s]', '', document)

    #LowerCase    
    document = document.lower()
    
    #Split document word a word
    words_document = text.split()
    
    #Remove clean_words
    words_document = [word for word in words_document if word not in waste_words]
    #Remove stop words
    if remove_stop_words:
        words_document = [word for word in words_document if word not in custom_stopwords]
    
    #stimming
    words_document = [stemmer.stem(word) for word in words_document]
    
    #Lemmatisation
    words_document = [wordnet_lemmatizer.lemmatize(word) for word in words_document]
    words_document = [wordnet_lemmatizer.lemmatize(word, pos='v') for word in words_document]
           
    return ' '.join(words_document)

custom_stopwords =['a', 'about', 'above', 'across', 'after', 'afterwards', 'again',
       'against', 'ain', 'all', 'almost', 'alone', 'along', 'already',
       'also', 'although', 'always', 'am', 'among', 'amongst', 'amoungst',
       'amount', 'an', 'and', 'another', 'any', 'anyhow', 'anyone',
       'anything', 'anyway', 'anywhere', 'are', 'aren', "aren't",
       'around', 'as', 'at', 'back', 'be', 'became', 'because', 'become',
       'becomes', 'becoming', 'been', 'before', 'beforehand', 'behind',
       'being', 'below', 'beside', 'besides', 'between', 'beyond', 'bill',
       'both', 'bottom', 'but', 'by', 'call', 'can', 'cannot', 'cant',
       'co', 'con', 'could', 'couldn', "couldn't", 'couldnt', 'cry', 'd',
       'de', 'describe', 'detail', 'did', 'didn', "didn't", 'do', 'does',
       'doesn', "doesn't", 'doing', 'don', "don't", 'done', 'down', 'due',
       'during', 'each', 'eg', 'eight', 'either', 'eleven', 'else',
       'elsewhere', 'empty', 'enough', 'etc', 'even', 'ever', 'every',
       'everyone', 'everything', 'everywhere', 'except', 'few', 'fifteen',
       'fifty', 'fill', 'find', 'fire', 'first', 'five', 'for', 'former',
       'formerly', 'forty', 'found', 'four', 'from', 'front', 'full',
       'further', 'get', 'give', 'go', 'had', 'hadn', "hadn't", 'has',
       'hasn', "hasn't", 'hasnt', 'have', 'haven', "haven't", 'having',
       'he', 'hence', 'her', 'here', 'hereafter', 'hereby', 'herein',
       'hereupon', 'hers', 'herself', 'him', 'himself', 'his', 'how',
       'however', 'hundred', 'i', 'ie', 'if', 'in', 'inc', 'indeed',
       'interest', 'into', 'is', 'isn', "isn't", 'it', "it's", 'its',
       'itself', 'just', 'keep', 'last', 'latter', 'latterly', 'least',
       'less', 'll', 'ltd', 'm', 'ma', 'made', 'many', 'may', 'me',
       'meanwhile', 'might', 'mightn', "mightn't", 'mill', 'mine', 'more',
       'moreover', 'most', 'mostly', 'move', 'much', 'must', 'mustn',
       "mustn't", 'my', 'myself', 'name', 'namely', 'needn', "needn't",
       'neither', 'never', 'nevertheless', 'next', 'nine', 'no', 'nobody',
       'none', 'noone', 'nor', 'not', 'nothing', 'now', 'nowhere', 'o',
       'of', 'off', 'often', 'on', 'once', 'one', 'only', 'onto', 'or',
       'other', 'others', 'otherwise', 'our', 'ours', 'ourselves', 'out',
       'over', 'own', 'part', 'per', 'perhaps', 'please', 'put', 'rather',
       're', 's', 'same', 'see', 'seem', 'seemed', 'seeming', 'seems',
       'serious', 'several', 'shan', "shan't", 'she', "she's", 'should',
       "should've", 'shouldn', "shouldn't", 'show', 'side', 'since',
       'sincere', 'six', 'sixty', 'so', 'some', 'somehow', 'someone',
       'something', 'sometime', 'sometimes', 'somewhere', 'still', 'such',
       'system', 't', 'take', 'ten', 'than', 'that', "that'll", 'the',
       'their', 'theirs', 'them', 'themselves', 'then', 'thence', 'there',
       'thereafter', 'thereby', 'therefore', 'therein', 'thereupon',
       'these', 'they', 'thick', 'thin', 'third', 'this', 'those',
       'though', 'three', 'through', 'throughout', 'thru', 'thus', 'to',
       'together', 'too', 'top', 'toward', 'towards', 'twelve', 'twenty',
       'two', 'un', 'under', 'until', 'up', 'upon', 'us', 've', 'very',
       'via', 'was', 'wasn', "wasn't", 'we', 'well', 'were', 'weren',
       "weren't", 'what', 'whatever', 'when', 'whence', 'whenever',
       'where', 'whereafter', 'whereas', 'whereby', 'wherein',
       'whereupon', 'wherever', 'whether', 'which', 'while', 'whither',
       'who', 'whoever', 'whole', 'whom', 'whose', 'why', 'will', 'with',
       'within', 'without', 'won', "won't", 'would', 'wouldn', "wouldn't",
       'y', 'yet', 'you', "you'd", "you'll", "you're", "you've", 'your',
       'yours', 'yourself', 'yourselves']

waste_words =['!',
 '"',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 ',',
 '-',
 '.',
 '/',
 ':',
 ';',
 '=',
 '?',
 'a$$',
 'a&m',
 'aa',
 'aaa',
 'aam',
 '+',
 'aang']

In [61]:
#Preprocess
title_clean = text_clean("A Woman's Face", remove_stop_words=False)
plot_clean  = text_clean("in sweden ,  a female blackmailer with a disfiguring facial scar meets a gentleman who lives beyond his means .  they become accomplices in blackmail ,  and she falls in love with him ,  bitterly resigned to the impossibility of his returning her affection .  her life changes when one of her victims proves to be the wife of a plastic surgeon ,  who catches her in his apartment ,  but believes her to be a jewel thief rather than a blackmailer .  he offers her the chance to look like a normal woman again ,  and she accepts ,  despite the agony of multiple operations .  meanwhile ,  her gentleman accomplice forms an evil scheme to rid himself of the one person who stands in his way to a fortune  -  his four - year - old - nephew .")
year="2018"

In [81]:


#Transform
YearBinary = YearBinaryEnco.transform(pd.DataFrame([year],columns=["year"]))
title_tfidf_dtm = tfidf_title.transform(pd.Series([title_clean]))
title_feat_tfidf = pd.DataFrame(title_tfidf_dtm.toarray(), columns=tfidf_title.get_feature_names())
print("title_feat_tfidf: " + str(title_feat_tfidf.shape))

plot_tfidf_dtm = tfidf_plot.transform(pd.Series([plot_clean]))
plot_feat_tfidf = pd.DataFrame(plot_tfidf_dtm.toarray(), columns=tfidf_plot.get_feature_names())
print("plot_feat_tfidf: " + str(plot_feat_tfidf.shape))

#Create a dataframe 
df_ = pd.concat([plot_feat_tfidf.add_suffix('_1'), 
                      title_feat_tfidf.add_suffix('_2'), 
                      YearBinary], axis=1) 

df_.shape

title_feat_tfidf: (1, 4172)
plot_feat_tfidf: (1, 8951)


(1, 13131)

In [46]:

ye = "2006"
df = pd.DataFrame([ye],columns=["year"])
df
YearBinary = YearBinaryEnco.transform(df)
YearBinary

Unnamed: 0,year_0,year_1,year_2,year_3,year_4,year_5,year_6,year_7
0,0,0,0,1,1,1,1,0


In [86]:
clasff_movie("A Woman's Face", 
             "in sweden ,  a female blackmailer with a disfiguring facial scar meets a gentleman who lives beyond his means .  they become accomplices in blackmail ,  and she falls in love with him ,  bitterly resigned to the impossibility of his returning her affection .  her life changes when one of her victims proves to be the wife of a plastic surgeon ,  who catches her in his apartment ,  but believes her to be a jewel thief rather than a blackmailer .  he offers her the chance to look like a normal woman again ,  and she accepts ,  despite the agony of multiple operations .  meanwhile ,  her gentleman accomplice forms an evil scheme to rid himself of the one person who stands in his way to a fortune  -  his four - year - old - nephew ."
             ,"1941")

array([[0.13020362, 0.17560571, 0.10731402, 0.11716939, 0.31402017,
        0.13158499, 0.14782341, 0.43374948, 0.13574491, 0.09581885,
        0.04761875, 0.10992976, 0.12649445, 0.09018483, 0.07469877,
        0.06921122, 0.04086857, 0.19386311, 0.06069164, 0.05780504,
        0.06417878, 0.18054495, 0.04686146, 0.06250829]])

In [87]:
#!/usr/bin/python
from flask import Flask
from flask_restplus import Api, Resource, fields
from sklearn.externals import joblib
from Model_Movie_CLF import clasff_movie

app = Flask(__name__)

api = Api(
    app, 
    version='1.0', 
    title='Movie Genre Classification',
    description='Desarrollado por: Iván Gómez, Cristian Najera, Natalia Martínez')

ns = api.namespace('Classification', 
     description='Classification movie')
   
parser = api.parser()

parser.add_argument(
    'Title', 
    type=str, 
    required=True, 
    help='Title of the movie', 
    location='args')

parser.add_argument(
    'Plot', 
    type=str, 
    required=True, 
    help='Description of the movie', 
    location='args')

parser.add_argument(
    'Year', 
    type=str, 
    required=True, 
    help='Year of the movie', 
    location='args')


resource_fields = api.model('Resource', {
    'result': fields.String,
})

@ns.route('/')
class PredPriceApi(Resource):

    @api.doc(parser=parser)
    @api.marshal_with(resource_fields)
    def get(self):
        args = parser.parse_args()
        
        return {
         "result": clasff_movie(args['Title'],args['Plot'],args['Year'])
        }, 200
    
    
if __name__ == '__main__':
    app.run(debug=True, use_reloader=False, host='0.0.0.0', port=8889)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


 * Running on http://0.0.0.0:8889/ (Press CTRL+C to quit)
127.0.0.1 - - [21/Apr/2019 16:58:42] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [21/Apr/2019 16:58:42] "GET /swagger.json HTTP/1.1" 200 -
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
  n_jobs = min(effective_n_jobs(n_jobs),