### Deployment of the model

In [1]:
import warnings
warnings.filterwarnings("ignore")
import sklearn
from sklearn.datasets import load_files
moviedir = './dataset/movie_reviews' 
movie_reviews = load_files(moviedir, shuffle=True)
warnings.filterwarnings(module='sklearn*', action='ignore', category=DeprecationWarning)

In [2]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    movie_reviews.data, movie_reviews.target, test_size = 0.20, stratify=movie_reviews.target, random_state = 12)

In [3]:
import pickle
 
sentdir = r'./sentiment.pkl'
eclf = pickle.load(open(sentdir,'rb'))

In [4]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

In [5]:
wml_credentials = {"apikey": "AutxufMdOUD33ch9Jfw-oz3wRhMlvjLMqsrl0QPaiYcV",
                   "iam_apikey_description": "Auto-generated for key 7f5841fe-7a70-4f0e-a139-5d5f870b3ae3",
                   "iam_apikey_name": "Credenciales de servicio-1",
                   "iam_role_crn": "crn:v1:bluemix:public:iam::::serviceRole:Writer",
                   "iam_serviceid_crn": "crn:v1:bluemix:public:iam-identity::a/e9f9f8b864b745a4bed403ec3c89b1df::serviceid:ServiceId-915a40d7-3ec8-4584-a9ce-98789d01d9e1",
                   "instance_id": "032089cc-ca05-4fcb-8ff6-b7b94da0974f",
                   "url": "https://us-south.ml.cloud.ibm.com"}

In [6]:
client = WatsonMachineLearningAPIClient(wml_credentials)

In [7]:
props = {
    client.repository.ModelMetaNames.AUTHOR_NAME: "Horacio A. Chiarella",
    client.repository.ModelMetaNames.NAME: "txt_sentiment"
}

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    movie_reviews.data, movie_reviews.target, test_size = 0.20, stratify=movie_reviews.target, random_state = 12)

In [9]:
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer

vect = TfidfVectorizer()
pipeline = make_pipeline(vect,eclf)
pipeline.fit(X_train,y_train)

Pipeline(memory=None,
     steps=[('tfidfvectorizer', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_i...l=0.001, verbose=False))],
         flatten_transform=None, n_jobs=1, voting='soft', weights=None))])

In [10]:
published = client.repository.store_model(model=pipeline,
                                          meta_props=props,
                                          training_data=X_train,
                                          training_target=y_train)

In [21]:
pub_id = client.repository.get_model_uid(published)
models_details = client.repository.list_models(limit=1)

------------------------------------  -------------  ------------------------  -----------------
GUID                                  NAME           CREATED                   FRAMEWORK
01eeea66-429a-4ab6-a7c5-867c05c2dff2  txt_sentiment  2019-10-30T04:13:21.094Z  scikit-learn-0.19
------------------------------------  -------------  ------------------------  -----------------


In [22]:
loaded = client.repository.load(pub_id)

In [23]:
test_predictions = loaded.predict(X_test)

In [24]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report

print("roc_auc score: ", roc_auc_score(y_test,test_predictions))
print(classification_report(y_test,test_predictions))

roc_auc score:  0.8575
             precision    recall  f1-score   support

          0       0.86      0.85      0.86       200
          1       0.85      0.86      0.86       200

avg / total       0.86      0.86      0.86       400



#### Checking how model performs in [reviews of "Rocky 4"](https://www.rottentomatoes.com/m/rocky_iv/reviews/?type=user).

In [25]:
reviews_new = ["Stallone creates credible villains worthy of his heroic character.",
               "Another brilliant Rocky film, probably my favourite one out of the lot",
               "Yeah, this movie sucks.",
               "My favourite rocky film! So good so much heart. Slightly better than 2",
               "What has this got to do with boxing. Also everyone looked like dolls. Also if you are a real true boxing fan (not casuals), you would understand that this stupidity is no description of boxing!!",
               "The new film's narrative is stripped down to essentials, which gives it an emblematic quality.",
               "Absurdly ridiculous, this just isn't a good movie at all", 
               "Very basic and predictable but still an okay movie. No special music to save this one.", 
              "Rocky 4 is an extremely ambitious movie that is definitely worth watching.",
              'Highly beautiful',
               "If it wasn't for the robots (WTF????), and the painfully overwritten lines of an absurdly dogmatic persuasion, then this would otherwise be nothing more than an interminable series of mildly rousing montages. There are some unintentionally funny bits though, and Dolph's Ivan showcases the best and worst of all Rocky's opponents.",
              "While all aspects of realism is thrown out the window, ROCKY IV is an adrenaline rush of action and excitment, with an incredible soundtrack and arguably the best movie fight in history between Balboa and Drago",
              "Just like the songs, exercise routines and repetitive clips, it seems redundant to add another installment in this already falling franchise when you clearly lack material. Rocky IV is petty, childish and seems overlong despite of its 91 minutes of run time for it merely has an idea of a TV drama episode which is stretched to a point of exhaustion. Its painful to watch Sylvester Stallone go through this enormous amount of training and hardly make a point out there. He fails on all the levels here; writer, director and actor, to deliver any loose end of the thread for the audience to hang on to. Rocky IV is predictable, loosely written and choreographed and executed unsupervised."]

In [26]:
pred = loaded.predict(reviews_new)

In [27]:
for review, category in zip(reviews_new, pred):
    print('%r => %s \n' % (review, movie_reviews.target_names[category]))

'Stallone creates credible villains worthy of his heroic character.' => neg 

'Another brilliant Rocky film, probably my favourite one out of the lot' => pos 

'Yeah, this movie sucks.' => neg 

'My favourite rocky film! So good so much heart. Slightly better than 2' => pos 

'What has this got to do with boxing. Also everyone looked like dolls. Also if you are a real true boxing fan (not casuals), you would understand that this stupidity is no description of boxing!!' => neg 

"The new film's narrative is stripped down to essentials, which gives it an emblematic quality." => neg 

"Absurdly ridiculous, this just isn't a good movie at all" => neg 

'Very basic and predictable but still an okay movie. No special music to save this one.' => neg 

'Rocky 4 is an extremely ambitious movie that is definitely worth watching.' => pos 

'Highly beautiful' => pos 

"If it wasn't for the robots (WTF????), and the painfully overwritten lines of an absurdly dogmatic persuasion, then this would oth