In [1]:
import pandas as pd
import numpy as np

In [9]:
bait = pd.read_csv('dataset/clickbait_data', sep="\n", header=None, names=['title'])

In [11]:
bait['bait'] = 1

In [12]:
bait.head()

Unnamed: 0,title,bait
0,Should I Get Bings,1
1,Which TV Female Friend Group Do You Belong In,1
2,"The New ""Star Wars: The Force Awakens"" Trailer...",1
3,"This Vine Of New York On ""Celebrity Big Brothe...",1
4,A Couple Did A Stunning Photo Shoot With Their...,1


In [13]:
nobait = pd.read_csv('dataset/non_clickbait_data', sep="\n", header=None, names=['title'] )

In [14]:
nobait['bait'] = 0

In [15]:
nobait.head()

Unnamed: 0,title,bait
0,Bill Changing Credit Card Rules Is Sent to Oba...,0
1,"In Hollywood, the Easy-Money Generation Toughe...",0
2,1700 runners still unaccounted for in UK's Lak...,0
3,Yankees Pitchers Trade Fielding Drills for Put...,0
4,Large earthquake rattles Indonesia; Seventh in...,0


In [17]:
db = pd.concat([clickbait,non_clickbait])

In [18]:
db.head()

Unnamed: 0,title,bait
0,Should I Get Bings,1
1,Which TV Female Friend Group Do You Belong In,1
2,"The New ""Star Wars: The Force Awakens"" Trailer...",1
3,"This Vine Of New York On ""Celebrity Big Brothe...",1
4,A Couple Did A Stunning Photo Shoot With Their...,1


In [19]:
db.tail()

Unnamed: 0,title,bait
15996,"To Make Female Hearts Flutter in Iraq, Throw a...",0
15997,"British Liberal Democrat Patsy Calton, 56, die...",0
15998,Drone smartphone app to help heart attack vict...,0
15999,"Netanyahu Urges Pope Benedict, in Israel, to D...",0
16000,Computer Makers Prepare to Stake Bigger Claim ...,0


In [20]:
from sklearn.utils import shuffle
db = shuffle(db, random_state=26).reset_index(drop=True)

In [22]:
db.head(),db.tail()

(                                               title  bait
 0  Justin Trudeau Meets Taylor Swift In This Hila...     1
 1  20 killed and over 40 missing as overcrowded b...     0
 2        Former SA Deputy President Appears In Court     0
 3                     Morgan Stanley Seeks Successor     0
 4         Bomb blast in Iran injures three civilians     0,
                                                    title  bait
 31995  These Cops Took Care Of A Sick Woman's Five Ch...     1
 31996  This Word Association Test Will Tell You If Yo...     1
 31997  China Says Rio Tinto Bribed Most Big Steel Makers     0
 31998                  NATO deploys helicopters in Libya     0
 31999       Advertisers Change Game Plans for Super Bowl     0)

In [23]:
db.describe()

Unnamed: 0,bait
count,32000.0
mean,0.499969
std,0.500008
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,1.0


In [24]:
from sklearn.model_selection import train_test_split
X = db.title
y = db.bait
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

In [25]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB

pipeline1 = Pipeline([('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('mnb', MultinomialNB())])

In [26]:
pipeline1.fit(X, y)

Pipeline(memory=None,
     steps=[('vect', CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        strip...inear_tf=False, use_idf=True)), ('mnb', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])

In [27]:
predicted = pipeline1.predict(X_test)

In [28]:
print (np.mean(predicted == y_test))

0.98125


In [38]:
pipeline1.predict(["You won’t believe how these 9 shocking clickbaits work! "])[0]

1

In [37]:
pipeline1.predict_proba(["You won’t believe how these 9 shocking clickbaits work! "])[0]

array([0.00276704, 0.99723296])

In [39]:
pipeline1.predict(["Live at the Alchemist’s Kitchen: ‘Moon Over Matter’ With Dr. Mark Filippi"])[0]

0

In [40]:
pipeline1.predict_proba(["Live at the Alchemist’s Kitchen: ‘Moon Over Matter’ With Dr. Mark Filippi"])[0]

array([0.60805137, 0.39194863])

In [41]:
from sklearn.externals import joblib

In [42]:
import pickle

In [45]:
joblib.dump(pipeline1, 'clickbaitmodelsklearn.pkl') 

['clickbaitmodelsklearn.pkl']

In [46]:
m1=joblib.load('clickbaitmodelsklearn.pkl')

In [52]:
pred_score = m1.predict_proba(['You won’t believe how these 9 shocking clickbaits work!'])[0]
pred_score

array([0.00276704, 0.99723296])

In [53]:
prob = {'Clickbate': pred_score[1], 'Not-Clickbate': pred_score[0]}

In [58]:
def score(input_string):
   
    pred_score = m1.predict_proba([input_string])[0]
    if ((pred_score[1]*100 > 40) & ((pred_score[1]*100 < 60))):
        flag = 'Maybe Baity'
        color = 'warn-color'
    elif (pred_score[1]*100 > 60):
        flag = 'Looks Safe'
        color = 'safe-color'
    else:
        flag = 'Sounds Baity'
        color = 'danger-color'

    prob = {'clickbate-prob': pred_score[1], 'notClickbate-prob': pred_score[0], 'flag': flag, 'color':color}
    return prob

In [59]:
a = score("You won’t believe how these 9 shocking clickbaits work!")

In [60]:
a

{'clickbate-prob': 0.9972329589125749,
 'notClickbate-prob': 0.0027670410874253925,
 'flag': 'Looks Safe',
 'color': 'safe-color'}