#### Incremental Learning- Model Retraining Approach

In [1]:
import pandas as pd

messages = pd.read_csv('data/spamSMS', sep='\t',
                           names=["label", "message"])

In [2]:
messages.shape

(5572, 2)

In [3]:
from sklearn.model_selection import train_test_split
message_train,message_test=train_test_split(messages)

In [4]:
message_train

Unnamed: 0,label,message
1687,spam,todays vodafone numbers ending with 0089(my la...
2072,ham,Good night my dear.. Sleepwell&amp;Take care
1027,ham,Yes da. Any plm at ur office
2466,ham,S.i think he is waste for rr..
3456,ham,No need lar. Jus testing e phone card. Dunno n...
...,...,...
2088,ham,"alright babe, justthought id sayhey! how u do..."
3421,spam,"As a valued customer, I am pleased to advise y..."
866,spam,Congratulations ur awarded either a yrs supply...
3764,ham,K.i will send in &lt;#&gt; min:)


In [5]:
# Convert dataframe to list of tuples
messages_train = message_train.to_records(index=False)
messages_test=message_test.to_records(index=False)

In [6]:
messages_train

rec.array([('spam', 'todays vodafone numbers ending with 0089(my last four digits) are selected to received a £350 award. If your number matches please call 09063442151 to claim your £350 award'),
           ('ham', 'Good night my dear.. Sleepwell&amp;Take care'),
           ('ham', 'Yes da. Any plm at ur office'), ...,
           ('spam', 'Congratulations ur awarded either a yrs supply of CDs from Virgin Records or a Mystery Gift GUARANTEED Call 09061104283 Ts&Cs www.smsco.net £1.50pm approx 3mins'),
           ('ham', 'K.i will send in  &lt;#&gt;  min:)'),
           ('ham', "Fyi I'm gonna call you sporadically starting at like  &lt;#&gt;  bc we are not not doin this shit")],
          dtype=[('label', 'O'), ('message', 'O')])

In [7]:
# Creating the pipeline
# 1st function is creating the TFIDF
# 2nd function is the naive bayes predictor
import math
from creme import compose
from creme import feature_extraction
from creme import naive_bayes
import creme
model = compose.Pipeline(
    ('tokenize', feature_extraction.TFIDF(lowercase=False)),
    ('nb', naive_bayes.MultinomialNB(alpha=1))
)

In [8]:
from creme import metrics
metric=metrics.Accuracy()
# Training the model row by row
for label,sentence in messages_train:
    model = model.fit_one(sentence, label)
    y_pred = model.predict_one(sentence)
    metric = metric.update(label, y_pred)
   

In [9]:
### Training Data Accuracy
metric

Accuracy: 95.79%

In [10]:
### test Data Accuracy
test_metric=metrics.Accuracy()
for label,sentence in messages_test:
    y_pred = model.predict_one(sentence)
    test_metric = metric.update(label, y_pred)

In [11]:
### test Metric
test_metric

Accuracy: 95.93%

In [12]:
model.fit_one("We will be meeting tomorrow at 12 pm", "ham")
model.fit_one("Hurry today is the last day for this best discount", "spam")

Pipeline (
  TFIDF (
    normalize=True
    on=None
    strip_accents=True
    lowercase=False
    preprocessor=None
    tokenizer=<built-in method findall of re.Pattern object at 0x0000021BA031D370>
    ngram_range=(1, 1)
  ),
  MultinomialNB (
    alpha=1
  )
)