## Import Relevant packages

In [1]:
import pandas as pd
import numpy as np
import os

# For Data Visualisation
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score
from collections import Counter

# For Data Preparation
from wordcloud import WordCloud, STOPWORDS
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from sklearn.tree import plot_tree
import re

# For Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from __future__ import division
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, KFold

# For Evaluation of Results
from sklearn.metrics import f1_score
from sklearn import metrics

# For Product
from IPython.display import HTML
from termcolor import colored
from pyfiglet import Figlet

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

# Spam Buster 3000

In [2]:
# Build the ASCII-art renderer once; the cells below reuse `f` for their own banners.
f = Figlet(font='slant', justify="center")
title_banner = f.renderText("Spam Buster 3000")
print(colored(title_banner))

           _____                          ____             __           
          / ___/____  ____ _____ ___     / __ )__  _______/ /____  _____
          \__ \/ __ \/ __ `/ __ `__ \   / __  / / / / ___/ __/ _ \/ ___/
         ___/ / /_/ / /_/ / / / / / /  / /_/ / /_/ (__  ) /_/  __/ /    
        /____/ .___/\__,_/_/ /_/ /_/  /_____/\__,_/____/\__/\___/_/     
            /_/                                                         
                              _____ ____  ____  ____ 
                             |__  // __ \/ __ \/ __ \
                              /_ </ / / / / / / / / /
                            ___/ / /_/ / /_/ / /_/ / 
                           /____/\____/\____/\____/  
                                                     
[0m


### Try out Spam Buster 3000 below! Type in the message you received and let it decide if it is Spam!

#### Description: 

Spam Buster 3000 is the ultimate spam detection tool, designed to keep your inbox free of unwanted and harmful messages. It combines a powerful ensemble of four models (Naive Bayes, a Support Vector Machine (SVM), a Random Forest classifier (RFC), and Logistic Regression) and constantly updates its training dataframe with every new user input to improve accuracy and robustness over time. Its advanced algorithms allow for a comprehensive analysis of incoming data, ensuring that no spam goes unnoticed. Spam Buster 3000 is easy to use and highly customizable, giving you full control over how you want to manage your inbox. Say goodbye to spam once and for all with Spam Buster 3000.


#### Sample Spam: 

Best 0ffer L0AN
Personal & Business L0AN
Accept  Blacklist
N0 Hidden Fees
N0 Guarantor
Private & Confidential
https://wa.me/84164822

#### Sample Ham: 

Good Morning Janice! Excited to see you today!



In [4]:
# --- Load the labelled messages and fit the four ensemble members ---
traindf = pd.read_csv('train.csv')

cv = TfidfVectorizer(min_df = 1, stop_words='english')
# Matches any run of six or more digits; presumably meant to flag phone
# numbers, going by the 'Contain_HP' column name.
pattern = r'\d{6,}'
traindf['Contain_HP'] = traindf['Message'].apply(lambda x: bool(re.search(pattern, x)))
# 'Unnamed: 0' is the index column that DataFrame.to_csv writes by default.
# It is absent when the CSV was saved without an index, so tolerate that
# instead of raising KeyError.
traindf = traindf.drop(columns='Unnamed: 0', errors='ignore')

# TF-IDF features of the message text, and the integer class labels.
x_input = cv.fit_transform(traindf.Message)
y_input = traindf.Category.astype(int)

# Naive Bayes, SVM and Random Forest are trained on the TF-IDF features only.
mnb_nb = MultinomialNB()
mnb_nb.fit(x_input, y_input)

classifier_svc = SVC(random_state = 0)
classifier_svc.fit(x_input, y_input)

# Seeded like the SVC so that re-running the notebook rebuilds the same forest.
classifier_rfc = RandomForestClassifier(random_state = 0)
classifier_rfc.fit(x_input, y_input)

# Logistic Regression additionally sees the Contain_HP flag, appended as one
# extra column after the (densified) TF-IDF columns.
x_input_combined = pd.concat([pd.DataFrame(x_input.toarray()), traindf['Contain_HP'].reset_index(drop=True)], axis=1)

lr = LogisticRegression()
# Column labels are a mix of ints and one str; cast them all to str before fitting.
x_input_combined.columns = x_input_combined.columns.astype(str)
lr.fit(x_input_combined, y_input)

# Interactive loop: classify each typed message with all four models, ask the
# user whether the verdict was right, and append the confirmed label to traindf.
while True:
    message = input("SPAM BUSTER 3000 (enter 'quit' to exit):\t")
    if message == 'quit':
        break

    # Same digit-run flag that was computed for the training rows.
    has_digit_run = bool(re.search(pattern, message))

    busterdf = pd.DataFrame({"Message": [message], "Contain_HP": [has_digit_run]})
    bustercv = cv.transform(busterdf.Message)
    buster_combined = pd.concat(
        [pd.DataFrame(bustercv.toarray()), busterdf['Contain_HP'].reset_index(drop=True)],
        axis=1,
    )
    buster_combined.columns = buster_combined.columns.astype(str)

    # Three models vote on the TF-IDF features; Logistic Regression votes on
    # the TF-IDF features plus the Contain_HP column.
    buster_nb = mnb_nb.predict(bustercv)
    buster_svc = classifier_svc.predict(bustercv)
    buster_rfc = classifier_rfc.predict(bustercv)
    buster_lr = lr.predict(buster_combined)
    buster_score = sum([buster_nb[0], buster_svc[0], buster_rfc[0], buster_lr[0]])

    # A positive vote total is reported as spam.
    pred = 1 if buster_score > 0 else 0
    messagetoprint = 'SPAM ALERT!' if pred == 1 else "You're Safe!"

    print(colored(f.renderText(messagetoprint)))
    print("We appreciate your feedback: \U0001f917")

    # Keep asking until the answer is exactly 'y' or 'n' (case-insensitive).
    correct_pred = str(input("Was it a correct prediction? (Y/N)\t")).lower()
    while correct_pred not in ('y', 'n'):
        correct_pred = str(input("Was it a correct prediction? (Y/N)\t")).lower()

    if correct_pred == 'y':
        confirmed_label = pred
        print("We are glad Spam Buster 3000 helped you! \U0001f970")
    else:
        print("Thank you for your feedback and helping make Spam Buster 3000 better! \U0001f972")
        # The prediction was wrong, so store the opposite class.
        confirmed_label = 0 if pred == 1 else 1

    newrow = {"Category" : confirmed_label, "Message": message, "Contain_HP": has_digit_run}
    traindf.loc[len(traindf)] = newrow

# Persist the feedback-augmented training set.
# Write to a temporary file and swap it in with os.replace, so a failed or
# interrupted write can never leave train.csv missing or truncated. The old
# remove-then-write sequence could, and its bare `except` hid every error.
# The index column is still written on purpose: the loading code drops the
# resulting 'Unnamed: 0' column when it reads the file back.
train_csv_tmp = "train.csv.tmp"
try:
    traindf.to_csv(train_csv_tmp)
    os.replace(train_csv_tmp, "train.csv")
finally:
    # Only present if the write or the swap failed; do not leave it behind.
    if os.path.exists(train_csv_tmp):
        os.remove(train_csv_tmp)
print(colored(f.renderText("Thank you for using Spam Buster 3000!")))


SPAM BUSTER 3000 (enter 'quit' to exit):	here
            __  __           _               _____       ____     __
            \ \/ /___  __  _( )________     / ___/____ _/ __/__  / /
             \  / __ \/ / / /// ___/ _ \    \__ \/ __ `/ /_/ _ \/ / 
             / / /_/ / /_/ / / /  /  __/   ___/ / /_/ / __/  __/_/  
            /_/\____/\__,_/ /_/   \___/   /____/\__,_/_/  \___(_)   
                                                                    
[0m
We appreciate your feedback: 😀
Was it a correct prediction? (Y/N)	y
SPAM BUSTER 3000 (enter 'quit' to exit):	quit
       ________                __                           ____          
      /_  __/ /_  ____ _____  / /__   __  ______  __  __   / __/___  _____
       / / / __ \/ __ `/ __ \/ //_/  / / / / __ \/ / / /  / /_/ __ \/ ___/
      / / / / / / /_/ / / / / ,<    / /_/ / /_/ / /_/ /  / __/ /_/ / /    
     /_/ /_/ /_/\__,_/_/ /_/_/|_|   \__, /\____/\__,_/  /_/  \____/_/     
                                   /____/  