In [54]:
#import the required modules
import numpy as np
import pandas as pd
from tkinter import *

# Initialise a count vectorizer and split the dataset into training and testing data.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


class SpamClassifier:
    def __init__(self):
        self.dataFrame_Messages, self.dataFrame_Label=self.Data_Cleanup()
        self.SMS_Classifier, self.count_Vectorizer=self.Create_Classifier()
        
    def Data_Cleanup(self):
        data=pd.read_csv("spam.csv", encoding="latin-1")
        data=data.drop(["Unnamed: 0"], 1)
        data=data.rename(columns={"v1":"label", "v2":"message"})
        
        # Convert spam=1 and ham=0 for ease of working with the ML model
        df=data
        df.loc[df['label']=='spam', 'label']=1
        df.loc[df['label']=='ham', 'label']=0
        
        # Split messages and label so as to divide them into test and training data, 
        df_x = df['message']
        df_y = df['label']
        
        #print(df.head())
        #print(self.dataset.head())
        
        return df_x, df_y
    
    def Create_Classifier(self):
        
        def Classifier_Test():
            
            # Transform the test data using Count Vectorizer and test for accuracy
            testing_Data=cv.transform(messages_For_Test)
            predicted_Output=spam_Classifier.predict(testing_Data)
            expected_Output=np.array(labels_For_Test)
            
            # Convert the test data into the similar format as of predicted_Output, so that the confusion matrix can be generated.
            expected_Output_List=[]
            for output in expected_Output:
                expected_Output_List.append(output)
            
            precisionScore = precision_score(expected_Output_List, predicted_Output)
            recallScore = recall_score(expected_Output_List, predicted_Output)
            f1Score = f1_score(expected_Output_List, predicted_Output)
            accuracyScore = accuracy_score(expected_Output_List, predicted_Output)
            confusionMatrix = confusion_matrix(expected_Output_List, predicted_Output)
            
            result={"precisionScore":precisionScore, "recallScore":recallScore, "f1Score":f1Score, "accuracyScore":accuracyScore, "confusionMatrix":confusionMatrix}
            
            return result
            
            
        cv=CountVectorizer()
        messages_For_Train, messages_For_Test, labels_For_Train, labels_For_Test = train_test_split(self.dataFrame_Messages, self.dataFrame_Label, test_size=0.5, random_state=4)
        
        # Transform the training data using the count vectorizer, this creates a frequency array of words for each message 
        training_Data = cv.fit_transform(messages_For_Train)
        
        ## The features/words extracted by the count vectorizer from the training dataset.
        ## Each  message will be checked for these words.
        #cv.get_feature_names()
        
        ## trainig_Data_Array represents for each message in the dataset, how many times a feature/word has been repeated
        training_Data_Array = training_Data.toarray()
        ## The number of messages in training set and how many have been checked by Count Vectorizer
        #len(arr)
        
        spam_Classifier = MultinomialNB()
        
        # Conversion of training data of 0's and 1's into integers as they are strings by default
        labels_For_Train = labels_For_Train.astype('int')
        
        spam_Classifier.fit(training_Data,  labels_For_Train)
        
        test_Results=Classifier_Test()
        
        #print(test_Results)
        
        return spam_Classifier, cv
    

class App:
    def __init__(self):
        
        self.input_List=[]
        self.Spam_Classifier=SpamClassifier()
    
    def Start_App(self):
        
        def Submit():
            input_Text=user_Input.get()
            if(input_Text!=""):
                self.input_List.append(input_Text)
                userInputEntry.delete(0, END)
                prediction_Text=self.Get_Prediction()
                outputLabel.config(text=prediction_Text)
            else:
                outputLabel.config(text="Please enter a message.")
            
            prediction_Text=""
                

        def Exit():
            mainWindow.destroy()

        mainWindow=Tk()
        mainWindow.title("SMS Spam Classifier")
        mainWindow.geometry("400x400")

        welcomeLabel=Label(master=mainWindow, text="Welcome To SMS CLASSIFIER")
        welcomeLabel.grid(row=0, column=0, columnspan=5, padx=100, pady=40)

        userInputLabelFrame=LabelFrame(master=mainWindow, text="Enter SMS to be classified")
        userInputLabelFrame.grid(row=1, column=0, rowspan=3, columnspan=5, padx=30)
        
        user_Input=StringVar()
        userInputEntry=Entry(master=userInputLabelFrame, textvariable=user_Input)
        userInputEntry.grid(row=4, column=0, columnspan=5, padx=100, pady=20)

        outputLabel=Label(master=mainWindow, text="")
        outputLabel.grid(row=5, column=0, columnspan=5, padx=100, pady=10)

        submitButton=Button(mainWindow, text="Submit", command=Submit)
        submitButton.grid(row=6, column=0, columnspan=3, padx=10, pady=20)

        exitButton=Button(mainWindow, text="     Exit     ", command=Exit)
        exitButton.grid(row=6, column=2, columnspan=3, padx=10, pady=20)

        mainWindow.mainloop()
    

        
    def Get_Prediction(self):
        output_Text=None
        latest_Index=len(self.input_List)-1
        test_Data = self.Spam_Classifier.count_Vectorizer.transform(self.input_List)
        prediction = self.Spam_Classifier.SMS_Classifier.predict(test_Data)
        prediction = prediction.tolist()
        print(prediction)
        if prediction[latest_Index]==1:
            output_Text="SMS is SPAM: Be cautious......"
        else:
            output_Text="SMS is NOT SPAM: You are safe....."
        return output_Text
                
    def Write_To_CSV(self):
        user_Input_Dictionary={
            "label":self.classification_Of_Input_List,
            "message":self.input_List
        }
        
        user_Input_Data_Frame=pd.DataFrame(user_Input_Dictionary)
        data_Frame_To_Write=user_Input_Data_Frame.append(self.Spam_Classifier.data)
        data_Frame_To_Write.loc[data_Frame_To_Write['label']==1, 'label']="spam"
        data_Frame_To_Write.loc[data_Frame_To_Write['label']==0, 'label']="ham"
        #print(data_Frame_To_Write)
        data_Frame_To_Write.to_csv("spam-appended.csv", index=False)

In [55]:
# "Dear Customer your pyatm wallet Has been booked and hold Your amount please complete your kyc contact customer care 6200992462"
# "Junglee Rummy win 1000RS"
# "Hey John, Hope you are doing fine, I am in Chicago for a week, let us catch up sometime. Have a great day !"
classifier_App = App()   
classifier_App.Start_App()

  data=data.drop(["Unnamed: 0"], 1)


[1]
[1, 0]
[1, 0, 1]


[1 1]
