# Naive Bayes Part 1 <a class="tocSkip">

In [1]:
import io
import os

import numpy
import numpy as np
import pandas as pd
from pandas import DataFrame
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

In [2]:
def readFiles(path):
    """Yield ``(file_path, message)`` for every file found under ``path``.

    The directory tree is walked recursively. For each file, everything up to
    and including the first empty line is skipped (treated as the header);
    the remaining lines form the message body.

    Parameters
    ----------
    path : str
        Root directory to walk.

    Yields
    ------
    tuple of (str, str)
        The full path of the file and its body. The body lines keep their own
        trailing newline and are additionally joined with ``'\\n'``, so
        consecutive body lines end up separated by a blank line. A file that
        contains no empty line yields ``''`` as its body.
    """
    for root, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            # Separate name so the `path` argument is not overwritten mid-walk.
            file_path = os.path.join(root, filename)

            in_body = False
            lines = []
            # The context manager closes the handle even if reading raises,
            # which the explicit open()/close() pair did not guarantee.
            with io.open(file_path, 'r', encoding='latin1') as f:
                for line in f:
                    if in_body:
                        lines.append(line)
                    elif line == '\n':
                        # First empty line marks the end of the header.
                        in_body = True
            message = '\n'.join(lines)
            yield file_path, message

In [3]:
def dataFrameFromDirectory(path,classification):
    """Build a DataFrame with one row per file under ``path``.

    Each row holds the message body returned by ``readFiles`` in the
    ``'message'`` column and the given ``classification`` label in the
    ``'class'`` column; the frame is indexed by the file path.
    """
    found = list(readFiles(path))
    file_paths = [file_path for file_path, _ in found]
    records = [{'message': body, 'class': classification} for _, body in found]
    return DataFrame(records, index=file_paths)

# Train set

In [4]:
# Empty frame that already carries the two columns used throughout the notebook.
train = DataFrame(dict([('message', []), ('class', [])]))
train

Unnamed: 0,message,class


In [5]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the two
# per-class frames are combined with pd.concat instead. Building `train` from the
# two directories (rather than appending to itself) also makes this cell
# idempotent: re-running it no longer duplicates every row.
# NOTE(review): these paths are absolute and machine-specific - point them at
# your own copy of the dataset before running.
train = pd.concat([
    dataFrameFromDirectory(r'C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam','spam'),
    dataFrameFromDirectory(r'C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\ham','ham'),
])


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [6]:
# Size of the combined training frame, followed by a preview of its first rows.
print(train.shape)
train.iloc[:5]

(603, 2)


Unnamed: 0,class,message
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam\desktop.ini,spam,
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam\spmsga140.txt,spam,"earn over $ 70 , 000 a month in your own home ..."
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam\spmsga141.txt,spam,this message complies with the proposed united...
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam\spmsga142.txt,spam,"make unlimited income no meetings , no phone c..."
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam\spmsga143.txt,spam,are you ready to reach new prospects without t...


In [7]:
# Drop mails with an empty body (two of them, caused by Google Drive .ini files).
train = train[train['message'].ne('')]
print(train.shape)
train.iloc[:5]

(601, 2)


Unnamed: 0,class,message
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam\spmsga140.txt,spam,"earn over $ 70 , 000 a month in your own home ..."
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam\spmsga141.txt,spam,this message complies with the proposed united...
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam\spmsga142.txt,spam,"make unlimited income no meetings , no phone c..."
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam\spmsga143.txt,spam,are you ready to reach new prospects without t...
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\train\spam\spmsga144.txt,spam,* * * * * * warning ! this message is for porn...


## Vectorizer

In [8]:
# Bag-of-words counts for the training mails: lower-cased word tokens made of
# two or more word characters.
vectorizer = CountVectorizer(
    analyzer='word',
    token_pattern='(?u)\\b\\w\\w+\\b',
    lowercase=True,
    binary=False,
    min_df=1,
    decode_error='strict',
)
x_train = vectorizer.fit_transform(train.loc[:, 'message'].values)
print(x_train.shape)

(601, 20939)


In [9]:
# Dense document-term table: one row per training mail, one column per token.
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; use get_feature_names_out() when it exists and fall back to
# the old method on older installations.
trainfrequencies = DataFrame(
    x_train.toarray(),
    columns=(
        vectorizer.get_feature_names_out()
        if hasattr(vectorizer, 'get_feature_names_out')
        else vectorizer.get_feature_names()
    ),
)
print(trainfrequencies.shape)
trainfrequencies.head()

(601, 20939)


Unnamed: 0,00,000,0000,0001,00014,000bp,001,00198,002656,0027,...,zukuenftigen,zulia,zulu,zur,zurich,zwart,zwarts,zweigenbaum,zwicky,zwischen
0,21,14,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# Class labels of the training mails as a plain array.
y_train = train.loc[:, 'class'].values
print(y_train.shape)

(601,)


# Test set

In [11]:
# Empty frame that already carries the two columns used throughout the notebook.
test = DataFrame(dict([('message', []), ('class', [])]))
test

Unnamed: 0,message,class


In [12]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the two
# per-class frames are combined with pd.concat instead. Building `test` from the
# two directories (rather than appending to itself) also makes this cell
# idempotent: re-running it no longer duplicates every row.
# NOTE(review): these paths are absolute and machine-specific - point them at
# your own copy of the dataset before running.
test = pd.concat([
    dataFrameFromDirectory(r'C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam','spam'),
    dataFrameFromDirectory(r'C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\ham','ham'),
])

In [13]:
# Size of the combined test frame, followed by a preview of its first rows.
print(test.shape)
test.iloc[:5]

(203, 2)


Unnamed: 0,class,message
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam\desktop.ini,spam,
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam\spmsga100.txt,spam,if you want the best hunting and camping vacat...
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam\spmsga101.txt,spam,57 million email addresses for only $ 99 you w...
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam\spmsga102.txt,spam,attention ! warning ! adults only ! warning ! ...
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam\spmsga103.txt,spam,attention ! warning ! adults only ! warning ! ...


In [14]:
# Drop mails with an empty body (two of them, caused by Google Drive .ini files).
test = test[test['message'].ne('')]
print(test.shape)
test.iloc[:5]

(201, 2)


Unnamed: 0,class,message
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam\spmsga100.txt,spam,if you want the best hunting and camping vacat...
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam\spmsga101.txt,spam,57 million email addresses for only $ 99 you w...
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam\spmsga102.txt,spam,attention ! warning ! adults only ! warning ! ...
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam\spmsga103.txt,spam,attention ! warning ! adults only ! warning ! ...
C:\Users\Paul\Documents\_SXOLH\Machine Learning\Projects\Project_4\Project_4__classification\Project 4 - classification\emailspam\test\spam\spmsga104.txt,spam,subject : re : are you in debt ? if you are th...


In [15]:
# Bag-of-words counts for the test mails, using a vectorizer fitted on the test
# messages themselves (same settings as for the training mails).
vectorizer = CountVectorizer(
    analyzer='word',
    token_pattern='(?u)\\b\\w\\w+\\b',
    lowercase=True,
    binary=False,
    min_df=1,
    decode_error='strict',
)
x_test = vectorizer.fit_transform(test.loc[:, 'message'].values)
print(x_test.shape)

(201, 10889)


In [16]:
# Dense document-term table: one row per test mail, one column per token.
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; use get_feature_names_out() when it exists and fall back to
# the old method on older installations.
testfrequencies = DataFrame(
    x_test.toarray(),
    columns=(
        vectorizer.get_feature_names_out()
        if hasattr(vectorizer, 'get_feature_names_out')
        else vectorizer.get_feature_names()
    ),
)
print(testfrequencies.shape)
testfrequencies.head()

(201, 10889)


Unnamed: 0,00,000,00001,00003000140,00014,0003,001,0027,00333,0057,...,zpun,zribi,zsazsa36,zsuzsanna,zulu,zurich,zwarts,zxgah7qabjh,zygmunt,zz214
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4,10,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# Class labels of the test mails as a plain array.
y_test = test.loc[:, 'class'].values
print(y_test.shape)

(201,)


# Naive Bayes Classifier

## Vocabulary size

In [18]:
# Train and test tokens concatenated; duplicates are still present at this point.
words = list(trainfrequencies.columns) + list(testfrequencies.columns)
len(words)

31828

In [19]:
# Vocabulary size: number of distinct tokens across the train and test sets.
V = np.unique(words).size
V

24745

## Spam word counts

In [20]:
# Training mails labelled as spam.
spam_train = train[train['class'].eq('spam')]
print(spam_train.shape)

(300, 2)


In [21]:
# Prior probability of spam: fraction of training mails labelled spam.
pSPAM = len(spam_train)/len(train)

In [22]:
# Bag-of-words counts restricted to the spam training mails.
vectorizer = CountVectorizer(
    analyzer='word',
    token_pattern='(?u)\\b\\w\\w+\\b',
    lowercase=True,
    binary=False,
    min_df=1,
    decode_error='strict',
)
spamx_train = vectorizer.fit_transform(spam_train.loc[:, 'message'].values)
print(spamx_train.shape)

(300, 9399)


In [23]:
# Dense document-term table for the spam training mails.
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; use get_feature_names_out() when it exists and fall back to
# the old method on older installations.
spamfrequencies = DataFrame(
    spamx_train.toarray(),
    columns=(
        vectorizer.get_feature_names_out()
        if hasattr(vectorizer, 'get_feature_names_out')
        else vectorizer.get_feature_names()
    ),
)
print(spamfrequencies.shape)
spamfrequencies.head()

(300, 9399)


Unnamed: 0,00,000,0000,0037,0057,007,0073,00am,00pm,01,...,zillion,zine,zip,zip2,zippergate,zoid,zone,zoo,zorro,zscn
0,21,14,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# Total number of token occurrences over all spam training mails
# (per-column sums first, then the sum of those).
total_spam_words = spamfrequencies.sum().sum()
total_spam_words

175372

### Demonstration

In [25]:
# Number of times the word "money" occurs in the spam training mails.
spamfrequencies['money'].sum()

617

In [26]:
# Probability of seeing the word "money" given that the mail is spam,
# with add-one smoothing over the vocabulary size V.
p_word_spam = (spamfrequencies['money'].sum() + 1)/(total_spam_words + V)
p_word_spam

0.0030881934068569886

## Ham word counts

In [27]:
# Training mails labelled as ham.
ham_train = train[train['class'].eq('ham')]
print(ham_train.shape)

(301, 2)


In [28]:
# Prior probability of ham: fraction of training mails labelled ham.
pHAM = len(ham_train)/len(train)

In [30]:
# Bag-of-words counts restricted to the ham training mails.
vectorizer = CountVectorizer(
    analyzer='word',
    token_pattern='(?u)\\b\\w\\w+\\b',
    lowercase=True,
    binary=False,
    min_df=1,
    decode_error='strict',
)
hamx_train = vectorizer.fit_transform(ham_train.loc[:, 'message'].values)
print(hamx_train.shape)

(301, 15700)


In [31]:
# Dense document-term table for the ham training mails.
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; use get_feature_names_out() when it exists and fall back to
# the old method on older installations.
hamfrequencies = DataFrame(
    hamx_train.toarray(),
    columns=(
        vectorizer.get_feature_names_out()
        if hasattr(vectorizer, 'get_feature_names_out')
        else vectorizer.get_feature_names()
    ),
)
print(hamfrequencies.shape)
hamfrequencies.head()

(301, 15700)


Unnamed: 0,00,000,0001,00014,000bp,001,00198,002656,0027,0049,...,zukuenftigen,zulia,zulu,zur,zurich,zwart,zwarts,zweigenbaum,zwicky,zwischen
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,6,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
# Total number of token occurrences over all ham training mails
# (per-column sums first, then the sum of those).
total_ham_words = hamfrequencies.sum().sum()
total_ham_words

127693

### Demonstration

In [33]:
# Number of times the word "money" occurs in the ham training mails.
hamfrequencies['money'].sum()

9

In [34]:
# Probability of seeing the word "money" given that the mail is ham,
# with add-one smoothing over the vocabulary size V.
p_word_ham = (hamfrequencies['money'].sum() + 1)/(total_ham_words + V)
p_word_ham

6.560044083496242e-05

No surprise here: there is a higher probability of seeing the word "money" in a spam e-mail.

# Probability calculation

Log Probability Naive Bayes

http://www.cs.rhodes.edu/~kirlinp/courses/ai/f18/projects/proj3/naive-bayes-log-probs.pdf

In [46]:
# Distinct tokens seen in each class of the training set.
unique_ham_words = list(hamfrequencies.columns)
unique_spam_words = list(spamfrequencies.columns)

print(len(unique_ham_words), len(unique_spam_words), sep='\n')

15700
9399


In [47]:
#probability of occurrence of word 'word' given class 'mailclass'

def pWordGivenClass(word,mailclass):
    """Return the add-one (Laplace) smoothed estimate of P(word | mailclass).

    The estimate is ``(count + 1) / (total_words + V)`` where ``count`` is the
    number of occurrences of ``word`` in the training mails of ``mailclass``
    (0 when the word never occurs there), ``total_words`` is the total token
    count of that class and ``V`` is the vocabulary size.

    Reads the notebook globals ``spamfrequencies`` / ``total_spam_words`` or
    ``hamfrequencies`` / ``total_ham_words``, plus ``V``.

    Parameters
    ----------
    word : str
        Token to look up.
    mailclass : str
        Either ``'spam'`` or ``'ham'``.

    Returns
    -------
    float
        The smoothed conditional probability.

    Raises
    ------
    ValueError
        If ``mailclass`` is neither ``'spam'`` nor ``'ham'`` (previously this
        surfaced as an UnboundLocalError on the return statement).
    """
    if mailclass == 'spam':
        frequencies, total_words = spamfrequencies, total_spam_words
    elif mailclass == 'ham':
        frequencies, total_words = hamfrequencies, total_ham_words
    else:
        raise ValueError("mailclass must be 'spam' or 'ham', got %r" % (mailclass,))

    # Membership on the column Index is a hash lookup; scanning a Python list
    # of column names for every word made scoring a mail needlessly slow.
    if word in frequencies.columns:
        count = frequencies[word].sum()
    else:
        count = 0

    return (count + 1)/(total_words + V)




#log-probability score of an e-mail ( passed as index of bag-of-word matrix / test_set ) given class 'mailclass'
def pMessageGivenClass(index,mailclass,test_set,verbose=False):
    """Return the unnormalised log-posterior of one mail for ``mailclass``.

    The score is ``log P(class) + sum(count * log P(word | class))`` over the
    words of the mail, with ``P(word | class)`` taken from ``pWordGivenClass``
    and the class prior from the notebook globals ``pSPAM`` / ``pHAM``.

    Parameters
    ----------
    index : label
        Row label of the mail in ``test_set`` (looked up with ``.loc``).
    mailclass : str
        Either ``'spam'`` or ``'ham'``.
    test_set : DataFrame
        Bag-of-words table: one row per mail, one column per word.
    verbose : bool, default False
        When True, print the start/end markers around the scoring loop. They
        used to be printed unconditionally, which floods the output when a
        whole data set is scored.

    Returns
    -------
    float
        The log-probability score; compare the 'spam' and 'ham' scores of the
        same mail to classify it.

    Raises
    ------
    ValueError
        If ``mailclass`` is neither ``'spam'`` nor ``'ham'``.
    """
    if mailclass == 'spam':
        log_prior = np.log(pSPAM)
    elif mailclass == 'ham':
        log_prior = np.log(pHAM)
    else:
        raise ValueError("mailclass must be 'spam' or 'ham', got %r" % (mailclass,))

    mail = test_set.loc[index,:]
    # Words with a zero count contribute exactly 0 to the sum, so only the
    # words that actually occur in this mail need a probability lookup.
    present = mail[mail != 0]

    total_probability = 0

    if verbose:
        print('Start for loop--------')
    for word, count in present.items():
        total_probability = total_probability + count*np.log(pWordGivenClass(word,mailclass))
    if verbose:
        print('End for loop--------')

    return total_probability + log_prior
    

## Test a mail

In [49]:
testindex=12

# `test` is indexed by file path, so an integer has to be resolved by position.
# Plain [] with an integer on a label-indexed Series relies on a positional
# fallback that pandas has deprecated.
print(test['message'].iloc[testindex][0:500])


spamprob = pMessageGivenClass(testindex,'spam',testfrequencies)
hamprob = pMessageGivenClass(testindex,'ham',testfrequencies)

print('\n Log of SPAM probability',spamprob)
print('\n Log of HAM probability',hamprob)

# The class with the larger log-probability wins; ties go to spam.
if(spamprob < hamprob):
    print('\n---HAM---')
else:
    print('\n---SPAM---')

the rumors are true ! * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * it 's true ! you can make a lot of * money * marketing on the web ! with the potential to reach millions , the internet is a dream come true - - for those that have the tools and know how to use them . that is why i want to tell you about online success . online success is an outstanding internet marketing resource center that will provide you with the tools and the training necessary to cash in on huge pro
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Log of SPAM probability -1111.0401882231363

 Log of HAM probability -1273.2850091185915

---SPAM---


# Evaluation on Test set

In [51]:
correct=0

# Row i of `testfrequencies` was built from the i-th message of `test`, so the
# same integer addresses both - by label in the former, by position in the latter.
for index in range(test.shape[0]):
    
    spamprob = pMessageGivenClass(index,'spam',testfrequencies)
    hamprob = pMessageGivenClass(index,'ham',testfrequencies)

    # The class with the larger log-probability wins; ties go to spam.
    if(spamprob < hamprob):
        prediction ='ham'
    else:
        prediction ='spam'

    # `test` is indexed by file path: use .iloc instead of the deprecated
    # positional fallback of Series[int].
    if(prediction==test['class'].iloc[index]):
        print('\n Correct classification as ',prediction)
        correct=correct + 1
    else:
        print('\n Incorrect classification as ',prediction)
        
print('\n Accuracy score:',(correct / test.shape[0]) )
    

Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
E

End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Sta

End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for l

# Evaluation on Train set

In [52]:
correct=0

# Row i of `trainfrequencies` was built from the i-th message of `train`, so the
# same integer addresses both - by label in the former, by position in the latter.
for index in range(train.shape[0]):
    
    spamprob = pMessageGivenClass(index,'spam',trainfrequencies)
    hamprob = pMessageGivenClass(index,'ham',trainfrequencies)

    # The class with the larger log-probability wins; ties go to spam.
    if(spamprob < hamprob):
        prediction ='ham'
    else:
        prediction ='spam'

    # `train` is indexed by file path: use .iloc instead of the deprecated
    # positional fallback of Series[int].
    if(prediction==train['class'].iloc[index]):
        print('\n Correct classification as ',prediction)
        correct=correct + 1
    else:
        print('\n Incorrect classification as ',prediction)
        
print('\n Accuracy score:',(correct / train.shape[0]) )
    

Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
E

End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Sta

End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 C

End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Sta

End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  spam
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 C

End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for l

End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for l

End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for l

End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for loop--------
End for loop--------

 Correct classification as  ham
Start for loop--------
End for loop--------
Start for l

Accuracy scores:  
0.997 on Train set  
0.995 on Test set

**Feature selection, training on random subsets (20%, 40%, etc.) and cross-validation are in the** ***Naive Bayes Part 2*** **notebook.**