In [2]:
import nltk

In [3]:
nltk.download('twitter_samples')

[nltk_data] Downloading package twitter_samples to
[nltk_data]     C:\Users\desire\AppData\Roaming\nltk_data...
[nltk_data]   Package twitter_samples is already up-to-date!


True

In [7]:
# Tokenizing the data
from nltk.corpus import twitter_samples as ts

In [8]:
positive_tweets = ts.strings('positive_tweets.json')
negative_tweets = ts.strings('negative_tweets.json')
text = ts.strings('tweets.20150430-223406.json')

In [9]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\desire\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [10]:
tweet_tokens = ts.tokenized('positive_tweets.json')

In [11]:
tweet_tokens[0][0]

'#FollowFriday'

In [12]:
# Normalizing the data.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\desire\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [13]:
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\desire\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [14]:
from nltk.tag import pos_tag

In [15]:
print(pos_tag(tweet_tokens[0]))

[('#FollowFriday', 'JJ'), ('@France_Inte', 'NNP'), ('@PKuchly57', 'NNP'), ('@Milipol_Paris', 'NNP'), ('for', 'IN'), ('being', 'VBG'), ('top', 'JJ'), ('engaged', 'VBN'), ('members', 'NNS'), ('in', 'IN'), ('my', 'PRP$'), ('community', 'NN'), ('this', 'DT'), ('week', 'NN'), (':)', 'NN')]


In [16]:
from nltk.stem.wordnet import WordNetLemmatizer

In [17]:
def lemmatize_sentence(tokens):
    """Return the WordNet lemma of every token in ``tokens``.

    The tokens are tagged with ``pos_tag``. A tag starting with ``NN`` is
    lemmatized as a noun, a tag starting with ``VB`` as a verb, and any other
    tag as an adjective.

    Args:
        tokens: Sequence of token strings forming one sentence/tweet.

    Returns:
        List of lemmatized tokens, in the same order as the input.
    """
    def wordnet_pos(tag):
        # Collapse the tagger's tag onto the three WordNet classes used here.
        if tag.startswith('NN'):
            return 'n'
        if tag.startswith('VB'):
            return 'v'
        return 'a'

    lemmatizer = WordNetLemmatizer()
    return [
        lemmatizer.lemmatize(word, wordnet_pos(tag))
        for word, tag in pos_tag(tokens)
    ]

In [18]:
lemmatize_sentence(tweet_tokens[0])

['#FollowFriday',
 '@France_Inte',
 '@PKuchly57',
 '@Milipol_Paris',
 'for',
 'be',
 'top',
 'engage',
 'member',
 'in',
 'my',
 'community',
 'this',
 'week',
 ':)']

In [19]:
# Removing Noise from the data.

import re, string

In [20]:
# Raw strings: the original non-raw literal contained "\(" and "\)", which are
# invalid *string* escapes (a warning on current Python, slated to become an
# error). The regex text itself is unchanged.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|'
    r'(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)
_MENTION_PATTERN = re.compile(r"(@[A-Za-z0-9_]+)")


def remove_noise(tweet_tokens, stop_words=()):
    """Clean and normalize the tokens of a single tweet.

    For every token this strips URLs and ``@mentions``, lemmatizes the
    remainder using the token's POS tag (``NN*`` -> noun, ``VB*`` -> verb,
    anything else -> adjective), and lower-cases it. Tokens that end up empty,
    that are found inside ``string.punctuation``, or whose lower-cased form is
    a stop word are dropped.

    Args:
        tweet_tokens: Sequence of token strings for one tweet.
        stop_words: Iterable of lower-case stop-word strings to discard.
            Defaults to no stop words.

    Returns:
        List of cleaned, lower-cased tokens in their original order.
    """
    # Set membership instead of scanning a list for every token.
    stop_words = frozenset(stop_words)
    # One lemmatizer per call rather than one per token.
    lemmatizer = WordNetLemmatizer()

    cleaned_tokens = []
    # Tagging happens on the raw tokens, before any URL/mention stripping.
    for token, tag in pos_tag(tweet_tokens):
        token = _URL_PATTERN.sub('', token)
        token = _MENTION_PATTERN.sub('', token)

        if tag.startswith("NN"):
            pos = 'n'
        elif tag.startswith("VB"):
            pos = 'v'
        else:
            pos = 'a'
        token = lemmatizer.lemmatize(token, pos)

        # NOTE: `in string.punctuation` is a substring test, so a run of
        # characters that appears contiguously in that constant (e.g. "()")
        # is dropped as well. Kept as-is to preserve the existing features.
        if len(token) > 0 and token not in string.punctuation and token.lower() not in stop_words:
            cleaned_tokens.append(token.lower())
    return cleaned_tokens

In [21]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\desire\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [22]:
from nltk.corpus import stopwords
stop_words = stopwords.words('english')

In [23]:
remove_noise(tweet_tokens[0], stop_words)

['#followfriday', 'top', 'engage', 'member', 'community', 'week', ':)']

In [24]:
positive_tweets_tokens = ts.tokenized('positive_tweets.json')
negative_tweets_tokens = ts.tokenized('negative_tweets.json')

positive_cleaned_tokens_list = [remove_noise(tokens, stop_words) for tokens in positive_tweets_tokens]
negative_cleaned_tokens_list = [remove_noise(tokens, stop_words) for tokens in negative_tweets_tokens]

In [25]:
print(positive_tweets_tokens[500])
print(positive_cleaned_tokens_list[500])

['Dang', 'that', 'is', 'some', 'rad', '@AbzuGame', '#fanart', '!', ':D', 'https://t.co/bI8k8tb9ht']
['dang', 'rad', '#fanart', ':d']


In [37]:
def get_all_words(cleaned_tokens_list):
    """Lazily yield every token from every token list, in order.

    Args:
        cleaned_tokens_list: Iterable of token iterables (one per tweet).

    Yields:
        Each individual token, flattening the nested structure.
    """
    for tweet in cleaned_tokens_list:
        yield from tweet

In [38]:
all_pos_words = get_all_words(positive_cleaned_tokens_list)

In [39]:
from nltk import FreqDist

In [40]:
freq_dist_pos = FreqDist(all_pos_words)

In [41]:
freq_dist_pos.most_common(10)

[(':)', 3691),
 (':-)', 701),
 (':d', 658),
 ('thanks', 388),
 ('follow', 357),
 ('love', 333),
 ('...', 290),
 ('good', 283),
 ('get', 263),
 ('thank', 253)]

In [42]:
# Preparing Data for the Model:

def get_tweets_for_model(cleaned_tokens_list):
    """Lazily turn each token list into a ``{token: True}`` feature dict.

    Args:
        cleaned_tokens_list: Iterable of token lists (one per tweet).

    Yields:
        One dict per tweet mapping every distinct token to ``True``.
    """
    for tokens in cleaned_tokens_list:
        yield {token: True for token in tokens}

In [43]:
positive_tokens_for_model = get_tweets_for_model(positive_cleaned_tokens_list)
negative_tokens_for_model = get_tweets_for_model(negative_cleaned_tokens_list)

In [44]:
import random

In [45]:
# Build the labelled datasets straight from the cleaned token lists. The
# generators created in the earlier cell are single-use, so consuming them
# here made this cell return empty datasets when it was run a second time.
# The labels keep their trailing space: the GUI code compares against
# 'Positive ' exactly.
positive_dataset = [(tweet_dict, "Positive ")
                    for tweet_dict in get_tweets_for_model(positive_cleaned_tokens_list)]
negative_dataset = [(tweet_dict, "Negative ")
                    for tweet_dict in get_tweets_for_model(negative_cleaned_tokens_list)]
dataset = positive_dataset + negative_dataset

# Seeded, private RNG so the train/test split (and therefore the reported
# accuracy) is reproducible across kernel restarts.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(dataset)

# The first TRAIN_SIZE shuffled examples train the model; the rest test it.
TRAIN_SIZE = 7000
train_data = dataset[:TRAIN_SIZE]
test_data = dataset[TRAIN_SIZE:]

In [46]:
# Building and Testing the Model:

In [47]:
from nltk import classify
from nltk import NaiveBayesClassifier
classifier = NaiveBayesClassifier.train(train_data)

In [48]:
print("Accuracy is: ", classify.accuracy(classifier, test_data))
# show_most_informative_features() prints its own table and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
classifier.show_most_informative_features(10)

Accuracy is:  0.9946666666666667
Most Informative Features
                      :) = True           Positi : Negati =   1642.1 : 1.0
                     sad = True           Negati : Positi =     24.1 : 1.0
                follower = True           Positi : Negati =     23.1 : 1.0
                     bam = True           Positi : Negati =     21.4 : 1.0
                 welcome = True           Positi : Negati =     17.1 : 1.0
               community = True           Positi : Negati =     16.1 : 1.0
                     x15 = True           Negati : Positi =     15.8 : 1.0
                  arrive = True           Positi : Negati =     14.8 : 1.0
              appreciate = True           Positi : Negati =     14.2 : 1.0
                followed = True           Negati : Positi =     14.0 : 1.0
None


In [49]:
from nltk.tokenize import word_tokenize

In [50]:
cutom_tweet = 'I ordered just once from TerribleCo, they screwed up, never used the app again.'
custom_tokens = remove_noise(word_tokenize(cutom_tweet))

In [51]:
classifier.classify(dict([token, True] for token in custom_tokens))

'Positive '

In [52]:
custom_tweets = "Congrats #SportStar on your 7th best goal last season winning goal of the year :) #Baller #Topbin #oneofmanyworldies"

In [53]:
custom_tokens = remove_noise(word_tokenize(custom_tweets))

In [54]:
classifier.classify(dict([token, True] for token in custom_tokens))

'Positive '

In [55]:
custom_tweet = 'Thank you for sending my baggage to CityX and flying me to CityY at the same time. Brilliant service. #thanksGenericAirline'

In [56]:
custom_token = remove_noise(word_tokenize(custom_tweet))
x = classifier.classify(dict([token, True] for token in custom_token))

In [62]:
from tkinter import *
#from tkinter import messagebox

In [None]:
'''
# pip install pillow
from PIL import Image, ImageTk

class Window(Frame):
    def __init__(self, master=None):
        Frame.__init__(self, master)
        self.master = master
        self.pack(fill=BOTH, expand=1)
        
        load = Image.open("parrot.jpg")
        render = ImageTk.PhotoImage(load)
        img = Label(self, image=render)
        img.image = render
        img.place(x=0, y=0)

        
root = Tk()
app = Window(root)
        '''


# pip install pillow
from PIL import Image, ImageTk
class Window(Frame):
    """Frame that fills its parent and shows one image at its top-left corner."""

    def __init__(self, master=None, image_path="p.png"):
        """Create the frame, pack it into ``master`` and display the image.

        Args:
            master: Parent widget/window for this frame.
            image_path: Path of the image file to show. Defaults to
                ``"p.png"``, the file the original code always loaded.
        """
        Frame.__init__(self, master)
        self.master = master
        self.pack(fill=BOTH, expand=1)

        # Close the file handle once the pixels have been copied into Tk.
        with Image.open(image_path) as load:
            # Bind the photo to this widget's Tk interpreter. Without `master`
            # it is created in the default root, and a Label that lives in a
            # different Tk() instance cannot find the image.
            render = ImageTk.PhotoImage(load, master=self)
        img = Label(self, image=render)
        # Keep a reference so the image is not garbage-collected.
        img.image = render
        img.place(x=0, y=0)

def button(x=None):
    """Classify the tweet typed into the entry widget and show the result.

    Reads the text from the module-level entry ``e``. Blank input produces a
    warning dialog; a 'Positive ' classification opens the image window; any
    other label is shown in an info dialog.

    Args:
        x: Unused. Kept (now optional) so existing ``button(x)`` callers
            continue to work.

    Returns:
        The message or classifier label that was presented to the user.
    """
    # messagebox is a submodule and is not provided by `from tkinter import *`.
    from tkinter import messagebox

    custom_tweet = e.get()
    if custom_tweet.strip() == "":
        # Previously this message was assigned and then silently discarded.
        var = "Please write Tweet..."
        messagebox.showwarning("Sentiment Result", var)
        return var

    custom_token = remove_noise(word_tokenize(custom_tweet))
    var = classifier.classify(dict([token, True] for token in custom_token))

    if var == 'Positive ':
        # A Toplevel child of the existing window, not a second Tk() root
        # with its own nested mainloop(): the main event loop is already
        # running and an application should own a single Tk instance.
        root = Toplevel(e.winfo_toplevel())
        Window(root)
        root.wm_title("Tkinter window")
        root.geometry("216x233")
    else:
        # Non-positive results used to produce no feedback at all.
        messagebox.showinfo("Sentiment Result", var)
    return var


if __name__=="__main__":

    # Main application window.
    tk = Tk()

    tk.geometry("900x400")
    tk.configure(bg='blue')

    labelFrame = LabelFrame(tk, text="Tweet Entry Window: ", font=15, bg='pink', bd=4, height=300, relief=RAISED)
    labelFrame.pack(fill=X, anchor=N, padx=50, pady=60, ipady=10)

    l1 = Label(labelFrame, text="Enter your Tweet:", font=25, relief=RAISED)
    l1.pack(anchor=NW, padx=70, pady=15, ipadx=10 )

    # `e` is read as a module-level name by button().
    # xscrollcommand must be a scrollbar callback; the former value `True`
    # is not a command, so the option is omitted.
    e = Entry(labelFrame, width=80, font=40, relief=RAISED)
    e.pack(anchor=NW, ipady=10, padx=100, pady=10)

    # button() never reads its argument, so pass None instead of relying on a
    # global `x` left behind by an earlier notebook cell.
    b1 = Button(labelFrame, text="Sentiment", bg='skyblue', cursor='tcross', font=20, command=lambda: button(None))
    b1.pack(anchor=N, expand="yes", ipady=5, ipadx=7, pady=20)


    # Fixed-size window; mainloop() blocks until the window is closed.
    tk.resizable(0, 0)
    tk.mainloop()