In [11]:
import sys
import pickle
import random


def file_to_wordset(filename):
    ''' Converts a file with a word per line to a Python set '''
    words = []
    with open(filename, 'r') as f:
        for line in f:
            words.append(line.strip())
    return set(words)


def write_status(i, total):
    ''' Writes status of a process to console '''
    sys.stdout.write('\r')
    sys.stdout.write('Processing %d/%d' % (i, total))
    sys.stdout.flush()


def save_results_to_csv(results, csv_file):
    ''' Save list of type [(tweet_id, positive)] to csv in Kaggle format '''
    with open(csv_file, 'w') as csv:
        csv.write('id,prediction\n')
        for tweet_id, pred in results:
            csv.write(tweet_id)
            csv.write(',')
            csv.write(str(pred))
            csv.write('\n')


def top_n_words(pkl_file_name, N, shift=0):
    """
    Returns a dictionary of form {word:rank} of top N words from a pickle
    file which has a nltk FreqDist object generated by stats.py

    Args:
        pkl_file_name (str): Name of pickle file
        N (int): The number of words to get
        shift: amount to shift the rank from 0.
    Returns:
        dict: Of form {word:rank}
    """
    with open(pkl_file_name, 'rb') as pkl_file:
        freq_dist = pickle.load(pkl_file)
    most_common = freq_dist.most_common(N)
    words = {p[0]: i + shift for i, p in enumerate(most_common)}
    return words


def top_n_bigrams(pkl_file_name, N, shift=0):
    """
    Returns a dictionary of form {bigram:rank} of top N bigrams from a pickle
    file which has a Counter object generated by stats.py

    Args:
        pkl_file_name (str): Name of pickle file
        N (int): The number of bigrams to get
        shift: amount to shift the rank from 0.
    Returns:
        dict: Of form {bigram:rank}
    """
    with open(pkl_file_name, 'rb') as pkl_file:
        freq_dist = pickle.load(pkl_file)
    most_common = freq_dist.most_common(N)
    bigrams = {p[0]: i for i, p in enumerate(most_common)}
    return bigrams


def split_data(tweets, validation_split=0.1):
    """Split the data into training and validation sets

    Args:
        tweets (list): list of tuples
        validation_split (float, optional): validation split %

    Returns:
        (list, list): training-set, validation-set
    """
    index = int((1 - validation_split) * len(tweets))
    random.shuffle(tweets)
    return tweets[:index], tweets[index:]


In [14]:
import utils

# Classifies a tweet based on the number of positive and negative words in it

TRAIN_PROCESSED_FILE = 'train-processed.csv'
TEST_PROCESSED_FILE = 'test-processed.csv'
POSITIVE_WORDS_FILE = 'positive-words.txt'
NEGATIVE_WORDS_FILE = 'negative-words.txt'
TRAIN = False


def classify(processed_csv, test_file=True, **params):
    file = open('/Users/saurabhkulkarni/Github-NLP/positive-words.txt', 'r')
    positive_words = [str(line).split("\n")[0] for line in file.readlines()]
    
    file1 = open('/Users/saurabhkulkarni/Github-NLP/negative-words.txt', 'r')
    negative_words = [str(line).split("\n")[0] for line in file.readlines()]

    predictions = []
    with open(processed_csv, 'r') as csv:
        for line in csv:
            if test_file:
                tweet_id, tweet = line.strip().split(',')
            else:
                tweet_id, label, tweet = line.strip().split(',')
            pos_count, neg_count = 0, 0
            for word in tweet.split():
                if word in positive_words:
                    print("positive word found :",word)
                    pos_count += 1
                elif word in negative_words:
                    print("negative word found :",word)
                    neg_count += 1
            print(pos_count, neg_count)
            prediction = 1 if pos_count >= neg_count else 0
            if test_file:
                predictions.append((tweet_id, prediction))
            else:
                predictions.append((tweet_id, int(label), prediction))
    return predictions


In [15]:
if TRAIN:
    predictions = classify(TRAIN_PROCESSED_FILE, test_file=(not TRAIN), positive_words=POSITIVE_WORDS_FILE, negative_words=NEGATIVE_WORDS_FILE)
    correct = sum([1 for p in predictions if p[1] == p[2]]) * 100.0 / len(predictions)
    print('Correct = %.2f%%' % correct)
else:
    predictions = classify(TEST_PROCESSED_FILE, test_file=(not TRAIN), positive_words=POSITIVE_WORDS_FILE, negative_words=NEGATIVE_WORDS_FILE)
    save_results_to_csv(predictions, 'baseline.csv')

0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : work
1 0
0 0
positive word found : wonder
1 0
positive word found : positive
1 0
positive word found : thanks
1 0
0 0
0 0
positive word found : ok
1 0
positive word found : right
1 0
positive word found : funny
1 0
positive word found : fine
1 0
0 0
positive word found : well
positive word found : happy
2 0
0 0
0 0
positive word found : pretty
1 0
0 0
0 0
0 0
0 0
positive word found : like
positive word found : right
2 0
0 0
0 0
0 0
positive word found : cute
1 0
0 0
0 0
positive word found : work
1 0
positive word found : like
1 0
positive word found : great
1 0
positive word found : like
1 0
0 0
0 0
0 0
0 0
0 0
positive word found : well
1 0
0 0
0 0
0 0
0 0
positive word found : properly
1 0
positive word found : magic
positive word found : fun
2 0
0 0
positive word found : clean
positive word found : enjoyable
2 0
positive word found : boom
positive word found : boom
2 0
0 0
positive word found : congrats
1 0
0 0
positive word found 

positive word found : hot
1 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : happy
1 0
0 0
0 0
0 0
positive word found : better
1 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : well
1 0
0 0
positive word found : ok
positive word found : well
positive word found : well
positive word found : woo
4 0
positive word found : good
1 0
0 0
0 0
positive word found : great
1 0
0 0
positive word found : pretty
positive word found : thanks
positive word found : pretty
3 0
positive word found : right
1 0
0 0
positive word found : lead
1 0
0 0
positive word found : well
1 0
0 0
positive word found : fine
1 0
0 0
positive word found : amazing
1 0
0 0
positive word found : good
1 0
positive word found : thank
1 0
positive word found : favorite
1 0
0 0
0 0
positive word found : cool
positive word found : like
2 0
positive word found : happy
positive word found : loves
2 0
positive word found : good
1 0
0 0
0 0
positive word found : well
positive word found : thanks
2 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0

5 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : perfect
1 0
0 0
0 0
0 0
positive word found : best
1 0
0 0
0 0
positive word found : well
positive word found : enough
positive word found : fun
3 0
0 0
positive word found : happy
1 0
0 0
positive word found : well
1 0
0 0
positive word found : good
1 0
0 0
0 0
positive word found : smiles
positive word found : fairly
2 0
0 0
0 0
positive word found : love
1 0
0 0
0 0
0 0
0 0
positive word found : masterful
positive word found : yum
2 0
0 0
0 0
positive word found : like
1 0
positive word found : haha
1 0
0 0
0 0
0 0
positive word found : good
1 0
positive word found : happy
1 0
positive word found : happy
1 0
positive word found : happy
1 0
positive word found : happy
1 0
positive word found : happy
1 0
positive word found : happy
1 0
positive word found : happy
1 0
positive word found : happy
1 0
0 0
0 0
positive word found : good
1 0
0 0
0 0
0 0
positive word found : work
1 0
0 0
0 0
positive word found : nice
1 0
positive word foun

0 0
0 0
0 0
positive word found : progress
1 0
0 0
positive word found : like
1 0
0 0
0 0
0 0
positive word found : happy
1 0
positive word found : free
1 0
0 0
positive word found : yum
positive word found : yum
2 0
0 0
positive word found : love
positive word found : lol
2 0
0 0
positive word found : happy
1 0
0 0
0 0
0 0
0 0
positive word found : like
1 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : cute
1 0
0 0
positive word found : enough
1 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : love
1 0
0 0
positive word found : awesome
positive word found : yay
2 0
0 0
0 0
positive word found : loving
positive word found : right
2 0
0 0
0 0
positive word found : excellent
1 0
positive word found : excited
1 0
positive word found : excited
1 0
positive word found : wow
1 0
0 0
positive word found : fantastic
1 0
0 0
0 0
positive word found : angel
positive word found : angel
2 0
positive word found : golden
1 0
positive word found : good
1 0
positive word found : like
1 0
po

0 0
0 0
0 0
positive word found : well
1 0
positive word found : like
1 0
positive word found : like
1 0
positive word found : like
1 0
0 0
0 0
0 0
0 0
positive word found : lol
positive word found : thank
2 0
0 0
positive word found : great
1 0
positive word found : good
1 0
positive word found : good
1 0
positive word found : decent
positive word found : like
2 0
0 0
0 0
0 0
0 0
0 0
positive word found : like
1 0
positive word found : like
1 0
0 0
0 0
0 0
positive word found : work
1 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : pretty
1 0
0 0
0 0
0 0
positive word found : like
1 0
positive word found : good
1 0
0 0
0 0
0 0
0 0
0 0
positive word found : like
1 0
positive word found : like
1 0
positive word found : like
positive word found : best
2 0
positive word found : like
1 0
positive word found : like
positive word found : fancy
positive word found : like
3 0
positive word found : love
1 0
0 0
0 0
0 0
0 0
0 0
positive word found : good
1 0
0 0
positive 

0 0
0 0
0 0
0 0
positive word found : fail
positive word found : right
2 0
positive word found : better
1 0
positive word found : good
1 0
positive word found : great
1 0
0 0
positive word found : joy
positive word found : modern
2 0
0 0
0 0
positive word found : like
positive word found : right
2 0
positive word found : great
positive word found : humor
2 0
0 0
0 0
positive word found : lol
1 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : super
1 0
0 0
0 0
0 0
0 0
0 0
positive word found : strong
1 0
positive word found : helping
positive word found : clean
2 0
0 0
positive word found : like
positive word found : like
2 0
0 0
0 0
0 0
0 0
0 0
positive word found : awesome
1 0
positive word found : cheer
positive word found : like
2 0
positive word found : heal
1 0
0 0
positive word found : like
1 0
positive word found : good
positive word found : enough
2 0
0 0
positive word found : right
1 0
0 0
0 0
0 0
0 0
0 0
positive word found : grace
1 0
positive word fou

0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : fun
1 0
0 0
0 0
0 0
0 0
positive word found : fun
positive word found : fun
2 0
0 0
0 0
positive word found : happy
1 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : worth
1 0
0 0
0 0
positive word found : beautiful
positive word found : love
2 0
positive word found : beautiful
1 0
0 0
positive word found : like
1 0
positive word found : good
1 0
positive word found : great
1 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : fun
positive word found : love
2 0
positive word found : work
1 0
0 0
0 0
0 0
positive word found : right
1 0
0 0
0 0
0 0
positive word found : good
1 0
positive word found : lol
1 0
0 0
0 0
positive word found : like
1 0
0 0
positive word found : love
1 0
positive word found : love
positive word found : easy
2 0
positive word found : loved
1 0
positive word found : loves
1 0
0 0
positive word found : lucky
1 0
positive word found : lucky
1 0
positive word found : right
1 0
positive wor

positive word found : work
positive word found : happy
2 0
positive word found : best
1 0
0 0
0 0
0 0
0 0
positive word found : perfect
positive word found : perfect
2 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : plus
positive word found : fun
2 0
positive word found : ready
1 0
0 0
0 0
0 0
positive word found : magic
1 0
0 0
positive word found : smile
positive word found : lucky
positive word found : beautiful
positive word found : beautiful
4 0
0 0
positive word found : worth
1 0
positive word found : hilarious
positive word found : work
2 0
positive word found : refreshing
positive word found : right
2 0
0 0
0 0
0 0
0 0
positive word found : like
1 0
0 0
0 0
positive word found : yay
1 0
0 0
0 0
0 0
positive word found : ready
1 0
0 0
0 0
0 0
positive word found : lucky
1 0
0 0
0 0
positive word found : win
positive word found : win
2 0
positive word found : win
1 0
0 0
positive word found : hot
1 0
0 0
0 0
0 0
0 0
positive word found : good
positive word found : charitable
2 0


1 0
0 0
0 0
positive word found : lol
1 0
positive word found : like
1 0
positive word found : won
positive word found : better
positive word found : good
3 0
0 0
positive word found : happy
1 0
0 0
0 0
0 0
positive word found : high
1 0
0 0
0 0
0 0
0 0
0 0
positive word found : exceeds
1 0
positive word found : best
1 0
positive word found : love
1 0
0 0
positive word found : fail
1 0
positive word found : lovely
1 0
0 0
positive word found : accurate
positive word found : enough
2 0
positive word found : well
1 0
positive word found : fun
positive word found : funny
positive word found : kind
positive word found : smart
4 0
positive word found : enough
1 0
positive word found : lol
1 0
positive word found : hot
1 0
positive word found : lover
positive word found : love
2 0
positive word found : supportive
positive word found : kind
positive word found : nice
3 0
positive word found : good
1 0
0 0
positive word found : likes
1 0
positive word found : funny
1 0
positive word found : sh

0 0
0 0
0 0
0 0
0 0
positive word found : witty
1 0
0 0
positive word found : peach
1 0
0 0
0 0
0 0
0 0
positive word found : ok
1 0
0 0
0 0
0 0
0 0
positive word found : right
positive word found : lol
2 0
0 0
0 0
positive word found : warm
positive word found : warm
2 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : brave
1 0
positive word found : enjoying
1 0
positive word found : like
positive word found : lush
2 0
positive word found : lucky
1 0
0 0
positive word found : good
positive word found : sexy
2 0
positive word found : good
positive word found : sexy
2 0
positive word found : good
1 0
0 0
positive word found : grand
positive word found : like
positive word found : soft
3 0
0 0
positive word found : grand
1 0
0 0
positive word found : fantastic
positive word found : tough
2 0
positive word found : success
positive word found : like
2 0
positive word found : bless
positive word found : good
positive word found : nice
3 0
0 0
positive word found : like
positive word found : l

0 0
0 0
0 0
0 0
positive word found : beautiful
1 0
positive word found : beautiful
1 0
positive word found : amazing
positive word found : love
positive word found : holy
3 0
positive word found : love
1 0
0 0
positive word found : beautiful
positive word found : awesome
2 0
positive word found : love
positive word found : favorite
2 0
positive word found : love
1 0
positive word found : clear
1 0
positive word found : awesome
1 0
0 0
positive word found : like
positive word found : cute
2 0
positive word found : well
positive word found : top
2 0
0 0
0 0
positive word found : love
1 0
positive word found : love
1 0
positive word found : beautiful
positive word found : funny
2 0
0 0
positive word found : cool
positive word found : good
2 0
positive word found : best
positive word found : love
2 0
0 0
positive word found : best
positive word found : beauty
positive word found : pros
positive word found : generous
4 0
0 0
positive word found : sweet
positive word found : thank
positive 

positive word found : top
positive word found : work
2 0
0 0
positive word found : love
1 0
positive word found : haha
positive word found : fun
2 0
0 0
positive word found : fast
1 0
0 0
0 0
positive word found : yay
positive word found : win
2 0
positive word found : best
1 0
0 0
0 0
0 0
0 0
positive word found : work
1 0
0 0
positive word found : like
positive word found : love
2 0
positive word found : like
positive word found : ready
2 0
0 0
0 0
positive word found : well
1 0
positive word found : clean
1 0
0 0
0 0
0 0
0 0
0 0
positive word found : faithfully
1 0
positive word found : good
positive word found : surprised
2 0
positive word found : good
1 0
0 0
0 0
0 0
positive word found : gratitude
positive word found : thankful
positive word found : love
positive word found : enrich
4 0
positive word found : peaceful
1 0
positive word found : funny
positive word found : envy
2 0
0 0
0 0
positive word found : like
1 0
positive word found : idol
1 0
positive word found : fail
posit

positive word found : thanks
1 0
0 0
0 0
0 0
positive word found : like
positive word found : rocks
2 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : recommend
positive word found : kind
positive word found : generous
3 0
positive word found : recommend
positive word found : good
2 0
positive word found : recommend
positive word found : best
2 0
positive word found : recommend
positive word found : lovely
positive word found : funny
positive word found : witty
positive word found : interesting
positive word found : top
6 0
0 0
positive word found : ok
1 0
positive word found : love
1 0
positive word found : happy
positive word found : love
2 0
0 0
0 0
0 0
0 0
0 0
positive word found : favorite
positive word found : top
positive word found : love
3 0
0 0
0 0
positive word found : cool
1 0
0 0
0 0
0 0
0 0
positive word found : great
positive word found : enjoy
2 0
0 0
0 0
positive word found : classic
positive word found : great
2 0
positive word found : sweet
1 0
0 0
0 0

2 0
positive word found : like
1 0
0 0
0 0
positive word found : ok
positive word found : solid
2 0
0 0
0 0
0 0
positive word found : loves
positive word found : lol
2 0
positive word found : yummy
1 0
positive word found : love
1 0
positive word found : fabulous
positive word found : fascinating
2 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : best
positive word found : love
2 0
0 0
positive word found : better
1 0
positive word found : well
1 0
0 0
positive word found : well
positive word found : good
2 0
positive word found : cool
1 0
0 0
0 0
positive word found : like
1 0
positive word found : best
1 0
positive word found : cute
1 0
positive word found : good
1 0
0 0
0 0
0 0
0 0
positive word found : fun
positive word found : haha
2 0
positive word found : nice
1 0
positive word found : love
1 0
0 0
0 0
positive word found : happy
1 0
positive word found : right
1 0
positive word found : awesome
1 0
0 0
positive word found : like
1 0
0 0
0 0
0 0
0 0
positive word found : l

positive word found : work
1 0
positive word found : well
1 0
0 0
0 0
positive word found : enjoy
1 0
0 0
0 0
0 0
positive word found : hot
1 0
positive word found : survival
1 0
0 0
positive word found : master
1 0
positive word found : loves
1 0
0 0
0 0
0 0
0 0
0 0
positive word found : ready
1 0
positive word found : lol
positive word found : lol
positive word found : love
3 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : excited
positive word found : excited
2 0
positive word found : good
1 0
positive word found : like
1 0
positive word found : love
1 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : improvement
1 0
0 0
positive word found : breakthroughs
1 0
positive word found : top
1 0
0 0
positive word found : haha
positive word found : like
positive word found : good
3 0
positive word found : right
1 0
positive word found : best
1 0
positive word found : glad
1 0
positive word found : work
1 0
0 0
positive word found : love
1 0
positive word found : peace
1 0
0 0
positiv

positive word found : brilliant
1 0
positive word found : creative
1 0
0 0
0 0
positive word found : loving
1 0
0 0
positive word found : haha
positive word found : amazing
positive word found : love
3 0
0 0
positive word found : like
1 0
positive word found : love
1 0
0 0
positive word found : awesome
1 0
positive word found : creative
positive word found : brilliant
2 0
positive word found : love
positive word found : pure
2 0
0 0
positive word found : passion
1 0
positive word found : nice
1 0
positive word found : safe
positive word found : humour
positive word found : best
3 0
0 0
positive word found : hilarious
1 0
0 0
0 0
positive word found : love
1 0
0 0
positive word found : like
positive word found : love
2 0
positive word found : handsome
positive word found : well
2 0
0 0
positive word found : like
1 0
positive word found : cool
1 0
positive word found : better
1 0
positive word found : good
1 0
positive word found : yay
1 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : cl

positive word found : great
1 0
0 0
positive word found : like
positive word found : silly
2 0
positive word found : haha
1 0
0 0
positive word found : great
1 0
0 0
0 0
positive word found : better
positive word found : better
positive word found : better
positive word found : hero
4 0
positive word found : like
1 0
positive word found : proud
1 0
positive word found : good
1 0
positive word found : love
positive word found : fail
2 0
positive word found : good
positive word found : optimism
2 0
positive word found : happy
1 0
positive word found : best
1 0
0 0
0 0
0 0
0 0
0 0
0 0
positive word found : like
1 0
positive word found : like
1 0
positive word found : like
1 0
positive word found : love
positive word found : hot
positive word found : hot
3 0
positive word found : love
positive word found : love
2 0
0 0
0 0
0 0
positive word found : lol
1 0
positive word found : ready
1 0
positive word found : like
positive word found : good
positive word found : like
3 0
0 0
0 0
positive w

In [10]:
pwd

'/Users/saurabhkulkarni/SaurabhWorkspace/ADS-Project'