In [None]:
# libs
import sys
import csv
import time

import libs.bag_of_worder as bag_of_worder
import libs.preprocessor as tweet_preproc

## Load Model

In [None]:
from joblib import dump, load

# Load Model
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code -- only load model files that come from a trusted source.
try:
    classifier = load('model/logistic.joblib')
    print("Model loaded!")

except Exception as load_error:
    # Catch Exception instead of using a bare `except:` so KeyboardInterrupt
    # and SystemExit still propagate.
    # Reset the name so a classifier left over from an earlier run of this
    # cell cannot be silently reused after a failed reload.
    classifier = None
    print("ERROR: Model not loaded (" + repr(load_error) + ")")

## Load Dictionary

In [None]:
# Vocabulary list, filled with one entry per line of the dictionary file
wordDict = []

path = "model/dictionary.txt"
with open(path, mode='r', newline='', encoding="utf-8") as dictionary_file:
    # Remove surrounding whitespace (line terminator included) from every line
    wordDict.extend(line.strip() for line in dictionary_file)

# Report how many entries were read
print("Dict Dimension: {}".format(len(wordDict)))

## Load Objects

In [None]:
# Init Preprocessor
# Instance of TwitterPreprocessor from libs.preprocessor; its `preprocess`
# method is applied to the raw tweet text inside predictTweet.
twitterPreprocessor = tweet_preproc.TwitterPreprocessor()

# Init Bag-of-Worder using the dictionary
# wordDict is the list read from model/dictionary.txt; the object's
# `computeLine` method later turns a preprocessed tweet into the input
# handed to classifier.predict_proba.
# NOTE(review): presumably the dictionary order has to match the feature
# order the model was trained with -- confirm.
countBoW = bag_of_worder.BagOfWorder(wordDict)

## Tweet Prediction Pipeline

In [None]:
def predictTweets(tweets, min_confidence=0.5):
    """Classify each tweet in ``tweets`` with ``predictTweet``.

    Args:
        tweets: iterable of raw tweet texts.
        min_confidence: threshold forwarded to ``predictTweet`` for every
            tweet. Defaults to 0.5, the same default ``predictTweet`` uses,
            so existing one-argument calls behave as before.

    Returns:
        list with one ``predictTweet`` result per input tweet, in input
        order (empty list for empty input).
    """
    return [predictTweet(tweet, min_confidence=min_confidence) for tweet in tweets]


def predictTweet(tweet,min_confidence=0.5):
    """Classify a single tweet, abstaining when the model is not confident.

    The tweet goes through the module-level ``twitterPreprocessor``, is
    turned into model input by ``countBoW.computeLine`` and is scored with
    ``classifier.predict_proba``.

    Args:
        tweet: raw tweet text.
        min_confidence: probability the winning class must strictly exceed.

    Returns:
        0 if the first class wins, 1 if the second class wins, and -1 if
        the winning probability does not exceed ``min_confidence``.
    """
    # Preprocess
    cleaned_tweet = twitterPreprocessor.preprocess(tweet)

    # Build the bag-of-words representation of the tweet
    features = countBoW.computeLine(cleaned_tweet)

    # Class probabilities for the (single) input row.
    # NOTE(review): assumes the model has exactly two classes and that the
    # columns are ordered (dem, rep), i.e. classifier.classes_ == [0, 1] -- confirm.
    prob_dem, prob_rep = classifier.predict_proba(features)[0]

    # Pick the more probable class first. Testing prob_dem against the
    # threshold before looking at prob_rep returned 0 whenever both
    # probabilities exceeded a threshold below 0.5, even if class 1 was the
    # more likely one. Ties still resolve to class 0, as before.
    if prob_rep > prob_dem:
        label, confidence = 1, prob_rep
    else:
        label, confidence = 0, prob_dem

    # Compare to min confidence level
    if confidence > min_confidence:
        return label
    return -1
    

## Load and classify tweets

In [None]:
path = "data/general/2016-10-21/tweets.csv"

MIN_CONFIDENCE = 0.8

with open(path, mode='r', newline='', encoding="utf-8") as csvfile:

    # CSV reader over the tweet file
    reader = csv.reader(csvfile, quotechar='"', delimiter=',')

    # First row is the header: look up the position of each column we need
    header = next(reader)
    ind_createdAt, ind_text, ind_description, ind_location = (
        header.index(column)
        for column in ('created_at', 'text', 'description', 'location')
    )

    # Number of data rows handled so far
    tweet_counter = 0

    for row in reader:

        # Pull the relevant fields out of the current row
        created_at, text, description, location = (
            row[index]
            for index in (ind_createdAt, ind_text, ind_description, ind_location)
        )

        # Print the prediction for the tweet text, then the text itself,
        # each followed by blank lines.
        # (Swap `text` for `description` to classify the description column instead.)
        print(predictTweet(text, min_confidence=MIN_CONFIDENCE), "\n", text, "\n", sep="\n")

        tweet_counter += 1