# Sentiment using TextBlob

This model will simply use TextBlob to determine the sentiment of a phrase.

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the tab-separated training and test sets into DataFrames.
train, test = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in ('../../data/train.tsv', '../../data/test.tsv')
)



## Calculate Sentiments

We'll calculate a sentiment polarity score for each phrase using TextBlob.

In [2]:
from textblob import TextBlob

def text_blob_sentiment(phrase):
    """Return TextBlob's polarity score for a single phrase.

    Args:
        phrase: Text to analyse; passed unchanged to ``TextBlob``.

    Returns:
        The ``polarity`` component of the blob's ``sentiment`` attribute.
    """
    blob = TextBlob(phrase)
    sentiment = blob.sentiment
    return sentiment.polarity

def get_text_blob_sentiments(phrases):
    """Build a one-column DataFrame of TextBlob polarity scores.

    Args:
        phrases: Iterable of phrases; each one is scored with
            ``text_blob_sentiment``.

    Returns:
        A ``pandas.DataFrame`` with a single ``sentiment`` column holding one
        score per phrase, in iteration order.
    """
    # Materialise the scores as a list. On Python 3 ``map`` returns a lazy
    # iterator, which the DataFrame constructor does not reliably expand into
    # a column; a list behaves the same on Python 2 and Python 3.
    sentiments = [text_blob_sentiment(phrase) for phrase in phrases]
    return pd.DataFrame({'sentiment': sentiments})

# Feature matrix: one TextBlob polarity score per training phrase.
X = get_text_blob_sentiments(train['Phrase'])

In [None]:
def cv(X, y, random_state=None):
    """Cross-validate three classifiers on (X, y) and print score and timing.

    A random forest (500 trees), AdaBoost and an SVC are each evaluated with
    ``cross_val_score``. For every model the mean fold score and the elapsed
    wall-clock time are printed.

    Args:
        X: Feature matrix passed to ``cross_val_score``.
        y: Target labels passed to ``cross_val_score``.
        random_state: Optional seed forwarded to the random forest and
            AdaBoost so that runs can be reproduced. The default ``None``
            leaves both unseeded, matching the previous behaviour.

    Returns:
        None. Results are only printed.
    """
    import time

    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import SVC

    # ``sklearn.cross_validation`` was removed in scikit-learn 0.20. Prefer its
    # replacement and fall back only on releases that predate it.
    try:
        from sklearn.model_selection import cross_val_score
    except ImportError:
        from sklearn.cross_validation import cross_val_score

    # (progress label, score format, estimator) for each model, in run order.
    candidates = [
        ("Random Forest", "Random Forest Score: %2.2f",
         RandomForestClassifier(n_estimators=500, random_state=random_state)),
        ("AdaBoost", "AdaBoost Score:      %2.2f",
         AdaBoostClassifier(random_state=random_state)),
        ("SVC", "SVC Score:           %2.2f",
         SVC()),
    ]

    for label, score_format, estimator in candidates:
        started = time.time()
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("%s cross validation runnning..." % label)
        # Pin the fold count: the library default changed from 3 to 5 folds in
        # scikit-learn 0.22, which would silently change the reported scores.
        mean_score = cross_val_score(estimator, X, y, cv=3).mean()
        print(score_format % mean_score)
        print("dt: %f" % (time.time() - started))
        print("")
    
# Cross-validate on the first CV_SAMPLE_SIZE training rows only, not the
# full training set.
CV_SAMPLE_SIZE = 5000
cv(X[:CV_SAMPLE_SIZE], train.Sentiment[:CV_SAMPLE_SIZE])

Random Forest cross validation runnning...
Random Forest Score: 0.58
dt: 8.051705

AdaBoost cross validation runnning...
AdaBoost Score:      0.58
dt: 1.259727

SVC cross validation runnning...
SVC Score:           0.59
dt: 3.209172



## Learn with SVC

In [None]:
from sklearn.svm import SVC
svc = SVC()

# Fit an SVC on the TextBlob polarity feature of every training phrase.
# Single-argument print(...) is valid on both Python 2 and Python 3.
print("training SVC...")
X_train = get_text_blob_sentiments(train.Phrase)
y_train = train.Sentiment
svc.fit(X_train, y_train)

# Predict on the training data itself and save the predictions next to the
# true labels. The column order is pinned explicitly so the CSV layout does
# not depend on how the installed pandas orders dict keys.
train_pred = svc.predict(X_train)
results_train = pd.DataFrame(
    {
        'PhraseId': train.PhraseId,
        'Predicted': train_pred,
        'Sentiment': train.Sentiment,
    },
    columns=['PhraseId', 'Predicted', 'Sentiment'],
)
results_train.to_csv('results_train.csv', index=False)

# Score the test phrases and write one predicted Sentiment per PhraseId.
print("predicting...")
X_test = get_text_blob_sentiments(test.Phrase)
y_pred = svc.predict(X_test)

results_test = pd.DataFrame(
    {
        'PhraseId': test.PhraseId,
        'Sentiment': y_pred,
    },
    columns=['PhraseId', 'Sentiment'],
)

results_test.to_csv('results_test.csv', index=False)
print("done.")