In [17]:
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import sentiment as st

In [7]:
# Load the raw sentiment corpus from the bundled archive. Judging by the
# output below, the helper reads the train, dev and unlabeled TSV splits;
# later cells use `sentiment.train_data` and `sentiment.trainy`.
sentiment = st.read_raw_sentiment('data/sentiment.tar.gz')

-- train data
sentiment/train.tsv
4582
-- dev data
sentiment/dev.tsv
458
-- unlabeled data
sentiment/unlabeled.tsv
91524


In [8]:
class FeatureVectorizer:
  """
  Build a scikit-learn text vectorizer and fit it on the training documents.

  Parameters
  ----------
  train_data : iterable of documents the vectorizer is fitted on.
  vectorizer_option : 'tfidf' (TfidfVectorizer with smooth_idf=True) or
    'count' (CountVectorizer).
  ngram_range : forwarded to the vectorizer as ``ngram_range``.
  binary : forwarded to the vectorizer as ``binary`` (default False).

  Raises
  ------
  ValueError
    If ``vectorizer_option`` is neither 'tfidf' nor 'count'.
  """

  def __init__(self, train_data, vectorizer_option, ngram_range, binary=False):
    self.train_data = train_data
    self.vectorizer_option = vectorizer_option

    # Options shared by both vectorizer flavours.
    kwargs = {
      'ngram_range': ngram_range,
      'binary': binary,
    }

    if vectorizer_option == 'tfidf':
      self.vectorizer = TfidfVectorizer(smooth_idf=True, **kwargs)
    elif vectorizer_option == 'count':
      self.vectorizer = CountVectorizer(**kwargs)
    else:
      options = ['tfidf', 'count']
      raise ValueError('Options are:' + str(options))

    # Fit once at construction time; transform() reuses this fitted state.
    self.vectorizer.fit(self.train_data)

  def transform(self, processed_documents):
    """
    Turn processed documents into feature matrix
    """
    return self.vectorizer.transform(processed_documents)

  def get_feature_names(self):
    """
    Return the fitted vocabulary's feature names as a list.

    Prefers ``get_feature_names_out`` (the only accessor left since
    scikit-learn 1.2) and falls back to the legacy ``get_feature_names``
    on older releases, so the method works across versions.
    """
    names_out = getattr(self.vectorizer, 'get_feature_names_out', None)
    if names_out is not None:
      # The new API returns an ndarray; keep the historical list return type.
      return list(names_out())
    return self.vectorizer.get_feature_names()

In [12]:
# Fit a unigram count vectorizer on the training documents, then reuse it
# to turn those same documents into the training feature matrix.
fv = FeatureVectorizer(
  train_data=sentiment.train_data,
  vectorizer_option='count',
  ngram_range=(1, 1),
)
trainX = fv.transform(sentiment.train_data)
trainy = sentiment.trainy

# Logistic regression with library defaults; the trailing fit() expression
# also displays the fitted estimator's repr as the cell output.
clf = LogisticRegression()
clf.fit(trainX, trainy)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [42]:
def predict(sentence):
  """
  Classify one sentence with the notebook-level `fv` and `clf` and print
  the predicted label plus the first two class probabilities.

  NOTE(review): the "negative"/"pos" labels assume `clf.classes_` is ordered
  (negative, positive) -- confirm against the label encoding in `trainy`.
  """
  features = fv.transform([sentence])
  predicted_label = clf.predict(features)[0]
  class_probs = clf.predict_proba(features)[0]
  print("Prediction:", predicted_label)
  print("Prob negative:", class_probs[0])
  print("Prob pos:", class_probs[1])

In [43]:
# Render a text box bound to `predict`; the string default makes ipywidgets
# pick a Text widget and re-run `predict` whenever the sentence changes.
interact(predict, sentence="Hello what is this")

interactive(children=(Text(value='Hello what is this', description='sentence'), Output()), _dom_classes=('widg…

<function __main__.predict(sentence)>