# Sentiment Analysis of Tweets Using Logistic Regression

Taken from: https://www.twilio.com/blog/2017/12/sentiment-analysis-scikit-learn.html

In [1]:
import random

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
# Locations of the labelled tweet files, relative to the working directory.
# Forward slashes are used because they are accepted on Windows as well as on
# POSIX systems, and they avoid the invalid "\d" / "\l" escape sequences that
# backslash separators produce inside ordinary (non-raw) string literals.
POS_TWEETS_FILE = "../datasets/logistic_regression_pos_tweets.txt"
NEG_TWEETS_FILE = "../datasets/logistic_regression_neg_tweets.txt"

In [3]:
# Parallel lists: data[k] is the raw line of one tweet (trailing newline kept)
# and data_labels[k] is its sentiment label.
data = []
data_labels = []

# Positive tweets are loaded first, then negative ones; every line of a file
# becomes one sample tagged with that file's label.
for tweets_path, sentiment in ((POS_TWEETS_FILE, 'pos'), (NEG_TWEETS_FILE, 'neg')):
    with open(tweets_path, encoding="utf-8") as tweets_file:
        tweets = tweets_file.readlines()
    data.extend(tweets)
    data_labels.extend([sentiment] * len(tweets))

In [4]:
# Bag-of-words features: word-level tokens, with the original letter case kept.
vectorizer = CountVectorizer(analyzer='word', lowercase=False)

# Learn the vocabulary from all tweets and count word occurrences per tweet.
features = vectorizer.fit_transform(data)

# Dense copy of the count matrix, used by the cells below.
features_nd = features.toarray()

In [5]:
# Display the dense count matrix (one row per tweet); NumPy abbreviates the
# printed rows and columns with "...".
features_nd

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [6]:
# Keep 80% of the samples for training and hold out the rest for testing.
# The fixed random_state makes the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features_nd, data_labels, train_size=0.80, random_state=1234
)



In [7]:
# Build the classifier with the L-BFGS solver and train it on the training
# split; fit() hands back the fitted estimator, so the calls can be chained.
log_model = LogisticRegression(solver='lbfgs').fit(X=X_train, y=y_train)

In [8]:
# Predict a label for every row of the held-out test split.
y_pred = log_model.predict(X_test)

In [14]:
# Print a random run of consecutive test-set predictions, each followed by the
# tweet it was made for. The window shrinks when the test set has fewer than
# 7 rows, so randint never receives a negative upper bound and the loop never
# indexes past the end of the test set.
sample_size = min(7, len(X_test))
start = random.randint(0, len(X_test) - sample_size)

for row in range(start, start + sample_size):
    print(y_pred[row])
    # Recover the tweet by locating the first row of the full feature matrix
    # that equals this test row. The comparison stays inside NumPy instead of
    # converting the whole matrix to nested Python lists on every iteration.
    # Tweets with identical word counts cannot be told apart this way, so the
    # first such tweet is shown; an IndexError is raised if no row matches.
    matches = (features_nd == X_test[row]).all(axis=1).nonzero()[0]
    print(data[matches[0]].strip())

pos
"@DonnieWahlberg YOU give me joy....and you have for years. You are such an amazing man and we are all lucky to know you. "
neg
"is at work and saddened due to Ted Baker London not being able to ship to the continental US  I want the hat!"
neg
"got highly bored today scanning 24 pages for daddy. Had to do it! "
neg
"My wife took my last ï¿½20, no McDonalds breakfast for me today "
pos
"Uploading pics from Sunday night out - it was a good night, lots of fun, and I got to know my boyfriend's sister better "
neg
"i feel unloved. dropped 2 tweeters "
neg
"@cateycancer ohhh??? That bad huh  u still got pink in your hair where you working??"


In [10]:
# Accuracy of the predicted labels against the true labels of the test split.
print(accuracy_score(y_test, y_pred))

0.800498753117207
