In [85]:
import pandas as pd
import numpy as np
from textblob import TextBlob
from sklearn.metrics import classification_report, confusion_matrix
# Widen the column display so long text fields are not truncated in DataFrame output.
# The fully qualified option name is used instead of relying on pandas' partial-name
# matching, which can become ambiguous if similarly named options are added.
pd.set_option('display.max_colwidth', 400)

In [121]:
# Polarity thresholds used to bin scores into classes:
#   score >= pos_cutoff            -> positive
#   neg_cutoff < score < pos_cutoff -> neutral
#   score <= neg_cutoff            -> negative
neg_cutoff, pos_cutoff = 0, 0.2

### Import validation data for running a baseline model

In [3]:
# Load the saved validation split from the interim data folder.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, which can
# execute code on load -- only use it with files produced by this project.
val = np.load(file='../data/interim/val_data.npy', allow_pickle=True)

### Load column names from the raw dataset

In [6]:
# Read the raw tweets CSV and keep its header; the column names are reused below
# to label the validation array.
df = pd.read_csv(filepath_or_buffer='../data/raw/Tweets.csv')
cols = df.columns

In [32]:
# Wrap the loaded validation array in a DataFrame, labelling its columns with the
# header taken from the raw CSV.
val = pd.DataFrame(data=val, columns=cols)

In [33]:
# Sanity check: (rows, columns) of the validation DataFrame.
val.shape

(1464, 15)

### Apply textblob.sentiment method to get sentiment from text column

`TextBlob.sentiment` returns a tuple of (*polarity*, *subjectivity*).  We aren't concerned with the subjectivity right now, so we'll just pull the polarity score.  

In [150]:
# Score every text entry with TextBlob, keeping only the polarity component of the
# (polarity, subjectivity) sentiment tuple.
val['polarity'] = val['text'].apply(lambda tweet: TextBlob(tweet).sentiment.polarity)

### Based on the descriptive stats, bin by the quartiles: polarity <= 0 is negative, polarity >= 0.2 is positive, and anything in between is neutral

In [151]:
# Summary statistics of the polarity scores; the quartiles are what the
# positive/negative cutoffs are based on.
val['polarity'].describe()

count    1464.000000
mean        0.052598
std         0.316773
min        -1.000000
25%         0.000000
50%         0.000000
75%         0.200000
max         1.000000
Name: polarity, dtype: float64

### Create predicted labels from the polarity score

In [152]:
def polarity_to_label(polarity_score, pos=None, neg=None):
    """Bin a polarity score into an integer sentiment class.

    Parameters
    ----------
    polarity_score : float
        Polarity value to classify.
    pos : float, optional
        Inclusive lower bound of the positive class. When omitted, the
        notebook-level ``pos_cutoff`` is used.
    neg : float, optional
        Inclusive upper bound of the negative class. When omitted, the
        notebook-level ``neg_cutoff`` is used.

    Returns
    -------
    int
        2 (positive) when ``polarity_score >= pos``; 1 (neutral) when
        ``neg < polarity_score < pos``; 0 (negative) in every other case,
        including a score for which both comparisons are false.
    """
    # Thresholds are resolved lazily, so passing explicit values never reads the
    # notebook globals; this makes threshold sweeps possible without rebinding them.
    upper = pos_cutoff if pos is None else pos
    if polarity_score >= upper:
        return 2

    lower = neg_cutoff if neg is None else neg
    if polarity_score > lower:
        return 1

    return 0


In [153]:
# Convert each polarity score into its class label (0 / 1 / 2).
val['labels'] = val['polarity'].apply(polarity_to_label)

### Create true target labels based on airline sentiment

In [160]:
def sent_to_label(airline_sentiment):
    """Translate an airline-sentiment string into its integer class code.

    'positive' -> 2, 'neutral' -> 1, 'negative' -> 0.
    Any other value yields None.
    """
    encoding = (('positive', 2), ('neutral', 1), ('negative', 0))
    for sentiment_name, class_code in encoding:
        if airline_sentiment == sentiment_name:
            return class_code
    # No match: fall through with an explicit None.
    return None

In [155]:
# Encode the annotated airline sentiment as the integer ground-truth label.
val['y_label'] = val['airline_sentiment'].apply(sent_to_label)

In [156]:
# Rename the label columns to predicted / actual for the evaluation cells below.
# Reassigning the result (rather than inplace=True) avoids mutating a frame that
# earlier cells already displayed; columns= makes the target axis explicit.
val = val.rename(columns={'labels': 'predicted', 'y_label': 'actual'})

### Create and interpret Classification Report

In [157]:
# Per-class precision / recall / F1 for the polarity baseline.
# labels is pinned explicitly so every entry of target_names is guaranteed to line up
# with its integer code (0, 1, 2) even if a class is absent from the data; this also
# matches the label order passed to confusion_matrix.
print(classification_report(
    val['actual'],
    val['predicted'],
    labels=[0, 1, 2],
    target_names=['negative', 'neutral', 'positive'],
))

              precision    recall  f1-score   support

    negative       0.71      0.73      0.72       913
     neutral       0.22      0.10      0.14       304
    positive       0.41      0.65      0.50       247

    accuracy                           0.58      1464
   macro avg       0.45      0.49      0.45      1464
weighted avg       0.56      0.58      0.56      1464



$\qquad$ Overall accuracy for a naive model isn't horrible: at 58% it is almost twice as good as uniform random guessing, which would get about 1 out of 3 correct (33%).  Note, however, that always predicting the majority class (negative, 913 of 1464 tweets) would score about 62%, so accuracy alone flatters this baseline.  Not surprisingly, the precision for negative responses was highest (there were a lot of negative responses), while the worst scores were in the neutral category.  
$\qquad$  We'll need a more nuanced model to capture the "neutral" sentiment.  Our recall for neutral is an abysmal 10%, meaning we are systematically misclassifying this category.  The main drivers of this model are, of course, the pos and neg cutoff thresholds that we defined earlier.  In particular, with a negative cutoff of 0, every tweet that scores exactly 0 is binned as negative, leaving only the narrow band between 0 and 0.2 for neutral.  The model predictions will likely differ greatly depending on the thresholds that we define. 

In [159]:
# Rows are the actual classes and columns the predicted classes, both ordered
# negative (0), neutral (1), positive (2).
confusion_matrix(y_true=val['actual'], y_pred=val['predicted'], labels=[0, 1, 2])

array([[662,  86, 165],
       [207,  30,  67],
       [ 65,  21, 161]])