In [2]:
import pandas as pd
import numpy as np
from textblob import TextBlob 
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [4]:
# Load the cleaned tweets; later cells read the 'clean_text' column of this frame.
df = pd.read_csv('../csv/twitter/result/clean_tweets_es_trends.csv')


In [4]:
def getsubjectivity(text):
    """Return the subjectivity component of TextBlob's sentiment for ``text``."""
    blob = TextBlob(text)
    sentiment = blob.sentiment
    return sentiment.subjectivity

def getpolarity(text):
    """Return the polarity component of TextBlob's sentiment for ``text``."""
    blob = TextBlob(text)
    sentiment = blob.sentiment
    return sentiment.polarity

In [5]:
# Add one TextBlob-derived column per scorer, computed from the cleaned tweet text.
# NOTE(review): TextBlob's default analyzer is English-oriented; the CSV name contains
# 'es', so confirm these scores are meaningful for the language of the tweets.
for score_column, score_fn in (
    ('subjectivity', getsubjectivity),
    ('polarity', getpolarity),
):
    df[score_column] = df['clean_text'].apply(score_fn)

In [7]:
def getsentiment(text):
    """Return VADER's polarity scores for ``text``.

    The result is whatever ``SentimentIntensityAnalyzer.polarity_scores``
    returns; the cells below read its 'compound', 'neg', 'neu' and 'pos' keys.

    The analyzer is built on the first call and cached on the function object,
    so scoring many tweets does not re-create it for every single one.
    """
    analyzer = getattr(getsentiment, '_analyzer', None)
    if analyzer is None:
        # Lazy one-time construction; every later call reuses this instance.
        analyzer = SentimentIntensityAnalyzer()
        getsentiment._analyzer = analyzer
    return analyzer.polarity_scores(text)

In [8]:
# Score every tweet with VADER once, then split the per-tweet score dicts into
# four parallel lists (one entry per row, in row order) for the next cell.
#
# The Series is iterated by value rather than looked up with df['clean_text'][i]:
# that form is a label lookup, so it only works while the index is exactly
# 0..n-1 and breaks after any filtering or re-indexing of df.
sentiment_scores = [getsentiment(text) for text in df['clean_text']]

compound = [scores['compound'] for scores in sentiment_scores]
neg = [scores['neg'] for scores in sentiment_scores]
neu = [scores['neu'] for scores in sentiment_scores]
pos = [scores['pos'] for scores in sentiment_scores]


In [9]:
# Attach the four VADER score lists to the frame as columns of the same name.
for column_name, column_values in (
    ('compound', compound),
    ('neg', neg),
    ('neu', neu),
    ('pos', pos),
):
    df[column_name] = column_values


In [18]:
# Split df1 into the target ('previous_trend') and the remaining feature columns,
# converting both to NumPy arrays.
# NOTE(review): `df1` is not defined in any cell visible here (execution counts jump
# from 9 to 18) — confirm where it is built before relying on Restart & Run All.
y = np.array(df1['previous_trend'])
X = np.array(df1.drop(columns=['previous_trend']))


In [19]:
# Hold out part of the data for evaluation; random_state=0 fixes the shuffle so
# every model below is trained and scored on the same split.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [20]:
# Fit a Linear Discriminant Analysis classifier on the training split.
model = LinearDiscriminantAnalysis().fit(X_train, y_train)


In [21]:
# Predict labels for the held-out rows; the bare name on the last line displays them.
predictions = model.predict(X_test)
predictions

array([1., 1., 1., 0., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 0., 1.,
       1., 0., 1., 0., 1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1.])

In [22]:
# Display the true held-out labels for a visual comparison with the predictions.
y_test

array([1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0., 0.,
       0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 1., 0., 1., 0., 1.])

In [23]:
# Per-class precision / recall / F1 for the LDA predictions on the held-out split.
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

         0.0       0.55      0.46      0.50        13
         1.0       0.67      0.74      0.70        19

    accuracy                           0.62        32
   macro avg       0.61      0.60      0.60        32
weighted avg       0.62      0.62      0.62        32



In [24]:
from sklearn.metrics import precision_score, recall_score, confusion_matrix

# Print the binary-averaged precision and recall for the current `predictions`.
for metric_label, metric_fn in (
    ('precision score: ', precision_score),
    ('recall score: ', recall_score),
):
    print(metric_label + str(metric_fn(y_test, predictions, average='binary')))

precision score: 0.6666666666666666
recall score: 0.7368421052631579


In [25]:
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification


# Linear SVM on standardized features, fitted on the same training split as the
# other models so the reports stay comparable.
# X and y are deliberately not rebound here: overwriting them with
# make_classification() output would replace the real features/labels with
# synthetic data, and re-running the split cell afterwards would silently
# train every model on that synthetic data.
clf = make_pipeline(StandardScaler(),
                    LinearSVC(random_state=0, tol=1e-5))
clf.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])

In [26]:
# Replace `predictions` with the LinearSVC pipeline's labels for the held-out rows.
predictions = clf.predict(X_test)

In [27]:
# Full per-class report first, then the binary-averaged precision and recall,
# all computed for the current `predictions`.
print(classification_report(y_test, predictions))
for metric_label, metric_fn in (
    ('precision score: ', precision_score),
    ('recall score: ', recall_score),
):
    print(metric_label + str(metric_fn(y_test, predictions, average='binary')))

              precision    recall  f1-score   support

         0.0       0.56      0.38      0.45        13
         1.0       0.65      0.79      0.71        19

    accuracy                           0.62        32
   macro avg       0.60      0.59      0.58        32
weighted avg       0.61      0.62      0.61        32

precision score: 0.6521739130434783
recall score: 0.7894736842105263


In [29]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier (k=3) fitted on the training split.
# The fit call is left as the last expression so the cell displays the estimator.
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=3)

In [31]:
# Replace `predictions` with the k-NN labels for the held-out rows.
predictions = neigh.predict(X_test)

In [32]:
# Full per-class report first, then the binary-averaged precision and recall,
# all computed for the current `predictions`.
print(classification_report(y_test, predictions))
for metric_label, metric_fn in (
    ('precision score: ', precision_score),
    ('recall score: ', recall_score),
):
    print(metric_label + str(metric_fn(y_test, predictions, average='binary')))

              precision    recall  f1-score   support

         0.0       0.38      0.38      0.38        13
         1.0       0.58      0.58      0.58        19

    accuracy                           0.50        32
   macro avg       0.48      0.48      0.48        32
weighted avg       0.50      0.50      0.50        32

precision score: 0.5789473684210527
recall score: 0.5789473684210527


In [33]:
from sklearn.ensemble import RandomForestClassifier

In [53]:
# Random forest of 150 trees, each limited to depth 7; random_state=0 keeps the
# fit reproducible. The fit call stays last so the cell displays the estimator.
rfc = RandomForestClassifier(
    n_estimators=150,
    max_depth=7,
    random_state=0,
)
rfc.fit(X_train, y_train)

RandomForestClassifier(max_depth=7, n_estimators=150, random_state=0)

In [54]:
# Replace `predictions` with the random forest's labels for the held-out rows.
predictions = rfc.predict(X_test)

In [55]:
# Full per-class report first, then the binary-averaged precision and recall,
# all computed for the current `predictions`.
print(classification_report(y_test, predictions))
for metric_label, metric_fn in (
    ('precision score: ', precision_score),
    ('recall score: ', recall_score),
):
    print(metric_label + str(metric_fn(y_test, predictions, average='binary')))

              precision    recall  f1-score   support

         0.0       0.67      0.46      0.55        13
         1.0       0.70      0.84      0.76        19

    accuracy                           0.69        32
   macro avg       0.68      0.65      0.65        32
weighted avg       0.68      0.69      0.67        32

precision score: 0.6956521739130435
recall score: 0.8421052631578947
