In [2]:
from textblob import TextBlob
import pandas as pd
from spacy.tokenizer import Tokenizer
from spacy.lang.tr import Turkish
from tqdm import tqdm
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import numpy as np

In [3]:
# Read the tweet dataset from a CSV file located in the working directory.
CSV_PATH = 'lemessi10.csv'
data = pd.read_csv(CSV_PATH)
data

Unnamed: 0,tweet
0,leo messi cristiano special competition among ...
1,poles stop leo messi
2,la liga goal assist king champions league top ...
3,leo messi became first player score goal diffe...
4,come tomorrow start work fenerbahçe
...,...
20099,via drawing lionel messi art lionelmessi barce...
20100,lionel messi made funny comment allegations ma...
20101,lionelmessi dont worry messi father go jail gi...
20102,lionel messi without detonating bomb


In [4]:
def getSubjectivity(text):
    """Return the TextBlob subjectivity score of ``text``.

    Per the TextBlob documentation the score lies in [0.0, 1.0], where 0.0 is
    fully objective and 1.0 fully subjective.
    """
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity

def getPolarity(text):
    """Return the TextBlob polarity score of ``text``.

    Per the TextBlob documentation the score lies in [-1.0, 1.0]; negative
    values indicate negative sentiment, positive values positive sentiment.
    """
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity

# Score every tweet and store both TextBlob measures as new columns.
tweets = data['tweet']
data['Subjectivity'] = tweets.map(getSubjectivity)
data['Polarity'] = tweets.map(getPolarity)

data

Unnamed: 0,tweet,Subjectivity,Polarity
0,leo messi cristiano special competition among ...,0.586190,0.225119
1,poles stop leo messi,0.000000,0.000000
2,la liga goal assist king champions league top ...,0.766667,0.200000
3,leo messi became first player score goal diffe...,0.466667,0.125000
4,come tomorrow start work fenerbahçe,0.000000,0.000000
...,...,...,...
20099,via drawing lionel messi art lionelmessi barce...,0.000000,0.000000
20100,lionel messi made funny comment allegations ma...,1.000000,0.250000
20101,lionelmessi dont worry messi father go jail gi...,0.375000,-0.050000
20102,lionel messi without detonating bomb,0.000000,0.000000


In [5]:
def getAnalysis(score):
    """Map a polarity score to a sentiment label.

    Returns 'Negative' for scores below zero, 'Neutral' for exactly zero and
    'Positive' for everything else.
    """
    if score < 0:
        return 'Negative'
    return 'Neutral' if score == 0 else 'Positive'
    
# Derive the categorical sentiment label from each tweet's polarity score.
polarity_scores = data['Polarity']
data['Analysis'] = polarity_scores.map(getAnalysis)
data

Unnamed: 0,tweet,Subjectivity,Polarity,Analysis
0,leo messi cristiano special competition among ...,0.586190,0.225119,Positive
1,poles stop leo messi,0.000000,0.000000,Neutral
2,la liga goal assist king champions league top ...,0.766667,0.200000,Positive
3,leo messi became first player score goal diffe...,0.466667,0.125000,Positive
4,come tomorrow start work fenerbahçe,0.000000,0.000000,Neutral
...,...,...,...,...
20099,via drawing lionel messi art lionelmessi barce...,0.000000,0.000000,Neutral
20100,lionel messi made funny comment allegations ma...,1.000000,0.250000,Positive
20101,lionelmessi dont worry messi father go jail gi...,0.375000,-0.050000,Negative
20102,lionel messi without detonating bomb,0.000000,0.000000,Neutral


In [6]:
from sklearn.preprocessing import LabelEncoder

# Keep the fitted encoder instead of discarding it: `analysis_encoder.classes_`
# and `analysis_encoder.inverse_transform(...)` are the only way to map the
# integer codes back to the 'Negative' / 'Neutral' / 'Positive' names later.
# LabelEncoder orders classes lexicographically, so here
# Negative -> 0, Neutral -> 1, Positive -> 2.
analysis_encoder = LabelEncoder()
data["Analysis"] = analysis_encoder.fit_transform(data["Analysis"])
print(data['Analysis'])

0        2
1        1
2        2
3        2
4        1
        ..
20099    1
20100    2
20101    0
20102    1
20103    1
Name: Analysis, Length: 20104, dtype: int32


In [7]:
import nltk
nltk.download('stopwords')

from nltk.corpus import stopwords
stop = stopwords.words('english')

# Membership tests run once per token across the whole corpus, so look words up
# in a hashed set instead of scanning the stopword list each time.
stop_words = frozenset(stop)

# NOTE(review): the English stopword list includes negations such as "not" and
# "no"; the sentiment columns were computed before this step, so they are
# unaffected, but the text used downstream no longer contains those words.
data['tweet'] = data['tweet'].apply(
    lambda tweet: " ".join(word for word in tweet.split() if word not in stop_words)
)
data['tweet'].head()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ugure\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


0    leo messi cristiano special competition among ...
1                                 poles stop leo messi
2    la liga goal assist king champions league top ...
3    leo messi became first player score goal diffe...
4                  come tomorrow start work fenerbahçe
Name: tweet, dtype: object

In [8]:
# Count every token in the corpus and take the last 10 entries of the
# descending frequency ranking, i.e. ten of the least frequent words.
corpus_tokens = ' '.join(data['tweet']).split()
freq = pd.Series(corpus_tokens).value_counts()[-10:]

# The words are the index labels of `freq`; strip them from every tweet.
rare_words = freq.index
data['tweet'] = data['tweet'].apply(
    lambda tweet: " ".join(token for token in tweet.split() if token not in rare_words)
)
data['tweet'].head()

0    leo messi cristiano special competition among ...
1                                 poles stop leo messi
2    la liga goal assist king champions league top ...
3    leo messi became first player score goal diffe...
4                  come tomorrow start work fenerbahçe
Name: tweet, dtype: object

In [9]:
import pandas as pd
import numpy as np
import spacy
from tqdm import tqdm
import re
import time
import pickle
# Let pandas show up to 200 characters per column value so that tweet text is
# not cut off early when a DataFrame or Series is displayed.
pd.set_option('display.max_colwidth', 200)

In [10]:
import tensorflow_hub as hub
import tensorflow as tf

In [31]:
import tensorflow.compat.v1 as tf

# Switch TensorFlow to TF1-style graph mode BEFORE any graph, op or variable is
# created. The training cell drives the model through an explicit Session, and
# objects built while eager execution is still on cannot be used in that graph.
tf.compat.v1.disable_eager_execution()

# ELMo v2 is published in the TF1 Hub format. `hub.Module` is the loader whose
# call accepts `signature=` and `as_dict=`, which is how ELMoEmbedding invokes
# `embed`; `hub.KerasLayer` does not take those keywords at call time.
embed = hub.Module("https://tfhub.dev/google/elmo/2")

In [16]:
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
from sklearn import preprocessing
import keras
import numpy as np

In [17]:
# Model inputs are the cleaned tweet strings, targets the integer sentiment codes.
x, y = list(data['tweet']), list(data['Analysis'])

# Encoder used to move between label values and one-hot column positions.
le = preprocessing.LabelEncoder().fit(y)

def encode(le, labels):
    """One-hot encode ``labels`` using the fitted label encoder ``le``.

    Args:
        le: A fitted ``LabelEncoder``; its ``classes_`` define the columns.
        labels: Iterable of label values known to ``le``.

    Returns:
        A 2-D array with one row per label and one column per class in ``le``.
    """
    enc = le.transform(labels)
    # Fix the width to the encoder's class count. Without `num_classes`,
    # to_categorical sizes the matrix from the largest label present, so a
    # subset missing the highest class would come out one column short.
    return keras.utils.to_categorical(enc, num_classes=len(le.classes_))

def decode(le, one_hot):
    """Turn one-hot rows or per-class scores back into labels.

    The column with the highest value in each row of ``one_hot`` is taken as
    the class index and converted with ``le.inverse_transform``.
    """
    class_indices = np.argmax(one_hot, axis=1)
    labels = le.inverse_transform(class_indices)
    return labels


# The tweets stay as raw strings (the embedding happens inside the model);
# only the targets are converted to one-hot rows.
x_enc, y_enc = x, encode(le, y)

In [18]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
features = np.asarray(x_enc)
targets = np.asarray(y_enc)
x_train, x_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42
)

In [19]:
# Sanity check: how many samples ended up in the training split.
x_train.shape

(16083,)

In [20]:
from keras.layers import Input, Lambda, Dense
from keras.models import Model
import keras.backend as K
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense
from tensorflow.keras.datasets import mnist
from tensorflow.python.keras.utils import np_utils

In [None]:
def is_tensor(x):
    """Return True if ``x`` is a TensorFlow-native tensor-like object.

    Uses the public ``tf.is_tensor`` check. The previous body referenced
    ``core_tf_types`` and ``tf_ops``, which are not defined anywhere in this
    notebook, so any call raised ``NameError``.
    """
    return tf.is_tensor(x)

def ELMoEmbedding(x):
    """Embed a batch of raw strings with the ELMo module bound to ``embed``.

    Args:
        x: String tensor coming from ``Input(shape=(1,), dtype=tf.string)``,
            i.e. one string per row.

    Returns:
        The module's "default" output for the batch (declared as 1024-wide by
        the ``Lambda`` layer that wraps this function).

    NOTE(review): ``signature=`` / ``as_dict=`` are the ``hub.Module`` calling
    convention; confirm ``embed`` is created with that API.
    """
    # Drop only the feature axis. A bare tf.squeeze would also remove the batch
    # axis whenever a batch holds a single string, handing the module a scalar
    # instead of a 1-D batch of sentences.
    sentences = tf.squeeze(tf.cast(x, tf.string), axis=1)
    return embed(sentences, signature="default", as_dict=True)["default"]

# --- Model: raw string -> ELMo embedding -> dense layer -> softmax ----------
input_text = Input(shape=(1,), dtype=tf.string)
embedding = Lambda(ELMoEmbedding, output_shape=(1024, ))(input_text)
dense = Dense(256, activation='relu')(embedding)

# The output layer needs exactly one unit per one-hot target column. The
# targets encode three sentiment classes, so the previously hard-coded 5 units
# did not match the target shape.
num_classes = y_train.shape[1]
pred = Dense(num_classes, activation='softmax')(dense)
model = Model(inputs=[input_text], outputs=pred)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# `tf` is bound to `tensorflow.compat.v1` by one import cell and to plain
# `tensorflow` by another, depending on which ran last. The `tf.compat.v1.*`
# spelling resolves under either binding, whereas `tf.Session` only exists
# under the former.

# --- Train -------------------------------------------------------------------
with tf.compat.v1.Session() as session:
    K.set_session(session)
    # Initialise model/module variables and the module's lookup tables.
    session.run(tf.compat.v1.global_variables_initializer())
    session.run(tf.compat.v1.tables_initializer())
    history = model.fit(x_train, y_train, epochs=1, batch_size=16)
    model.save_weights('./elmo-model.h5')

# --- Predict -----------------------------------------------------------------
# A fresh session starts from re-initialised variables, so the trained weights
# are restored from disk before predicting.
with tf.compat.v1.Session() as session:
    K.set_session(session)
    session.run(tf.compat.v1.global_variables_initializer())
    session.run(tf.compat.v1.tables_initializer())
    model.load_weights('./elmo-model.h5')
    predicts = model.predict(x_test, batch_size=16)

# Convert one-hot targets and softmax scores back to label values.
# NOTE: this overwrites the one-hot `y_test`; re-run the train/test split cell
# before running this cell a second time.
y_test = decode(le, y_test)
y_preds = decode(le, predicts)

In [None]:
from sklearn import metrics
from sklearn.metrics import accuracy_score

# Confusion matrix of true labels against predicted labels.
confusion = metrics.confusion_matrix(y_test, y_preds)
print(confusion)

# Per-class precision, recall and F1.
report = metrics.classification_report(y_test, y_preds)
print(report)

# Overall fraction of correctly classified test tweets.
elmo_accuracy = accuracy_score(y_test, y_preds)
print("Accuracy of ELMO is:", elmo_accuracy)