In [1]:
from itertools import islice
from youtube_comment_downloader import *
def getcomments(link):
    downloader = YoutubeCommentDownloader()
    comments = downloader.get_comments_from_url(link, sort_by=SORT_BY_POPULAR)
    data={'comment':[]}
    for comment in islice(comments, 100):#enter comment count here we have use 100
        data['comment'].append(comment["text"])
    return data

In [14]:
import keras.backend as K

def f1_score(y_true, y_pred):
  
    # Count positive samples.
    c1 = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    c2 = K.sum(K.round(K.clip(y_pred, 0, 1)))
    c3 = K.sum(K.round(K.clip(y_true, 0, 1)))

    # If there are no true samples, fix the F1 score at 0.
    if c3 == 0:
        return 0.0

    # How many selected items are relevant?
    precision = c1 / (c2 + K.epsilon())

    # How many relevant items are selected?
    recall = c1 / (c3 + K.epsilon())

    # Calculate f1_score
    f1_score = 2 * (precision * recall) / (precision + recall)
    return f1_score

In [2]:
import pandas as pd
df = pd.DataFrame(getcomments("https://www.youtube.com/watch?v=kqtD5dpn9C8"), columns=['comment'])# enter link here
df.head()

Unnamed: 0,comment
0,🔥 Want to master Python? Get my complete Pytho...
1,I'm learning because i want a better job than ...
2,"This guy, sat for 1 hour and talked about pyth..."
3,0:00:00 Introduction \r\n0:00:30 What You Can ...
4,"Ik he won’t see this, but this was such a clea..."


In [3]:
df.drop_duplicates(inplace=True)

In [4]:
import nltk
import pandas as pd
from nltk.stem import PorterStemmer
import re
def preprocesing(comment):
    # remove html markup
    comment=re.sub("(<.*?>)","",comment)
#     # Remove Emails
#     comment = re.sub('\S*@\S*\s?', '', comment)
    #remove non-ascii and digits
    comment=re.sub("(\\W|\\d)"," ",comment)
    
    stopwords=['this','that','and','a','we','it','to','is','of','up','need']
    porter_stemmer=PorterStemmer()
    words=comment.split()
    res=[]
    for i in range(len(words)):
        if words[i].lower() not  in stopwords :#Stop Word Removal
            #Lowercasing
            res.append(words[i].lower())
            #Stemming
            res[-1]=porter_stemmer.stem(word=res[-1])
    
    return " ".join(res)

In [5]:
df["final"]=df["comment"].apply(preprocesing)

In [6]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [7]:
def getsentiments(comment):
    # instantiate analyzer
    analyzer = SentimentIntensityAnalyzer()
    if analyzer.polarity_scores(comment)['compound'] > 0.2: 
        return 1 # positive sentiment
#     elif analyzer.polarity_scores(comment)['compound'] < -0.2:
#         return 1 # negative sentiment
    else:
        return 0 # neutral sentiment

In [8]:
true_labels=df["comment"].apply(getsentiments)
true_labels

0     1
1     1
2     1
3     0
4     1
     ..
95    0
96    0
97    0
98    1
99    0
Name: comment, Length: 100, dtype: int64

In [9]:
from tensorflow.keras.utils import to_categorical
true_labels = to_categorical(true_labels, 2, dtype="float32")

In [10]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [11]:
comments =df["final"].to_numpy(dtype=str)
tokens=Tokenizer(num_words=5000)
tokens.fit_on_texts(comments)
seq=tokens.texts_to_sequences(comments)
comments=pad_sequences(seq,maxlen=200)
comments

array([[  0,   0,   0, ..., 310, 311, 312],
       [  0,   0,   0, ..., 314, 147, 315],
       [  0,   0,   0, ...,   4, 213, 101],
       ...,
       [  0,   0,   0, ...,  51, 641, 642],
       [  0,   0,   0, ...,  19,  79, 233],
       [  0,   0,   0, ...,   8, 650, 651]])

In [12]:
from tensorflow.keras.models import load_model

In [29]:
best_model = load_model("bidirectional_model.hdf5")


In [20]:
predictions =best_model.predict(comments)



In [21]:
predictions

array([[9.16766599e-02, 1.68711469e-02, 8.91452193e-01],
       [1.80528057e-03, 5.52051642e-04, 9.97642696e-01],
       [1.70026463e-03, 5.48138865e-04, 9.97751653e-01],
       [1.90285116e-03, 5.79775078e-04, 9.97517347e-01],
       [2.04752828e-03, 5.48333221e-04, 9.97404158e-01],
       [2.38609547e-03, 5.38451248e-04, 9.97075438e-01],
       [1.79279852e-03, 5.65336784e-04, 9.97641921e-01],
       [3.15367989e-03, 5.83941233e-04, 9.96262372e-01],
       [2.20816187e-03, 5.52707934e-04, 9.97239113e-01],
       [1.66511233e-03, 5.53363061e-04, 9.97781575e-01],
       [2.02666922e-03, 5.51929174e-04, 9.97421384e-01],
       [1.97712402e-03, 5.41888527e-04, 9.97481048e-01],
       [1.82505627e-03, 5.70301258e-04, 9.97604609e-01],
       [1.71184470e-03, 5.66577015e-04, 9.97721612e-01],
       [1.98498461e-03, 5.47577336e-04, 9.97467399e-01],
       [2.94146431e-03, 6.17297250e-04, 9.96441185e-01],
       [2.30066688e-03, 5.57182822e-04, 9.97142136e-01],
       [1.58182590e-03, 5.30820

In [22]:
best_model.evaluate(comments,true_labels)



[1.278875470161438, 0.7400000095367432]

In [17]:
import numpy as np
sentiment = ['Negative','Positive']
final=np.around(predictions).argmax(axis=1)
final

array([0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,
       0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2,
       2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 2,
       2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

In [15]:
load_model("bidirectional_model.hdf5",custom_objects={"f1_score":f1_score}).evaluate(comments,true_labels)



[1.2199214696884155, 0.5, 0.5, 0.5, 0.4453125]

In [16]:
load_model("bidirectionalLSTMGRU_model2.hdf5",custom_objects={"f1_score":f1_score}).evaluate(comments,true_labels)



[0.7886825799942017, 0.5, 0.5, 0.5, 0.4453125]

In [17]:
load_model("bidirectionalLSTMGRU_model.hdf5",custom_objects={"f1_score":f1_score}).evaluate(comments,true_labels)

ValueError: in user code:

    File "C:\Users\conec\anaconda3\lib\site-packages\keras\engine\training.py", line 1727, in test_function  *
        return step_function(self, iterator)
    File "C:\Users\conec\anaconda3\lib\site-packages\keras\engine\training.py", line 1713, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\conec\anaconda3\lib\site-packages\keras\engine\training.py", line 1701, in run_step  **
        outputs = model.test_step(data)
    File "C:\Users\conec\anaconda3\lib\site-packages\keras\engine\training.py", line 1667, in test_step
        self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\conec\anaconda3\lib\site-packages\keras\engine\training.py", line 1052, in compute_loss
        return self.compiled_loss(
    File "C:\Users\conec\anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\conec\anaconda3\lib\site-packages\keras\losses.py", line 152, in __call__
        losses = call_fn(y_true, y_pred)
    File "C:\Users\conec\anaconda3\lib\site-packages\keras\losses.py", line 272, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\conec\anaconda3\lib\site-packages\keras\losses.py", line 1990, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "C:\Users\conec\anaconda3\lib\site-packages\keras\backend.py", line 5529, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 2) and (None, 3) are incompatible


In [81]:
load_model("CNNLSTM_model.hdf5").evaluate(comments,true_labels)



[1.1773388385772705, 0.4444444477558136]

In [82]:
load_model("hyperpartric_model.hdf5").evaluate(comments,true_labels)



[1.90447998046875, 0.5858585834503174]