In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation,Bidirectional,SpatialDropout1D
from keras.models import Model
from keras.models import Sequential

In [2]:
data = pd.read_csv('../preprocess_data.csv')
data.drop(['task_1','text'], axis=1, inplace=True)
data.head()

Unnamed: 0,_id,task_2,task_3,task_4,text_clean
0,Gujarati_image_1618.jpg,Positive,Vulgar,Abusive,છોકર). ટીચર તમાર તાજમહેલ\r\n\r\nદેખ/ય છે.\r\n\...
1,Gujarati_image_31.jpg,Positive,Vulgar,Abusive,છોકરો : ના.\r\n છોકરી : કેમ?\r\n \r\n છોકરી : ...
2,Gujarati_image_1144.jpg,Negative,Vulgar,Abusive,"છોકરીઓ ગમે તેટલી\r\n ચાલક હોય,\r\n \r\n પણ છોક..."
3,Gujarati_image_1184.jpg,Negative,Vulgar,Abusive,"દોસ્તી કરો,પ્રેમ કરો, વફા કરો...\r\n અને બહુ મ..."
4,Gujarati_image_1643.jpg,Neutral,Vulgar,Abusive,"છોકરીઓ ગમે તેટલી\r\nચાલક હોય,\r\n\r\nપણ છોકરા ..."


In [3]:
test_data = pd.read_csv('../preprocess_test_data.csv')
test_data.drop(['task_1','Unnamed: 0','text'], axis=1, inplace=True)
test_data.head()

Unnamed: 0,_id,task_2,task_3,task_4,text_clean
0,Gujarati_image_1225.jpg,Neutral,Non Vulgar,Non-abusive,॥વિંદેશીગામડિયો\r\n અ |
1,Gujarati_image_1583.jpg,Negative,Vulgar,Abusive,ટીચર : સૌથી વધારે દુખાવો ક્યારે\r\nથાય?\r\nછોક...
2,Gujarati_image_1502.jpg,Positive,Vulgar,Abusive,પતિ: તુંમને જરાય પ્રેમ\r\nનથી કરતી...\r\n\r\nપ...
3,Gujarati_image_1487.jpg,Negative,Vulgar,Abusive,આખા ગોમ ના લોડા\r\nભોસ મા ભરી ને બેઠી\r\nહોય અ...
4,Gujarati_image_1497.jpg,Negative,Vulgar,Abusive,મિનરલ વોટર સિવાય ક્યારેય\r\nબીજું\r\nપાણી નો પ...


In [4]:
sentences = data['text_clean'].astype(str)
tokenizer = Tokenizer(num_words = 1500,split=' ')
tokenizer.fit_on_texts(sentences)
sequence = tokenizer.texts_to_sequences(sentences)

In [5]:
test_sentences = test_data['text_clean'].astype(str)
test_sequence = tokenizer.texts_to_sequences(test_sentences)

In [6]:
max_seq_len = 2500

index_of_words = tokenizer.word_index
print("No of unique words : ",len(index_of_words))

X = pad_sequences(sequence , maxlen = max_seq_len )
Y = data['task_2']

print(X)

No of unique words :  7000
[[   0    0    0 ...  536  134    1]
 [   0    0    0 ...  246 1062  318]
 [   0    0    0 ... 1067 1068  174]
 ...
 [   0    0    0 ...    0    0    5]
 [   0    0    0 ...    0    0    5]
 [   0    0    0 ...    1   17  433]]


In [7]:
test_X = pad_sequences(test_sequence , maxlen = max_seq_len )
test_Y = test_data['task_2']

print(test_X)

[[  0   0   0 ...   0   0 648]
 [  0   0   0 ... 206 394   1]
 [  0   0   0 ...   2   1   1]
 ...
 [  0   0   0 ...   1   1 324]
 [  0   0   0 ...  40   2   1]
 [  0   0   0 ...   0   0   5]]


In [8]:
embed_dim = 256
vocabSize = len(index_of_words)
lstm_out = 64

In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.15, random_state = 0)
Y_true = Y_test
Y_train = pd.get_dummies(Y_train).values
Y_test = pd.get_dummies(Y_test).values

In [10]:
test_Y_true = test_Y
test_Y = pd.get_dummies(test_Y).values
print("test_Y:",test_Y)

test_Y: [[False  True False]
 [ True False False]
 [False False  True]
 ...
 [False  True False]
 [False  True False]
 [False  True False]]


# MODEL 1

In [15]:
model = Sequential()
model.add(Embedding(vocabSize, embed_dim,input_length = 2500))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(3, activation='softmax'))
model.compile(loss = 'binary_crossentropy', optimizer='adam',metrics = ['accuracy'])
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 2500, 256)         1792000   
                                                                 
 spatial_dropout1d (Spatial  (None, 2500, 256)         0         
 Dropout1D)                                                      
                                                                 
 lstm (LSTM)                 (None, 64)                82176     
                                                                 
 dense (Dense)               (None, 3)                 195       
                                                                 
Total params: 1874371 (7.15 MB)
Trainable params: 1874371 (7.15 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [16]:
from keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint("hasoc_a2.h5", monitor='val_loss', verbose=1, save_best_only=True,
save_weights_only=False, mode='auto')

In [17]:
print(Y_true)

14     Negative
158    Positive
762    Positive
740    Positive
482    Negative
         ...   
721    Positive
651    Positive
782     Neutral
113    Positive
839    Positive
Name: task_2, Length: 134, dtype: object


In [18]:
print(Y_test)
classes = ['Negative','Neutral','Positive']

[[ True False False]
 [False False  True]
 [False False  True]
 [False False  True]
 [ True False False]
 [False False  True]
 [False False  True]
 [ True False False]
 [False False  True]
 [False False  True]
 [ True False False]
 [ True False False]
 [False False  True]
 [ True False False]
 [False False  True]
 [ True False False]
 [ True False False]
 [False False  True]
 [False False  True]
 [False False  True]
 [False  True False]
 [False False  True]
 [False False  True]
 [False  True False]
 [False False  True]
 [ True False False]
 [False False  True]
 [ True False False]
 [ True False False]
 [False False  True]
 [False  True False]
 [False False  True]
 [False False  True]
 [ True False False]
 [False  True False]
 [False  True False]
 [ True False False]
 [False  True False]
 [ True False False]
 [ True False False]
 [False  True False]
 [False False  True]
 [ True False False]
 [False False  True]
 [False  True False]
 [False False  True]
 [False  True False]
 [False False

In [19]:
model.fit(X_train, Y_train, batch_size = 32, epochs = 10, validation_data=(X_test,Y_test), callbacks=[checkpoint])

Epoch 1/10
Epoch 1: val_loss improved from inf to 0.60074, saving model to hasoc_a2.h5
Epoch 2/10


  saving_api.save_model(


Epoch 2: val_loss improved from 0.60074 to 0.59117, saving model to hasoc_a2.h5
Epoch 3/10
Epoch 3: val_loss did not improve from 0.59117
Epoch 4/10
Epoch 4: val_loss improved from 0.59117 to 0.57304, saving model to hasoc_a2.h5
Epoch 5/10
Epoch 5: val_loss did not improve from 0.57304
Epoch 6/10
Epoch 6: val_loss did not improve from 0.57304
Epoch 7/10
Epoch 7: val_loss did not improve from 0.57304
Epoch 8/10
Epoch 8: val_loss did not improve from 0.57304
Epoch 9/10
Epoch 9: val_loss did not improve from 0.57304
Epoch 10/10
Epoch 10: val_loss did not improve from 0.57304


<keras.src.callbacks.History at 0x207d67a0910>

In [20]:
model.load_weights('hasoc_a2.h5')
model.evaluate(X_test,Y_test)



[0.5730419158935547, 0.5373134613037109]

In [21]:
Y_pred = model.predict(test_X)



In [23]:
print("Y_pred:",Y_pred)

Y_pred: [[0.18888548 0.18534648 0.625768  ]
 [0.78411263 0.01164995 0.20423736]
 [0.20543034 0.07548866 0.71908104]
 ...
 [0.3436129  0.07706604 0.579321  ]
 [0.30682686 0.19628365 0.4968895 ]
 [0.20232376 0.17370537 0.62397087]]


In [27]:
pred_class = []
for i in Y_pred:
    pred_class.append(np.argmax(i))
print(pred_class)

pred_class_condition = pd.get_dummies(pred_class).values
print(pred_class_condition)

[2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 2, 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 2, 0, 2, 0, 0, 0, 2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 

In [29]:
print(classification_report(test_Y, pred_class_condition))

              precision    recall  f1-score   support

           0       0.09      0.39      0.15        31
           1       1.00      0.00      0.01       388
           2       0.25      0.63      0.35       185

   micro avg       0.22      0.22      0.22       604
   macro avg       0.45      0.34      0.17       604
weighted avg       0.72      0.22      0.12       604
 samples avg       0.22      0.22      0.22       604



In [30]:
pred_actual = []
for i in pred_class:
    if i == 0:
        pred_actual.append('Negative')
    elif i == 1 :
        pred_actual.append('Neutral')
    else:
        pred_actual.append('Positive')

In [31]:
test_data = test_data[["_id"]]
test_data["label"] = pred_actual
test_data.to_csv('dl_lstm_a2.csv',index=False)
test_data.head()

Unnamed: 0,_id,label
0,Gujarati_image_1225.jpg,Positive
1,Gujarati_image_1583.jpg,Negative
2,Gujarati_image_1502.jpg,Positive
3,Gujarati_image_1487.jpg,Positive
4,Gujarati_image_1497.jpg,Positive


# MODEL 2

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

model = Sequential()

# Embedding层，增加防止过拟合的Dropout
model.add(Embedding(input_dim=vocabSize, output_dim=embed_dim, input_length=2500))
model.add(Dropout(0.3))

# LSTM层，增加recurrent_dropout 和 output dropout
model.add(LSTM(units=lstm_out, dropout=0.3, recurrent_dropout=0.3, return_sequences=False))

# Batch Normalization增强泛化
model.add(BatchNormalization())

# 全连接层，Softmax输出3分类，建议用categorical_crossentropy
model.add(Dense(3, activation='softmax'))

# 编译
optimizer = Adam(learning_rate=0.001)  # 学习率也可调整
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 2500, 256)         1792000   
                                                                 
 dropout (Dropout)           (None, 2500, 256)         0         
                                                                 
 lstm (LSTM)                 (None, 64)                82176     
                                                                 
 batch_normalization (Batch  (None, 64)                256       
 Normalization)                                                  
                                                                 
 dense (Dense)               (None, 3)                 195       
                                                                 
Total params: 1874627 (7.15 MB)
Trainable params: 1874499 (7.15 MB)
Non-trainable params: 128 (512.00 Byte)
______________

In [12]:
from keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint("hasoc_b2.h5", monitor='val_loss', verbose=1, save_best_only=True,
save_weights_only=False, mode='auto')

In [13]:
model.fit(X_train, Y_train, batch_size = 8, epochs = 10, validation_data = (X_test, Y_test), callbacks = [checkpoint])

Epoch 1/10
Epoch 1: val_loss improved from inf to 1.03551, saving model to hasoc_b2.h5
Epoch 2/10


  saving_api.save_model(


Epoch 2: val_loss improved from 1.03551 to 1.01267, saving model to hasoc_b2.h5
Epoch 3/10
Epoch 3: val_loss improved from 1.01267 to 1.00446, saving model to hasoc_b2.h5
Epoch 4/10
Epoch 4: val_loss did not improve from 1.00446
Epoch 5/10
Epoch 5: val_loss did not improve from 1.00446
Epoch 6/10
Epoch 6: val_loss did not improve from 1.00446
Epoch 7/10
Epoch 7: val_loss did not improve from 1.00446
Epoch 8/10
Epoch 8: val_loss did not improve from 1.00446
Epoch 9/10
Epoch 9: val_loss did not improve from 1.00446
Epoch 10/10
Epoch 10: val_loss did not improve from 1.00446


<keras.src.callbacks.History at 0x1e15943ceb0>

In [14]:
model.load_weights('hasoc_b2.h5')
model.evaluate(X_test,Y_test)



[1.0044573545455933, 0.5]

In [15]:
Y_pred = model.predict(test_X)



In [16]:
pred_class = []
for i in Y_pred:
    pred_class.append(np.argmax(i))
print(pred_class)

pred_class_condition = pd.get_dummies(pred_class).values
print(pred_class_condition)

[2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 

In [17]:
print(classification_report(test_Y, pred_class_condition))

              precision    recall  f1-score   support

           0       0.08      0.13      0.10        31
           1       1.00      0.00      0.01       388
           2       0.29      0.88      0.44       185

   micro avg       0.28      0.28      0.28       604
   macro avg       0.46      0.34      0.18       604
weighted avg       0.74      0.28      0.14       604
 samples avg       0.28      0.28      0.28       604



In [18]:
pred_actual = []
for i in pred_class:
    if i == 0:
        pred_actual.append('Negative')
    elif i == 1 :
        pred_actual.append('Neutral')
    else:
        pred_actual.append('Positive')

In [19]:
test_data = test_data[["_id"]]
test_data["label"] = pred_actual
test_data.to_csv('dl_lstm_b2.csv',index=False)
test_data.head()

Unnamed: 0,_id,label
0,Gujarati_image_1225.jpg,Positive
1,Gujarati_image_1583.jpg,Negative
2,Gujarati_image_1502.jpg,Positive
3,Gujarati_image_1487.jpg,Positive
4,Gujarati_image_1497.jpg,Positive


# MODEL 3

In [11]:
from keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint(
    "hasoc_c2.h5",
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False, 
    mode='auto')

In [12]:
model = Sequential()
model.add(Embedding(vocabSize, embed_dim, input_length=2500))
model.add(LSTM(lstm_out, dropout=0.3, recurrent_dropout=0.3))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

from sklearn.utils.class_weight import compute_class_weight
# 假设Y_train已独热编码
class_weights = compute_class_weight(class_weight='balanced', classes=np.array([0, 1, 2]), y=np.argmax(Y_train, axis=1))
class_weight_dict = dict(enumerate(class_weights))

model.fit(X_train, Y_train, epochs=10, batch_size=8, class_weight=class_weight_dict, validation_data=(X_test,Y_test), callbacks=[checkpoint])

Epoch 1/10
Epoch 1: val_loss improved from inf to 1.07681, saving model to hasoc_c2.h5
Epoch 2/10


  saving_api.save_model(


Epoch 2: val_loss improved from 1.07681 to 1.05034, saving model to hasoc_c2.h5
Epoch 3/10
Epoch 3: val_loss did not improve from 1.05034
Epoch 4/10
Epoch 4: val_loss did not improve from 1.05034
Epoch 5/10
Epoch 5: val_loss did not improve from 1.05034
Epoch 6/10
Epoch 6: val_loss did not improve from 1.05034
Epoch 7/10
Epoch 7: val_loss did not improve from 1.05034
Epoch 8/10
Epoch 8: val_loss did not improve from 1.05034
Epoch 9/10
Epoch 9: val_loss did not improve from 1.05034
Epoch 10/10
Epoch 10: val_loss did not improve from 1.05034


<keras.src.callbacks.History at 0x2350b252460>

In [13]:
model.load_weights('hasoc_c2.h5')
model.evaluate(X_test,Y_test)



[1.0503442287445068, 0.41791045665740967]

In [14]:
Y_pred = model.predict(test_X)



In [15]:
pred_class = []
for i in Y_pred:
    pred_class.append(np.argmax(i))
print(pred_class)

pred_class_condition = pd.get_dummies(pred_class).values
print(pred_class_condition)

[2, 0, 1, 1, 1, 0, 1, 0, 2, 0, 1, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 0, 0, 2, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 2, 1, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 2, 0, 1, 1, 2, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 2, 1, 1, 0, 0, 1, 0, 2, 0, 2, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 2, 0, 1, 1, 0, 2, 2, 0, 1, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, 2, 0, 1, 0, 1, 0, 2, 1, 0, 1, 1, 1, 2, 1, 2, 0, 0, 2, 1, 0, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2, 0, 2, 0, 1, 2, 2, 1, 1, 1, 0, 1, 2, 1, 1, 1, 2, 1, 1, 2, 0, 1, 2, 2, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 0, 2, 0, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 0, 1, 0, 2, 2, 2, 1, 0, 1, 1, 1, 2, 1, 2, 0, 1, 0, 2, 0, 0, 1, 1, 2, 1, 1, 0, 1, 2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 2, 1, 1, 1, 1, 0, 1, 

In [16]:
print(classification_report(test_Y, pred_class_condition))

              precision    recall  f1-score   support

           0       0.09      0.58      0.15        31
           1       0.72      0.46      0.56       388
           2       0.15      0.12      0.13       185

   micro avg       0.36      0.36      0.36       604
   macro avg       0.32      0.39      0.28       604
weighted avg       0.51      0.36      0.41       604
 samples avg       0.36      0.36      0.36       604



In [17]:
pred_actual = []
for i in pred_class:
    if i == 0:
        pred_actual.append('Negative')
    elif i == 1 :
        pred_actual.append('Neutral')
    else:
        pred_actual.append('Positive')

In [18]:
test_data = test_data[["_id"]]
test_data["label"] = pred_actual
test_data.to_csv('dl_lstm_c2.csv',index=False)
test_data.head()

Unnamed: 0,_id,label
0,Gujarati_image_1225.jpg,Positive
1,Gujarati_image_1583.jpg,Negative
2,Gujarati_image_1502.jpg,Neutral
3,Gujarati_image_1487.jpg,Neutral
4,Gujarati_image_1497.jpg,Neutral
