In [41]:
# !pip install tensorflow
# !pip install tensorflow_hub

In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.optimizers.legacy import Adam

In [3]:
df = pd.read_csv('../Reddit Data/final_dataset_2024-03-16.csv', index_col=0)
df.head()

Unnamed: 0,preprocessed-text-removal,sentiment
0,reason love alcohol make feel numb numb real r...,negative
1,work related anxiety depression,negative
2,lately pretty serious anxiety depression work ...,positive
3,anyone else feel like belong anywhere,positive
4,matter around total stranger people love alway...,positive


In [4]:
df.sentiment.value_counts()

sentiment
negative    13953
positive     9320
neutral      6314
Name: count, dtype: int64

In [5]:
df['sentiment'] = np.where(df['sentiment'] == 'positive', 2, np.where(df['sentiment'] == 'negative', 1, 0)).astype(int)
df.head()

Unnamed: 0,preprocessed-text-removal,sentiment
0,reason love alcohol make feel numb numb real r...,1
1,work related anxiety depression,1
2,lately pretty serious anxiety depression work ...,2
3,anyone else feel like belong anywhere,2
4,matter around total stranger people love alway...,2


In [6]:
df = df.loc[(df.sentiment == 2) | (df.sentiment == 1)].reset_index(drop=True)
df

Unnamed: 0,preprocessed-text-removal,sentiment
0,reason love alcohol make feel numb numb real r...,1
1,work related anxiety depression,1
2,lately pretty serious anxiety depression work ...,2
3,anyone else feel like belong anywhere,2
4,matter around total stranger people love alway...,2
...,...,...
23268,always trouble making friend dating grew paren...,2
23269,stop feeling like friend going taken away popu...,2
23270,f college group friend boy since college enoug...,1
23271,please somebody help really know life anymore ...,2


In [7]:
def df_to_dataset(dataframe, shuffle=True, batch_size=1024):
    df = dataframe.copy()
    labels = df.pop('sentiment').astype(int)
    df = df['preprocessed-text-removal']
    ds = tf.data.Dataset.from_tensor_slices((df, labels))
    if  shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    ds = ds.prefetch(tf.data.AUTOTUNE)

    return ds

In [8]:
train, val, test = np.split(df.sample(frac=1), [int(0.8*len(df)), int(0.9*len(df))])

  return bound(*args, **kwds)


In [9]:
train_data = df_to_dataset(train)
val_data = df_to_dataset(val)
test_data = df_to_dataset(test)

In [33]:
# embedding = 'https://tfhub.dev/google/nnlm-en-dim50/2'
# hub_layer = hub.KerasLayer(embedding, dtype=tf.string, trainable=True)

encoder = tf.keras.layers.TextVectorization(max_tokens=2000)
encoder.adapt(train_data.map(lambda text, label: text))

In [34]:
vocab = np.array(encoder.get_vocabulary())
vocab[:20]

array(['', '[UNK]', 'like', 'feel', 'anxiety', 'get', 'know', 'time',
       'people', 'want', 'even', 'really', 'life', 'thing', 'day', 'year',
       'go', 'one', 'would', 'friend'], dtype='<U14')

In [35]:
model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        input_dim = len(encoder.get_vocabulary()),
        output_dim=32,
        mask_zero=True
    ),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [36]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

In [37]:
model.evaluate(train_data)
model.evaluate(val_data)

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 350ms/step - accuracy: 0.3549 - loss: 0.6919
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 252ms/step - accuracy: 0.3628 - loss: 0.6919


[0.6918068528175354, 0.36012032628059387]

In [39]:
history = model.fit(train_data, epochs=100, validation_data=val_data)

Epoch 1/100
[1m10/19[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m12s[0m 1s/step - accuracy: 0.5147 - loss: 0.6492

In [45]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  0


In [46]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]