In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Get Universal Sentence Encoder from TensorFlow Hub

In [None]:
# TF-Hub handle of the multilingual Universal Sentence Encoder (large, version 3)
USE_MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3"

# Load the encoder once; later cells call `USE(...)` to embed text
USE = hub.load(USE_MODEL_URL)

# Get Dataset

In [None]:
from sklearn.utils import shuffle

# Read the prepared dataset; later cells use its 'text' and 'polarity' columns
df = pd.read_csv('./data/edited/data.csv')

# Shuffle the rows with a fixed seed so that the subset taken further down
# is the same on every Restart & Run All
df = shuffle(df, random_state=42)

# Preview only the first rows instead of rendering the whole frame
df.head()

### Reduce dataset and transform labels

In [None]:
# Keep the first 100,000 rows. `.copy()` makes the subset an independent
# frame, so the column assignment below does not raise SettingWithCopyWarning
df = df.iloc[:100000].copy()

# Lookup table that turns a sentiment label into its integer class id
decode_map = dict(negative=0, positive=1)


def decode_sentiment(label):
    """Return the integer class id stored in ``decode_map`` for ``label``.

    Raises:
        KeyError: if ``label`` has no entry in ``decode_map``.
    """
    class_id = decode_map[label]
    return class_id

# Replace every textual sentiment label with its integer class id
df['polarity'] = df['polarity'].apply(decode_sentiment)

# Model input is the text column, target is the numeric polarity column
x, y = df['text'], df['polarity']

### Split features and labels

In [None]:
# Features: the 'text' column; labels: the 'polarity' column
x, y = (df[column] for column in ('text', 'polarity'))

# Process Dataset

## Convert Sentence to Embedding Vector

In [None]:
# Embed the sentences in mini-batches rather than one encoder call per
# sentence: the result is the same (n_sentences, embedding_dim) matrix,
# with far fewer encoder invocations.
EMBED_BATCH_SIZE = 64  # sentences per encoder call; lower it if memory is tight

sentences = list(x)
batch_embeddings = []

for start in tqdm(range(0, len(sentences), EMBED_BATCH_SIZE)):
    batch = sentences[start:start + EMBED_BATCH_SIZE]
    # The encoder returns one embedding row per sentence in the batch
    batch_embeddings.append(USE(batch).numpy())

# Stack the per-batch matrices row-wise; with no input, fall back to the
# empty array the per-sentence loop would have produced
x_emb = np.concatenate(batch_embeddings, axis=0) if batch_embeddings else np.array([])

## Train and Test Split

In [None]:
# Hold out 20% of the embedded sentences for the final evaluation.
# random_state is pinned so the train/test partition of a given x_emb / y
# is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    x_emb,
    y,
    test_size=0.2,
    random_state=42,
)

## Process Labels

In [None]:
# Cast both label sets to float32 and reshape each into a single column (n, 1)
y_train, y_test = (
    np.asarray(labels).astype('float32').reshape((-1, 1))
    for labels in (y_train, y_test)
)

# Build the Model

In [None]:
# Feed-forward binary classifier on top of the sentence embeddings:
# two 512-unit ReLU layers (each followed by dropout), a 128-unit ReLU layer,
# and a single sigmoid output unit.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
# NOTE(review): 0.8 is passed as Dropout's rate. Confirm it is meant as the
# fraction to drop, not a keep probability.
model.add(tf.keras.layers.Dropout(0.8))
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dropout(0.8))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked as a metric
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

### Train

In [None]:
# Train for 20 epochs in mini-batches of 128, with 20% of the training data
# set aside for validation; `results` keeps the object returned by fit()
results = model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=20,
    shuffle=True,
    validation_split=0.2,
)

### Evaluate

In [None]:
# Score the trained model on the held-out test split; the result is shown as the cell output
model.evaluate(x=X_test, y=y_test)

### New Predictions

In [None]:
def decode_prediction(x):
    """Turn a prediction score into a sentiment label.

    Scores of 0.5 and above give 'Positive', scores below 0.5 give
    'Negative'. A value for which neither comparison is true (for example
    NaN) gives 'Error'.
    """
    if x < 0.5:
        return 'Negative'
    if x >= 0.5:
        return 'Positive'
    # Neither comparison held, so the score is not an orderable number
    return 'Error'


# Classify one hand-written sentence with the trained model
sentence = 'i think you are amazing keep up the great work'

# Encode the sentence, flatten the encoder output, and shape it as a one-row batch
emb = USE(sentence)
sentence_emb = np.array(tf.reshape(emb, [-1]).numpy()).reshape(1, -1)

# Feed the raw model output straight into the label decoder
prediction = decode_prediction(model.predict(sentence_emb))

prediction

# Save Model

In [None]:
# Write the trained model to 'model.h5' in the working directory
model.save(filepath='model.h5')