##### Text Classification Using Keras
0 - Negative

1 - Positive

In [33]:
from warnings import filterwarnings; filterwarnings('ignore')

# Step-1: Data Ingestion
# ----------------------

import pandas as pd; df = pd.read_csv('Restaurant_Reviews.tsv', sep="\t"); df.head(3)

# Step-2: Data Sanity Checks (Duplicates, Null values removal)
# ------------------------------------------------------------

duplicate_count = df.duplicated().sum()
if duplicate_count > 0:
    print('Removing Duplicates...'); df = df.drop_duplicates(keep='first').reset_index(drop=True)
    print('Removed Duplicates.')
else:
    print('No Duplicates Found.')

m = df.isna().sum()
mm = m[m > 0]

if not mm.empty:
    print('Null values found')
else:
    print('Null values not found')

# Step-3: Separate X and Y
# ------------------------

import re
def clean_text(text: str) -> str:
  text = text.lower()
  text = re.sub("[^a-z ]", "", text)
  return text

df["cleaned"] = df["Review"].apply(clean_text)

X = df['cleaned']; Y = df['Liked']

# Step-4: Applying Preprocessing on X
# -----------------------------------

from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer()
tfidf.fit(X)

X_Pre = tfidf.transform(X).toarray()
X_Pre[0:2]

# TFIDF -> SVD (Singular Value Decomposition)
# LSA - Latent Semantic Analysis

from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=300, random_state=42)
svd.fit(X_Pre)

X_Pre_Red = svd.transform(X_Pre)

# Step-5: Apply Train Test Split
# ------------------------------

from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(X_Pre_Red, Y, test_size=0.2, random_state=42)

# Step-6: Build Neural Network
# ----------------------------

from keras.models import Sequential
from keras.layers import Input, Dense, Dropout
from keras.regularizers import l2
from keras.callbacks import EarlyStopping

model = Sequential(
    layers = [
        # Shape should have columns(features) at first point
        Input(shape=(xtrain.shape[1], )),
        Dense(256, activation='relu', kernel_regularizer=l2(0.02)),
        Dropout(0.3),
        Dense(128, activation='relu', kernel_regularizer=l2(0.02)),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ]
)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
es = EarlyStopping(patience=10, restore_best_weights=True, verbose=False)

model.fit(xtrain, ytrain, validation_split=0.2, epochs=60, callbacks=[es], verbose=False)

# Step-7: Evaluate Model
# ----------------------

train_loss, train_acc = model.evaluate(xtrain, ytrain, verbose=False)
test_loss, test_acc = model.evaluate(xtest, ytest, verbose=False)

print('Train Loss:', round(train_loss, 2), ', Train Accuracy:', round(train_acc, 2))
print('Test Loss:', round(test_loss, 2), ', Test Accuracy:', round(test_acc, 2))

ytest_prob = model.predict(xtest, verbose=False)

ytest_pred = [1 if prob >=0.5 else 0 for prob in ytest_prob]
ytest_pred[0:3]

# Step-8: Out of Sample Prediction (Single Strings as I/P)
# --------------------------------------------------------

def predict_sentiment(text: str):
    text = clean_text(text)
    text_pre = tfidf.transform([text]).toarray()
    text_pre_red = svd.transform(text_pre)
    text_pre_prob = model.predict(text_pre_red, verbose=False)
    text_pre_pred = "Positive" if text_pre_prob >=0.5 else "Negative"
    return text_pre_pred, text_pre_prob

stmt = "The movie was really bad and story felt boring"
print('String:[', stmt,']| Sentiment:' ,predict_sentiment(stmt))

stmt = "Awesome just liked the taste!..."
print('String:[', stmt,']| Sentiment:' ,predict_sentiment(stmt))

stmt = "The taste of pizza was very good!..."
print('String:[', stmt,']| Sentiment:' ,predict_sentiment(stmt))

stmt = "voilence voilence i do not like it"
print('String:[', stmt,']| Sentiment:' ,predict_sentiment(stmt))

Removing Duplicates...
Removed Duplicates.
Null values not found
Train Loss: 0.45 , Train Accuracy: 0.94
Test Loss: 0.68 , Test Accuracy: 0.79
String:[ The movie was really bad and story felt boring ]| Sentiment: ('Negative', array([[0.10440719]], dtype=float32))
String:[ Awesome just liked the taste!... ]| Sentiment: ('Positive', array([[0.9214881]], dtype=float32))
String:[ The taste of pizza was very good!... ]| Sentiment: ('Positive', array([[0.93968755]], dtype=float32))
String:[ voilence voilence i do not like it ]| Sentiment: ('Negative', array([[0.02293042]], dtype=float32))
