In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
import joblib
import os
import pandas as pd
import numpy as np
import gzip

In [5]:
def transform_label(num):
    if num == 0:
        return 'negative'
    elif num == 1:
        return 'neutral'
    elif num == 2:
        return 'positive'
    else:
        raise ValueError("Invalid label")

In [6]:
train = pd.read_csv('data/processed_train.csv')
tfidf_vec = TfidfVectorizer(max_features=10000).fit(train['text'])
bog_vec = CountVectorizer(max_features=10000).fit(train['text'])
binary_vec = CountVectorizer(max_features=10000, binary=True).fit(train['text'])

In [7]:
k_means_model = joblib.load('weights/k_means.pkl')
random_forest_model = joblib.load('weights/random_forest.pkl')
logistic_model = joblib.load('weights/logistic_regression.pkl')
with gzip.open("weights/knn.pkl.gz", "rb") as f:
    knn_model = joblib.load(f)
with gzip.open("weights/svm.pkl.gz", "rb") as f:
    svm_model = joblib.load(f)

In [8]:
import torch
import torch.nn as nn

class SentimentMLP(nn.Module):
    def __init__(self, input_dim, hidden_dim=100, output_dim=3):
        super(SentimentMLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        return self.fc2(x)
    
nn_model = SentimentMLP(input_dim=10000)
nn_model.load_state_dict(torch.load('weights/binary_mlp.pt'))

  nn_model.load_state_dict(torch.load('weights/binary_mlp.pt'))


<All keys matched successfully>

In [9]:
from keras.models import Sequential
from keras.layers import Embedding,Dropout,Dense,LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model


model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=256, input_length= 24))  # Input layer
model.add(LSTM(256, return_sequences=True))  # First LSTM layer, returns sequences
model.add(Dropout(0.5))  # Dropout for regularization
model.add(LSTM(128, return_sequences=True))  # Second LSTM layer, does not return sequences by default
model.add(Dropout(0.5))
model.add(LSTM(64))
model.add(Dense(64,activation='relu'))
model.add(Dense(3, activation='softmax'))

model.load_weights('weights/lstm.h5')
lstm_model = model

total_word=10000
token=Tokenizer(num_words=total_word)
token.fit_on_texts(train['text'])
sequences=token.texts_to_sequences(train['text'])





In [15]:
def inference(model, text):
    if model == 'knn':
        x_predict = tfidf_vec.transform([text]).toarray()
        sentiment = transform_label(knn_model.predict(x_predict))
    elif model == 'random_forest':
        x_predict = bog_vec.transform([text]).toarray()
        sentiment = transform_label(random_forest_model.predict(x_predict))
    elif model == 'logistic_regression':
        x_predict = bog_vec.transform([text]).toarray()
        sentiment = transform_label(logistic_model.predict(x_predict))
    elif model == 'svm':
        x_predict = tfidf_vec.transform([text]).toarray()
        sentiment = transform_label(svm_model.predict(x_predict))
    elif model == 'kmeans':
        x_predict = tfidf_vec.transform([text]).toarray()
        sentiment = transform_label(k_means_model.predict(x_predict))
    elif model == 'nn':
        x_predict = torch.tensor(binary_vec.transform([text]).toarray(), dtype=torch.float32)
        with torch.no_grad():
            outputs = nn_model(x_predict)
            preds = torch.argmax(outputs, dim=1).numpy()
        sentiment = transform_label(preds[0])
    elif model == 'lstm':
        new_reviews=[]
        new_reviews.append(text)
        new_reviews_seq = token.texts_to_sequences(new_reviews)
        new_reviews_padded = pad_sequences(new_reviews_seq, maxlen=24)

        predictions = lstm_model.predict(new_reviews_padded, verbose=0)
        predicted_classes = np.argmax(predictions, axis=1)
        sentiment = transform_label(predicted_classes[0])
    return sentiment

In [21]:
print(inference('nn', "I love it very much"))

neutral
