In [26]:
import pickle

def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load artifacts you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Preprocessing artifacts used by the cells below:
#   vocab         - indexed by token (and by '<UNK>' / '<PAD>') to obtain ids
#   scaler        - object whose .transform() is applied to the numerical columns
#   labelencoders - indexed by column name; each entry exposes .classes_ and .transform()
vocab = _load_pickle('rnn_weights/vocab.pkl')
scaler = _load_pickle('rnn_weights/scaler.pkl')
labelencoders = _load_pickle('rnn_weights/label_encoders.pkl')

In [27]:

# Columns passed through the per-column encoders in `labelencoders`.
categorical_cols = [
    'gender',
    'age_group',
    'region',
    'product_category',
    'purchase_channel',
    'platform',
    'issue_resolved',
    'complaint_registered',
]

# Columns passed through `scaler`; order must match what the scaler expects.
numerical_cols = [
    'customer_rating',
    'response_time_hours',
]



In [28]:

def encode_categorical(row, cols=None, encoders=None):
    """Return a copy of ``row`` with its categorical columns replaced by integer codes.

    The input ``row`` is left untouched so the calling cell can be re-run safely.

    Args:
        row: Mapping-like record supporting ``.copy()`` and item access
            (e.g. a dict), holding one raw value per column.
        cols: Names of the columns to encode. Defaults to the module-level
            ``categorical_cols``.
        encoders: Mapping from column name to a fitted encoder exposing
            ``classes_`` and ``transform``. Defaults to the module-level
            ``labelencoders``.

    Returns:
        A copy of ``row`` in which every column in ``cols`` holds the encoder's
        code for its value, or ``-1`` when the value is not among the encoder's
        known classes.

    Raises:
        KeyError: If a column is missing from ``row`` or from ``encoders``.
    """
    if cols is None:
        cols = categorical_cols
    if encoders is None:
        encoders = labelencoders

    # Work on a copy: mutating the caller's record makes notebook cells non-idempotent.
    encoded = row.copy()

    for col in cols:
        le = encoders[col]
        value = row[col]

        if value in le.classes_:
            encoded[col] = le.transform([value])[0]
        else:
            encoded[col] = -1  # Unknown category

    return encoded


In [29]:

def scale_numerical(row, numerical_cols, scaler):
    """Return a copy of ``row`` with its numerical columns scaled by ``scaler``.

    The input ``row`` is left untouched so the calling cell can be re-run safely.

    Args:
        row: Mapping-like record supporting ``.copy()`` and item access (e.g. a dict).
        numerical_cols: Column names to scale, in the order the scaler expects.
        scaler: Fitted object whose ``transform`` accepts a single-row 2-D input
            and returns a 2-D result with one value per column.

    Returns:
        A copy of ``row`` where each column in ``numerical_cols`` holds the
        scaled value converted to a Python ``float``.

    Raises:
        KeyError: If a column in ``numerical_cols`` is missing from ``row``.
    """
    # The scaler is fed one row wrapped in an outer list (i.e. a 1 x n_cols input).
    scaled = scaler.transform([[row[col] for col in numerical_cols]])

    # Work on a copy: mutating the caller's record makes notebook cells non-idempotent.
    scaled_row = row.copy()

    for i, col in enumerate(numerical_cols):
        scaled_row[col] = float(scaled[0][i])

    return scaled_row


In [30]:

def tokenize(text):
    """Lower-case ``text``, drop every '?' and apostrophe, and split on whitespace.

    Returns the resulting list of tokens (empty for blank input). Other
    punctuation is left attached to its token.
    """
    cleaned = text.lower().replace('?','').replace("'","")
    return cleaned.split()

def text_to_indices(text, vocab):
    """Convert ``text`` into a list of vocabulary ids.

    The text is split with ``tokenize``; each token found in ``vocab`` maps to
    ``vocab[token]`` and every other token maps to ``vocab['<UNK>']``.
    """
    # '<UNK>' is only looked up when an out-of-vocabulary token is actually seen.
    return [
        vocab[tok] if tok in vocab else vocab['<UNK>']
        for tok in tokenize(text)
    ]

def pad_sequence(seq, max_len, pad_value = 0):
    """Force ``seq`` to exactly ``max_len`` items.

    Sequences that are already long enough are cut down to their first
    ``max_len`` items; shorter ones get ``pad_value`` appended on the right.
    A new sequence is returned; ``seq`` itself is not modified.
    """
    shortfall = max_len - len(seq)

    if shortfall <= 0:
        # Already at or over the limit: truncate.
        return seq[:max_len]

    return seq + [pad_value] * shortfall


def preprocess_review(text, vocab, max_len=6):
    """Turn raw review text into a fixed-length list of vocabulary ids.

    The text is converted with ``text_to_indices`` and then padded or truncated
    to ``max_len`` entries by ``pad_sequence``, using ``vocab['<PAD>']`` as filler.
    """
    return pad_sequence(
        text_to_indices(text, vocab),
        max_len,
        pad_value=vocab['<PAD>'],
    )

In [31]:
import torch
import torch.nn as nn

class SimpleRNNModel(nn.Module):
    """Classifier that fuses an RNN encoding of a token sequence with tabular features.

    The token ids are embedded, run through a single ``nn.RNN``, and the final
    hidden state is concatenated with the feature vector before a two-layer
    fully connected head produces the class scores.

    Args:
        vocab_size: Number of rows in the embedding table.
        text_emb_dim: Size of each token embedding.
        rnn_hidden: Size of the RNN hidden state.
        feature_dim: Number of tabular features appended to the text encoding.
        num_classes: Number of output scores.
        dropout_rate: Dropout probability used inside the classifier head.
        padding_idx: Token id treated as padding by the embedding layer. Defaults
            to 0 (the previous hard-coded value); pass ``vocab['<PAD>']`` if the
            vocabulary uses a different id.
    """

    def __init__(self,vocab_size, text_emb_dim, rnn_hidden, feature_dim, num_classes , dropout_rate = 0.3,
                 padding_idx = 0):

        super().__init__()

        # Submodule names and creation order are kept stable so saved
        # state_dicts (embedding.*, rnn.*, fc.0.*, fc.3.*) continue to load.
        self.embedding = nn.Embedding(vocab_size, text_emb_dim, padding_idx=padding_idx)
        self.rnn = nn.RNN(text_emb_dim, rnn_hidden, batch_first=True)

        self.fc = nn.Sequential(
            nn.Linear(rnn_hidden + feature_dim , 64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, num_classes)
        )

    def forward(self, text_seq, feature):
        """Compute class scores for a batch.

        Args:
            text_seq: Integer tensor of token ids, batch dimension first.
            feature: Float tensor of tabular features, one row per batch item;
                it is concatenated with the text encoding along dim 1.

        Returns:
            Tensor of unnormalized scores with ``num_classes`` columns.
        """
        embedded = self.embedding(text_seq)

        # nn.RNN returns (outputs for every step, final hidden state); only the latter is used.
        _, hidden_last = self.rnn(embedded)

        # The final hidden state carries a leading per-layer axis; with the single
        # layer built above, taking the last entry removes that axis.
        hidden_last = hidden_last[-1]

        combined = torch.cat([hidden_last, feature], dim=1)

        return self.fc(combined)


In [32]:
# Architecture sizes; these have to match the checkpoint loaded in the next cell.
text_emb_dim, rnn_hidden = 16, 64
dropout_rate = 0.35541525567992005  # presumably the value used at training time - TODO confirm

# Input/output dimensions.
vocab_size = len(vocab)   # one embedding row per vocabulary entry
feature_dim = 10          # width of the tabular feature vector fed to the model
num_classes = 3           # one score per sentiment label

In [33]:

# Rebuild the architecture with the same hyperparameters, then restore the saved weights.
model = SimpleRNNModel(vocab_size, text_emb_dim, rnn_hidden, feature_dim, num_classes , dropout_rate)

# weights_only=True restricts unpickling to tensors and primitive containers, so a
# tampered checkpoint cannot run arbitrary code on load (needs PyTorch >= 1.13).
# map_location="cpu" lets the checkpoint load on machines without a GPU.
state_dict = torch.load("rnn_weights/model_weights.pth", map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)

# Switch to inference mode (disables dropout); kept as the last expression so the
# cell still displays the model summary.
model.eval()

SimpleRNNModel(
  (embedding): Embedding(56, 16, padding_idx=0)
  (rnn): RNN(16, 64, batch_first=True)
  (fc): Sequential(
    (0): Linear(in_features=74, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.35541525567992005, inplace=False)
    (3): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [34]:
# Placeholder input record: one raw value per model input column plus the review text.
row = {
    'gender': 'string',
    'age_group': 'string',
    'region': 'string',
    'product_category': 'string',
    'purchase_channel': 'string',
    'platform': 'string',
    'issue_resolved': 'string',
    'complaint_registered': 'string',
    'customer_rating': 0,
    'response_time_hours': 0,
    'review_text': 'string',
}

In [35]:
# Step 1: review text -> fixed-length list of vocabulary ids.
# NOTE: `row` is overwritten here, so re-run the cell that defines `row` before re-running this one.
row['review_text'] = preprocess_review(row['review_text'], vocab)

# Step 2: encode the categorical columns, then scale the numerical ones.
row = scale_numerical(encode_categorical(row), numerical_cols, scaler)

print(row)

{'gender': -1, 'age_group': -1, 'region': -1, 'product_category': -1, 'purchase_channel': -1, 'platform': -1, 'issue_resolved': -1, 'complaint_registered': -1, 'customer_rating': -2.1375764685299474, 'response_time_hours': -1.7489223911054146, 'review_text': [1, 0, 0, 0, 0, 0]}




In [36]:

# Tabular features in model input order: categorical columns first, then numerical ones.
feature_cols = [*categorical_cols, *numerical_cols]
feature_values = [row[name] for name in feature_cols]

# Add a leading batch dimension of size 1 to both model inputs.
seq = torch.unsqueeze(torch.tensor(row['review_text'], dtype=torch.long), 0)
feature = torch.unsqueeze(torch.tensor(feature_values, dtype=torch.float), 0)

print(seq.shape, feature.shape, sep="\n")

torch.Size([1, 6])
torch.Size([1, 10])


In [37]:
# Maps the predicted class index to its human-readable label.
sentiment = {
    2: 'positive',
    1: 'neutral',
    0: 'negative',
}

In [38]:
# Gradients are not needed for inference.
with torch.no_grad():
    outputs = model(seq, feature)
    # Index of the highest score along the class dimension.
    predicted = torch.max(outputs, dim=1).indices

predicted_class = predicted.item()
print(predicted_class, sentiment[predicted_class], sep="\n")
    
            

0
negative
