# pipeline for emoji analysis using only emoji embeddings 

In [23]:
import gensim
import numpy as np
import pickle
import emoji

# Load trained RandomForestRegressor model
with open("model/emoji_sentiment_LGBM.pkl", "rb") as f:
    model = pickle.load(f)

# Load Emoji2Vec model
emoji2vec_path = "emoji2vec/pre-trained/emoji2vec.bin"  # Change to your actual path
emoji_model = gensim.models.KeyedVectors.load_word2vec_format(emoji2vec_path, binary=True)


In [24]:
def extract_emojis(text):
    """Extract all emojis from text"""
    return ''.join(c for c in text if c in emoji.EMOJI_DATA)

def get_emoji_embedding(emoji_char):
    """Extract embedding for a given emoji"""
    try:
        return emoji_model[emoji_char]
    except KeyError:
        return np.zeros(300)  # Return zero vector if emoji not found


In [25]:
def predict_sentiment(user_input):
    """Predict sentiment score based on user input containing only emojis"""
    
    # Extract emojis from input
    extracted_emojis = extract_emojis(user_input)
    
    # Handle case when no emoji is found
    if not extracted_emojis:
        return "No emojis detected! Please enter a sentence with emojis."

    # Get emoji embeddings
    emoji_embeddings = np.mean([get_emoji_embedding(e) for e in extracted_emojis], axis=0)

    # Reshape for model input
    emoji_features = emoji_embeddings.reshape(1, -1)

    # Predict sentiment score
    predicted_score = model.predict(emoji_features)[0]

    return predicted_score


In [27]:
# Example inputs
user_text1 = "I love this! ❤️"  # Positive sentiment
user_text2 = "I'm so sad... 💔"  # Negative sentiment
user_text3 = "What a weird day 🤖🧐"  # Neutral sentiment
user_text4 = " dafv"  # No emojis

# Predict sentiment scores
print(f"Sentiment Score for '{user_text1}': {predict_sentiment(user_text1)}")
print(f"Sentiment Score for '{user_text2}': {predict_sentiment(user_text2)}")
print(f"Sentiment Score for '{user_text3}': {predict_sentiment(user_text3)}")
print(f"Sentiment Score for '{user_text4}': {predict_sentiment(user_text4)}")


Sentiment Score for 'I love this! ❤️': 0.5946597148164596
Sentiment Score for 'I'm so sad... 💔': 0.09371549747950185
Sentiment Score for 'What a weird day 🤖🧐': 0.5758505565267965
Sentiment Score for ' dafv': No emojis detected! Please enter a sentence with emojis.


In [32]:
# Load the saved ensemble models
with open("model/emoji_sentiment_ensemble.pkl", "rb") as f:
    rf_model, lgbm_model, xgb_model = pickle.load(f)

def predict_emoji_sentiment(emoji_char):
    """Predict sentiment score for an emoji using ensemble learning"""
    emoji_embedding = get_emoji_embedding(emoji_char).reshape(1, -1)
    
    # Get predictions
    pred_rf = rf_model.predict(emoji_embedding)
    pred_lgbm = lgbm_model.predict(emoji_embedding)
    pred_xgb = xgb_model.predict(emoji_embedding)

    # Compute final sentiment score
    final_score = (0.5 * pred_rf) + (0.3 * pred_lgbm) + (0.2 * pred_xgb)
    return final_score[0]

# Example Predictions
print(f"Sentiment Score for ❤️: {predict_emoji_sentiment('❤️')}")
print(f"Sentiment Score for 😢: {predict_emoji_sentiment('😢')}")
print(f"Sentiment Score for 🤖: {predict_emoji_sentiment('🤖')}")


Sentiment Score for ❤️: 0.766419100031869
Sentiment Score for 😢: 0.13798656724098868
Sentiment Score for 🤖: 0.5051691668260989


# pipeline for emoji analysis using only emoji embeddings as well as text embeddings

In [28]:
import re
import gensim
import numpy as np
import pickle
import emoji
import torch
from transformers import BertTokenizer, BertModel

# Load trained RandomForestRegressor model (expects 1068 features)
with open("model/emoji_sentiment_RFR.pkl", "rb") as f:
    model = pickle.load(f)

# Load Emoji2Vec model
emoji2vec_path = "emoji2vec/pre-trained/emoji2vec.bin"  # Change to your actual path
emoji_model = gensim.models.KeyedVectors.load_word2vec_format(emoji2vec_path, binary=True)

# Load BERT model for text embeddings
bert_model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(bert_model_name)
bert_model = BertModel.from_pretrained(bert_model_name)




In [29]:
def extract_emojis(text):
    """Extract all emojis from text"""
    return ''.join(c for c in text if c in emoji.EMOJI_DATA)

def remove_emojis(text):
    """Remove emojis from text to extract pure words"""
    return emoji.replace_emoji(text, replace='')

def get_emoji_embedding(emoji_char):
    """Extract embedding for a given emoji"""
    try:
        return emoji_model[emoji_char]
    except KeyError:
        return np.zeros(300)  # Return zero vector if emoji not found

def get_text_embedding(text):
    """Get BERT embedding for text description"""
    tokens = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=10)
    with torch.no_grad():
        output = bert_model(**tokens)
    return output.last_hidden_state.mean(dim=1).squeeze().numpy()  # Mean pooled embedding


In [30]:
def predict_sentiment(user_input):
    """Predict sentiment score based on user input containing text and emojis"""
    
    # Extract emojis and text separately
    extracted_emojis = extract_emojis(user_input)
    cleaned_text = remove_emojis(user_input)

    # Get embeddings
    emoji_embeddings = np.zeros(300)  # Default to zero vector
    if extracted_emojis:
        emoji_embeddings = np.mean([get_emoji_embedding(e) for e in extracted_emojis], axis=0)

    text_embedding = get_text_embedding(cleaned_text)

    # Combine features (Emoji2Vec + BERT) to match model input (1068-d)
    combined_features = np.hstack((emoji_embeddings, text_embedding)).reshape(1, -1)

    # Predict sentiment score
    predicted_score = model.predict(combined_features)[0]

    return predicted_score


In [31]:
# Example inputs
user_text1 = "I love this! ❤️"  # Positive sentiment
user_text2 = "I'm so sad... 😢"  # Negative sentiment
user_text3 = "What a weird day 🤖🧐"  # Neutral sentiment

# Predict sentiment scores
print(f"Sentiment Score for '{user_text1}': {predict_sentiment(user_text1)}")
print(f"Sentiment Score for '{user_text2}': {predict_sentiment(user_text2)}")
print(f"Sentiment Score for '{user_text3}': {predict_sentiment(user_text3)}")


Sentiment Score for 'I love this! ❤️': 0.5353125
Sentiment Score for 'I'm so sad... 😢': 0.5675
Sentiment Score for 'What a weird day 🤖🧐': 0.51


# ensemble model nn, rf, xgb 
 

In [1]:
import requests

url = "http://127.0.0.1:5000/predict"
params = {"emoji": "😂"}
response = requests.get(url, params=params)
print(response.json())


JSONDecodeError: Expecting value: line 1 column 1 (char 0)