In [4]:
# Download GloVe embeddings (~822 MB; this will take a minute or two).
# -nc (no-clobber) skips the download when glove.6B.zip already exists, so
# re-running this cell does not fetch the archive again or leave a
# glove.6B.zip.1 duplicate behind. Delete a partial file by hand to retry.
# The https URL is used directly; the http one only redirects to it.
!wget -nc https://nlp.stanford.edu/data/glove.6B.zip

# Unzip the downloaded file.
# -n never overwrites files that are already extracted, which avoids unzip's
# interactive "replace?" prompt that a notebook cell cannot answer.
!unzip -n -q glove.6B.zip


--2025-04-20 16:06:08--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2025-04-20 16:06:08--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2025-04-20 16:06:08--  https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


202

In [2]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the one this notebook was last run with so re-runs are reproducible.
%pip install emoji==2.14.1

Collecting emoji
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Downloading emoji-2.14.1-py3-none-any.whl (590 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m590.6/590.6 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.14.1


In [5]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import emoji

# Emoji dictionary: integer class id -> emoji shown for that class
emoji_dict = {
    0: "❤️",
    1: "😠",
    2: "😄",
    3: "😢",
}

# Sample dataset: the label at position k is the class id (a key of
# emoji_dict) of the sentence at position k.
X_train = np.array([
    "I love you",
    "I hate you",
    "I am so happy",
    "I am sad",
    "You are amazing",
])
Y_train = np.array([0, 1, 2, 3, 2])

# Held-out sentences used only for evaluation
X_test = np.array([
    "You make me smile",
    "I am heartbroken",
])
Y_test = np.array([2, 3])

# Load GloVe embeddings
def load_glove_embeddings(file_path="glove.6B.50d.txt"):
    """Read a GloVe text file into a ``{word: vector}`` dictionary.

    Each non-blank line is parsed as a token followed by its
    whitespace-separated float components. Blank lines are skipped.

    Args:
        file_path: Path to the GloVe ``.txt`` file; it is read as UTF-8.

    Returns:
        dict mapping each word (str) to a 1-D float32 NumPy array.

    Raises:
        OSError: If ``file_path`` cannot be opened.
        ValueError: If a component on a non-blank line is not a valid float.
    """
    print("Loading GloVe word vectors...")
    embeddings_index = {}
    with open(file_path, encoding="utf8") as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank line (e.g. an empty line at the end of the file):
                # there is no token to index, so skip it instead of failing
                # on values[0].
                continue
            word = values[0]
            coefs = np.asarray(values[1:], dtype="float32")
            embeddings_index[word] = coefs
    print(f"Loaded {len(embeddings_index)} word vectors.")
    return embeddings_index

# Convert sentence to average word vector
def sentence_to_avg(sentence, word_to_vec_map):
    """Average the embeddings of the known words in ``sentence``.

    The sentence is lower-cased and split on whitespace; words missing from
    ``word_to_vec_map`` are ignored.

    Args:
        sentence: Text to embed.
        word_to_vec_map: dict mapping a word to its 1-D embedding vector.
            All vectors are expected to share the same length.

    Returns:
        1-D float64 NumPy array with the same length as the vectors in
        ``word_to_vec_map``. It is all zeros when no word of the sentence is
        in the map. When the map itself is empty the length falls back to 50.
    """
    # Take the dimensionality from the embeddings themselves so that any
    # vector size works, not only 50-d. An empty map gives nothing to
    # inspect, so keep the historical 50 in that case.
    if word_to_vec_map:
        dim = len(next(iter(word_to_vec_map.values())))
    else:
        dim = 50

    avg = np.zeros((dim,))
    count = 0
    for w in sentence.lower().split():
        if w in word_to_vec_map:
            avg += word_to_vec_map[w]
            count += 1
    # Guard the division: a sentence with no known words stays a zero vector.
    if count > 0:
        avg /= count
    return avg

# Load GloVe (word -> vector lookup used for every sentence below)
word_to_vec_map = load_glove_embeddings()

# Vectorize dataset: each sentence becomes one row holding its averaged embedding
X_train_avg = np.array([sentence_to_avg(text, word_to_vec_map) for text in X_train])
X_test_avg = np.array([sentence_to_avg(text, word_to_vec_map) for text in X_test])

# Train model (fit() returns the fitted estimator itself)
clf = LogisticRegression(max_iter=1000).fit(X_train_avg, Y_train)

# Predict and print results: one "<sentence> => <emoji>" line per test sentence
Y_pred = clf.predict(X_test_avg)
print("\nPredictions:")
for sent, label in zip(X_test, Y_pred):
    print(f"{sent} => {emoji_dict[label]}")

# Evaluate against the held-out labels
accuracy = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
print("\nAccuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)


Loading GloVe word vectors...
Loaded 400000 word vectors.

Predictions:
You make me smile => 😄
I am heartbroken => 😢

Accuracy: 1.0
Confusion Matrix:
 [[1 0]
 [0 1]]
