In [8]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

  from tqdm.autonotebook import tqdm


In [11]:
import os
import pinecone
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc.create_index(
    name="emojis", dimension=1536, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")
)

In [2]:
import json
data = json.load(open("emojis.json"))
data['emojis']

[{'id': 'emoji_001',
  'emoji': '😀',
  'name': 'grinning face',
  'description': 'A yellow face with simple, open eyes and a broad, open smile, showing upper teeth and tongue on some platforms.',
  'category': 'smileys_and_emotion',
  'tags': ['smile', 'happy', 'joy', 'grin']},
 {'id': 'emoji_002',
  'emoji': '🎉',
  'name': 'party popper',
  'description': 'A party popper, as would be used at a celebration. Depicted as a gold cone shooting out colorful confetti and streamers.',
  'category': 'activities',
  'tags': ['celebration', 'party', 'festive', 'confetti']},
 {'id': 'emoji_003',
  'emoji': '🌈',
  'name': 'rainbow',
  'description': 'A rainbow, as seen after rain. Depicted as an arch of multicolored stripes, generally showing six colors: red, orange, yellow, green, blue, and purple.',
  'category': 'travel_and_places',
  'tags': ['weather', 'nature', 'colors', 'pride']},
 {'id': 'emoji_004',
  'emoji': '🍕',
  'name': 'pizza',
  'description': 'A slice of pepperoni pizza, with a re

In [25]:
from openai import OpenAI

processed_data = []
client = OpenAI()

for emoji in data['emojis']:
    response = client.embeddings.create(
        input=emoji['emoji'],
        model="text-embedding-3-small"
    )
    
    embedding = response.data[0].embedding
    processed_data.append({
        "values": embedding,
        "id": emoji["id"],  # Correct key for 'id'
        "metadata": {
            "emoji": emoji["emoji"],  # Correct key for 'emoji'
            "name": emoji["name"],  # Correct key for 'name'
            "description": emoji["description"],  # Correct key for 'description'
            "category": emoji["category"],  # Correct key for 'category'
            "tags": emoji["tags"]  # Correct key for 'tags'
        }
    })

# Now `processed_data` contains embeddings for all emojis


In [26]:
processed_data[0]

{'values': [0.0028769993,
  -0.029141812,
  -0.020836616,
  0.013606384,
  -0.0065491768,
  -0.017935688,
  -0.006262029,
  0.030216776,
  -0.061729398,
  -0.012921647,
  0.026447041,
  0.021307833,
  0.019393515,
  -0.020232871,
  0.022986544,
  0.008305196,
  -0.030054795,
  -0.0076572727,
  0.023119073,
  0.07551249,
  0.08057807,
  0.02065991,
  0.0120528415,
  -0.006578628,
  0.018583613,
  0.006464505,
  -0.035370708,
  -0.005555204,
  0.0322489,
  -0.008967844,
  -0.030953052,
  -0.025033392,
  -0.00807695,
  -0.020115066,
  -0.0043035345,
  0.055014558,
  -0.009174001,
  0.02852334,
  -0.03875758,
  0.01671347,
  -0.03454608,
  -0.04779905,
  -0.0059859254,
  0.021837952,
  0.009343345,
  -0.03681381,
  -0.02793432,
  0.0073075416,
  0.073097505,
  0.009689395,
  -0.03348584,
  -0.023310505,
  0.020011988,
  0.09023801,
  -0.046120342,
  -0.00025608606,
  0.018274376,
  0.060786963,
  -0.014467828,
  0.03598918,
  0.03881648,
  0.00442502,
  0.030776346,
  -0.01770008,
  0.0066

In [29]:
index = pc.Index('emojis')
index.upsert(
    vectors=processed_data,
    namespace="ns1"
    
)

{'upserted_count': 35}

In [28]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 55}},
 'total_vector_count': 55}