#### Augment the existing movie emoji database with emoji generated by Gemini

In [37]:
import re
import os
import json
import emoji
import regex
import unicodedata
from tqdm import tqdm
from glob import glob
from PIL import Image
from google import genai

from pydantic import BaseModel, TypeAdapter

In [2]:
# Read the Gemini API key from the environment rather than hard-coding it.
# `os.environ.get` returns None (no exception) when GEMINI_API_KEY is unset.
apikey = os.environ.get("GEMINI_API_KEY")

In [3]:
# Load the full movie catalogue; each record carries 'name', 'description',
# 'hash' and 'embedding' (see the printed sample below).
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default, which is not UTF-8 everywhere.
with open('src/server/movies_all.json', encoding='utf-8') as f:
    list_of_movies = json.load(f)

# Print one record as a quick sanity check of the schema.
print(list_of_movies[0])

{'name': 'Fantastic Four: Rise of the Silver Surfer', 'description': 'Four members of a team attempt to stop an official from doing his job.', 'hash': '337b1059', 'embedding': '0.03284311,-0.04012075,-0.04115805,0.02394610,-0.02181949,-0.03378739,-0.06757524,-0.05418657,-0.06940997,-0.01122688,-0.06758153,-0.03768939,-0.05418087,-0.01302225,-0.06038121,-0.01995086,-0.00705433,0.05357707,0.04776596,0.06678301,-0.06298641,-0.01599477,-0.01958111,-0.01998920,0.04969964,0.03132096,0.00862419,0.04467030,0.04003705,-0.00574990,-0.06448080,0.02905551,0.02386524,0.00882238,0.03605771,-0.01268227,-0.06611369,-0.05242034,0.04087871,-0.02217600,0.04035829,0.05508880,0.04351033,-0.04994832,-0.01784032,-0.00892656,0.03935673,0.05753110,0.07262865,0.02863567,-0.01647724,0.04133128,-0.03337650,-0.01262045,-0.00844111,-0.02521113,-0.06295076,-0.06692149,0.00396272,0.05933183,-0.01716153,-0.01505673,-0.06835454,0.00031897,0.04549292,0.00597061,0.07185090,-0.03970474,-0.03726762,0.03751422,-0.03750481,0

In [4]:
# Load the movies that already have a hand-made emoji string; records look
# like the catalogue entries plus an 'emoji' key (see the printed sample below).
# This file contains raw emoji, so decode it explicitly as UTF-8 instead of
# relying on the platform's locale default.
with open('src/server/movies_emoji.json', encoding='utf-8') as f:
    list_of_emoji = json.load(f)

# Print one record as a quick sanity check of the schema.
print(list_of_emoji[0])

{'emoji': '☄️🌍🪙🏄', 'name': 'Fantastic Four: Rise of the Silver Surfer', 'description': 'Four members of a team attempt to stop an official from doing his job.', 'hash': '5896ef32', 'embedding': '0.03284311,-0.04012075,-0.04115805,0.02394610,-0.02181949,-0.03378739,-0.06757524,-0.05418657,-0.06940997,-0.01122688,-0.06758153,-0.03768939,-0.05418087,-0.01302225,-0.06038121,-0.01995086,-0.00705433,0.05357707,0.04776596,0.06678301,-0.06298641,-0.01599477,-0.01958111,-0.01998920,0.04969964,0.03132096,0.00862419,0.04467030,0.04003705,-0.00574990,-0.06448080,0.02905551,0.02386524,0.00882238,0.03605771,-0.01268227,-0.06611369,-0.05242034,0.04087871,-0.02217600,0.04035829,0.05508880,0.04351033,-0.04994832,-0.01784032,-0.00892656,0.03935673,0.05753110,0.07262865,0.02863567,-0.01647724,0.04133128,-0.03337650,-0.01262045,-0.00844111,-0.02521113,-0.06295076,-0.06692149,0.00396272,0.05933183,-0.01716153,-0.01505673,-0.06835454,0.00031897,0.04549292,0.00597061,0.07185090,-0.03970474,-0.03726762,0.0375

In [51]:
# Gemini API client, authenticated with the key read from the environment.
client = genai.Client(api_key=apikey)
# Schema for one item of the structured JSON response requested from Gemini
# (passed as `response_schema=list[Movie]` in the generation calls).
# NOTE(review): documented with `#` comments rather than a class docstring,
# since a docstring may end up in the schema sent to the API — confirm.
class Movie(BaseModel):
  # Title of the movie the emoji describe, as echoed back by the model.
  provided_movie: str
  # The emoji themselves, one string per emoji.
  emoji:list[str]

class EmojiSequence:
    """Thin wrapper around an emoji string.

    The wrapped value is kept on the ``emoji`` attribute and handed back
    unchanged by ``str()`` and ``to_json()``; ``repr()`` shows it in single
    quotes.
    """

    def __init__(self, emoji):
        # Stored as given; no validation or normalisation happens here.
        self.emoji = emoji

    def to_json(self):
        """Return the wrapped value for JSON serialisation."""
        return self.emoji

    def __repr__(self):
        """Return the wrapped value surrounded by single quotes."""
        return "'{}'".format(self.emoji)

    def __str__(self):
        """Return the wrapped value itself."""
        return self.emoji

class EmojiEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialise ``EmojiSequence`` objects."""

    def default(self, obj):
        """Encode ``EmojiSequence`` via ``to_json()``; defer everything else.

        Anything that is not an ``EmojiSequence`` goes to the base class,
        which raises ``TypeError`` for unsupported types.
        """
        if not isinstance(obj, EmojiSequence):
            return super().default(obj)
        return obj.to_json()

In [6]:
# Pick a single catalogue entry to try the prompt on.
# NOTE(review): later cells read the global `movie`; re-run this cell first
# if the name may have been reassigned since.
movie = list_of_movies[4]
print(movie)

{'name': 'Mulan', 'description': "Girl has to pretend she's a man in order to be taken seriously", 'hash': 'aaf9916', 'embedding': '-0.03248815,0.03001933,0.03603962,0.04461570,-0.06739928,0.08006297,0.03068032,0.05236538,0.03409820,-0.00004935,-0.05338304,-0.03173082,-0.04738799,-0.04708636,-0.07290818,-0.00432240,-0.00963434,0.07720398,0.02401556,-0.08107539,0.05943234,0.05531439,0.02932931,-0.00318265,0.04757718,0.04043901,-0.01952950,-0.02907183,0.04766662,0.00228527,-0.05846269,-0.00967449,0.03529789,0.05132646,0.05391673,-0.06271280,-0.07517276,-0.00117499,0.07328440,0.02282989,0.06577143,0.06324063,0.05593794,0.00839268,0.05010224,-0.01636588,-0.02026587,0.07761709,0.00033614,0.03833722,0.04836324,0.05731259,-0.00145989,0.07581622,0.04257533,-0.01925101,0.04201345,0.06728967,0.02316368,-0.00249534,-0.02233393,-0.04313153,-0.02838427,0.02789067,0.04142548,-0.02234504,0.01882542,-0.06050299,0.02516097,0.03011189,0.04646904,0.07850073,0.03392169,0.06326541,-0.04962700,-0.06001510,0

In [43]:
def find_by_name(objects_list, name):
    """Return the first item whose ``'name'`` key equals ``name``.

    Args:
        objects_list: Iterable of mappings that each have a ``'name'`` key.
        name: Value to look for.

    Returns:
        The first matching item, or ``None`` when nothing matches.
    """
    matches = (candidate for candidate in objects_list if candidate['name'] == name)
    return next(matches, None)
    
def split_emoji(emoji_string: str, max_count: int = 4) -> list[EmojiSequence]:
    """Split a string of emoji into individual ``EmojiSequence`` objects.

    Args:
        emoji_string: Emoji written one after another, optionally separated
            by whitespace.
        max_count: Maximum number of emoji to keep. Defaults to 4, the
            number of emoji used per movie in this notebook.

    Returns:
        Up to ``max_count`` ``EmojiSequence`` objects, in input order.
    """
    # \X matches one extended grapheme cluster, so an emoji followed by a
    # variation selector (e.g. U+2604 U+FE0F) stays in one piece instead of
    # being split per code point.
    clusters = regex.findall(r'\X', emoji_string)
    # Whitespace is a grapheme cluster too; drop it so separators between
    # emoji do not use up slots before the cut-off.
    visible = [cluster for cluster in clusters if not cluster.isspace()]
    return [EmojiSequence(cluster) for cluster in visible[:max_count]]


In [59]:
# One-off trial of the prompt on the single `movie` selected earlier, asking
# for JSON that matches a list of `Movie` items.
response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents=f"Describe the provided movie in only four emoji: {movie['name']}. Note, use descriptive emoji and make it easy to guess the name of the movie from only the emoji. You can also use the cryptic description provided: {movie['description']}",
    config=dict(
        response_mime_type='application/json',
        response_schema=list[Movie],
    ),
)

In [60]:
# Display the response parsed into the requested schema (a list of Movie items).
response.parsed

[Movie(provided_movie='Mission: Impossible - Fallout', emoji=['🧗', '☢️', '🚁', '💥'])]

In [63]:
def _generate_emoji(movie):
    """Ask Gemini for a four-emoji description of one movie.

    Args:
        movie: Catalogue record with at least ``'name'`` and ``'description'``.

    Returns:
        The list of emoji strings from the first parsed item when the model
        returned exactly four; otherwise an empty list.
    """
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=f"Describe the provided movie in only four emoji: {movie['name']}. Note, use descriptive emoji and make it easy to guess the name of the movie from only the emoji and do not use any more than four emoji. You can also use the cryptic description provided: {movie['description']}",
        config={
            'response_mime_type': 'application/json',
            'response_schema': list[Movie],
        },
    )
    parsed = response.parsed
    # Only accept an answer that parsed and has exactly the four emoji asked for.
    if parsed and parsed[0] and len(parsed[0].emoji) == 4:
        return parsed[0].emoji
    return []


# Enrich every catalogue entry with an 'emoji' list: reuse the hand-made emoji
# where one exists, otherwise generate one with Gemini.
# The loop deliberately avoids the names `movie`, `emoji` and `response` so it
# does not clobber the imported `emoji` module or the variables of the
# single-movie trial cells.
listed = []
for entry in tqdm(list_of_movies):
    found = find_by_name(list_of_emoji, entry['name'])
    if found and found['emoji'] and len(found['emoji']) > 1:
        entry['emoji'] = split_emoji(found['emoji'])
    else:
        entry['emoji'] = _generate_emoji(entry)

    listed.append(entry)
    # Rewrite the whole file after every movie so a crash part-way through
    # the run still leaves a usable checkpoint on disk.
    # The output holds raw emoji (ensure_ascii=False), so the encoding must be
    # UTF-8 regardless of the platform's locale default.
    with open('enriched.json', 'w', encoding='utf-8') as outfile:
        json.dump(listed, outfile, cls=EmojiEncoder, ensure_ascii=False)

100%|██████████| 1009/1009 [31:20<00:00,  1.86s/it]


In [64]:
# Read the enriched file back to sanity-check the first record.
# It was written with ensure_ascii=False and therefore holds raw emoji, so
# decode it explicitly as UTF-8 instead of relying on the locale default.
with open('./enriched.json', encoding='utf-8') as f:
    enriched_movies = json.load(f)

print(enriched_movies[0])

{'name': 'Fantastic Four: Rise of the Silver Surfer', 'description': 'Four members of a team attempt to stop an official from doing his job.', 'hash': '337b1059', 'embedding': '0.03284311,-0.04012075,-0.04115805,0.02394610,-0.02181949,-0.03378739,-0.06757524,-0.05418657,-0.06940997,-0.01122688,-0.06758153,-0.03768939,-0.05418087,-0.01302225,-0.06038121,-0.01995086,-0.00705433,0.05357707,0.04776596,0.06678301,-0.06298641,-0.01599477,-0.01958111,-0.01998920,0.04969964,0.03132096,0.00862419,0.04467030,0.04003705,-0.00574990,-0.06448080,0.02905551,0.02386524,0.00882238,0.03605771,-0.01268227,-0.06611369,-0.05242034,0.04087871,-0.02217600,0.04035829,0.05508880,0.04351033,-0.04994832,-0.01784032,-0.00892656,0.03935673,0.05753110,0.07262865,0.02863567,-0.01647724,0.04133128,-0.03337650,-0.01262045,-0.00844111,-0.02521113,-0.06295076,-0.06692149,0.00396272,0.05933183,-0.01716153,-0.01505673,-0.06835454,0.00031897,0.04549292,0.00597061,0.07185090,-0.03970474,-0.03726762,0.03751422,-0.03750481,0