## Reference
Clip Client [example notebook](https://github.com/rom1504/clip-retrieval/blob/main/notebook/clip-client-query-api.ipynb) by Romain Beaumont

Clip Retrieval [UI](https://rom1504.github.io/clip-retrieval/)

In [18]:
%pip install clip-retrieval img2dataset



Note: you may need to restart the kernel to use updated packages.


In [1]:
from IPython.display import Image, display
from clip_retrieval.clip_client import ClipClient, Modality

In [2]:
def log_result(result):
    """Print one retrieval hit field by field, then show its image.

    `result` must be a mapping with the keys "id", "caption", "url" and
    "similarity". All four are looked up before anything is printed, so a
    missing key raises KeyError without producing partial output.
    """
    result_id = result["id"]
    caption_text = result["caption"]
    image_url = result["url"]
    score = result["similarity"]

    print(f"id: {result_id}")
    print(f"caption: {caption_text}")
    print(f"url: {image_url}")
    print(f"similarity: {score}")

    # Render the picture the URL points at underneath the printed fields.
    picture = Image(url=image_url, unconfined=True)
    display(picture)

In [3]:
def create_client(index_name="laion5B-L-14"):
    """Build a ClipClient pointed at the knn.laion.ai kNN service.

    `index_name` is forwarded as the client's `indice_name`; every other
    option is fixed in this function.
    """
    settings = dict(
        url="https://knn.laion.ai/knn-service",
        indice_name=index_name,
        aesthetic_score=9,
        aesthetic_weight=0.5,
        use_mclip=False,  # multilingual data
        modality=Modality.IMAGE,
        num_images=50,
        deduplicate=True,
        use_safety_model=True,
        use_violence_detector=True,
    )
    return ClipClient(**settings)

In [4]:
# Index names that can be handed to create_client
# (presumably the ones the backend serves -- confirm).
available_indices = [
    "laion5B-L-14",
    "laion5B-H-14",
    "laion_400m",
]

# Use the H/14 index rather than create_client's default ("laion5B-L-14").
client = create_client(index_name="laion5B-H-14")

In [5]:
# Text prompts grouped under "<country>_<domain>" keys; each prompt is a bare
# person name with no further description.
underspecified_prompts = {
    "USA_politics": [
        "Barack Obama",
        "Joe Biden",
        "Kamala Harris",
        "Alexandria Ocasio-Cortez",
        "John F Kennedy",
    ],
    "IND_sports": [
        "Baichung Bhutia",
        "Dhyan Chand",
        "Sachin Tendulkar",
        "Sania Mirza",
        "PV Sindhu",
    ],
    "IND_politics": [
        "Mahatma Gandhi",
        "BR Ambedkar",
        "Indira Gandhi",
        "APJ Abdul Kalam",
        "Narendra Modi",
    ],
    "IND_actor": [
        "Shah Rukh Khan",
        "Priyanka Chopra",
        "Aamir Khan",
        "Amitabh Bachchan",
        "Aishwarya Rai",
    ],
}

In [6]:
# Key of the prompt group that is sent to the retrieval client.
category = "IND_politics"

# One query per prompt; only the captions of the returned hits are printed,
# followed by an empty line to separate the prompts.
for prompt in underspecified_prompts[category]:
    client_results = client.query(text=prompt)
    print([hit["caption"] for hit in client_results], end="\n\n")

['Mahatma Gandhi in colour/color - Copyright: GandhiServe India - www.gandhiserveindia.org - Ghandi', 'The Mahatma by I.M. Spadecaller', 'Mahatma Gandhi - Copyright: GandhiServe India - www.gandhiserveindia.org - Ghandi', 'The Mahatma', '"MAHATMA GANDHI WHOSE MODEL OF ""SATYA GIRI"" AND PEACE WALKS WE FOLLOW"', 'Mahatma Gandhi', 'Mahatma Gandhi by Vallabh Kargathra', 'Mahatma Ghandhi', 'Le più belle <strong>frasi di Mahatma Gandhi</strong> – <em>Raccolta completa</em>', 'Mahatma Gandhi in colour/color - Copyright: GandhiServe India - www.gandhiserveindia.org - Ghandi', 'Mahatma Gandhi, il vincitore mancato', 'Mahatma Gandhi in colour/color - Copyright: GandhiServe India - www.gandhiserveindia.org - Ghandi', 'Mahatma Gandhi - 150 Anni', 'Mahatma Gandhi  2 nd  Oct 1869-30 th  Jan 1948 (79 Yrs)', '68_Gandhi_lathi_noakhali', 'Mahatma Gandhi suvichar in Marathi', 'Gandhi', 'Mahatma Gandhi - Copyright: GandhiServe India - www.gandhiserveindia.org - Ghandi', 'Mahatma Gandhi', '"""Congo YT 206

["The Untold Tale Behind Modi's Shock Takeover of a Risky Lender", 'Narendra Modi by caricature-artist', 'Do You Know Narendra Modi', 'Management Guru Narendra Modi', 'Narendra Modi Next Prime Minister of India', 'Narendra_Modi', 'The Best Namo Hd/Hq Wallpapers 2013', 'What are some strange facts about Narendra Modi?', 'Is this the 2014 effect? Modi sits through presentation on 2002 riots, says I', 'Gujarat Chief Minister Narendra Modi', 'Congress Fire Modi Over Burqa Statement', 'Narendra Modi Biography apk screenshot', "Union Budget 2013 lacks strategy and vision for India's development: Modi", 'The saffron makeover: Rajnath Singh set to bring BJP stalwarts to centrestage', 'Narendra Modi Facebook Cover 10', "India's Prime Minister Narendra Modi as called a meeting with all the state chief ministers Sunday to discuss the overhaul of the country's Planning Commission - Sputnik International", 'Narendra Modi clicked at his office in Gandhinagar', 'Gujarat chief minister Narendra Modi.'

In [17]:
ls ../googletrends-queries-images/076_BRA

BRA_2014.csv  BRA_2016.csv  BRA_2018.csv  BRA_2020.csv  BRA_2022.csv
BRA_2015.csv  BRA_2017.csv  BRA_2019.csv  BRA_2021.csv  BRA_2023.csv


In [12]:
import pandas as pd

# Directory prefix for the query exports. The trailing slash is required
# because file paths are built from it by plain string concatenation.
root = "../googletrends-queries-images/"
# Per-country folder names in the form "<numeric code>_<three-letter code>".
countries = [
    "076_BRA",
    "356_IND",
    "840_USA",
    "566_NGA",
]

In [109]:
def generate_query_df(country_index=0, years=None, root_dir=None, country_dirs=None):
    """Collect the multi-word "RISING" queries of one country across several years.

    For every requested year the file ``<root><country dir>/<CODE>_<year>.csv``
    is read, only the rows below the ``RISING`` marker row are kept, and each
    kept row is tagged with its year. The yearly tables are concatenated,
    de-duplicated on the query text (the first occurrence wins) and queries
    that contain no space are dropped.

    Parameters
    ----------
    country_index : int, default 0
        Position in the country list of the country to process.
    years : iterable of int, optional
        Years to load, in order. Defaults to ``range(2014, 2022)``
        (2014 through 2021).
    root_dir : str, optional
        Directory prefix including its trailing separator. Defaults to the
        notebook-level ``root``.
    country_dirs : sequence of str, optional
        Folder names such as ``"076_BRA"``; the part after the last
        underscore is used as the file-name prefix. Defaults to the
        notebook-level ``countries``.

    Returns
    -------
    pandas.DataFrame
        Columns ``query`` and ``year`` with a fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If a yearly file contains no ``RISING`` row.
    """
    # Resolve notebook-level defaults at call time (not at definition time) so
    # the function can be defined before, or used without, those globals.
    if root_dir is None:
        root_dir = root
    if country_dirs is None:
        country_dirs = countries
    if years is None:
        years = range(2014, 2022)

    # Use the argument, not a notebook global, to choose the country.
    country_dir = country_dirs[country_index]
    country_code = country_dir.split('_')[-1]

    yearly_frames = []
    for year in years:
        csv_path = f'{root_dir}{country_dir}/{country_code}_{year}.csv'
        # skiprows=1 drops the first line of the export. reset_index() moves
        # the index into a regular column -- presumably the query text ends up
        # in the index because data rows have one more field than the header
        # row; confirm against the actual files.
        raw = pd.read_csv(csv_path, skiprows=1).reset_index()
        raw.columns = ["query", "year"]
        # Position of the first row after the "RISING" marker row.
        first_rising_row = raw.index[raw['query'] == 'RISING'][0] + 1
        # assign() builds a new frame, so the iloc slice is never written to
        # in place (avoids pandas' SettingWithCopyWarning).
        rising = raw.iloc[first_rising_row:].assign(year=year)
        yearly_frames.append(rising)

    combined = pd.concat(yearly_frames)
    # remove duplicate and single word queries
    combined = combined.drop_duplicates(subset=['query'])
    combined = combined[combined['query'].str.count(' ') > 0]

    return combined.reset_index(drop=True)

In [115]:
# Position in `countries` of the country to export (3 selects '566_NGA').
i = 3
df_new = generate_query_df(i)
print(df_new)
# Save the table in that country's folder, beside its per-year CSV files;
# index=False keeps the row index out of the file.
df_new.to_csv(f'{root}{countries[i]}/topqueries_2014-21.csv', encoding='utf-8', index=False)

                                        query  year
0                                chibok girls  2014
1                               ander herrera  2014
2                                    nokia xl  2014
3                        rabilu musa dan ibro  2014
4                          labarin boko haram  2014
..                                        ...   ...
161  bbc hausa labaran duniya da dumi-duminsu  2021
162                             aminiya hausa  2021
163                            zainab sambisa  2021
164                             jamila makira  2021
165                                    hot 10  2021

[166 rows x 2 columns]
