# Intro

In [1]:
import pandas as pd
import numpy as np
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

#pip install gradio_client

In [2]:
# Load the training interactions and the item catalogue from the Kaggle data folder
interactions = pd.read_csv('kaggle_data/interactions_train.csv')
items = pd.read_csv('kaggle_data/items.csv')

# Preview the first rows of both tables
display(interactions.head(), items.head())



Unnamed: 0,u,i,t
0,4456,8581,1687541000.0
1,142,1964,1679585000.0
2,362,3705,1706872000.0
3,1809,11317,1673533000.0
4,4384,1323,1681402000.0


Unnamed: 0,Title,Author,ISBN Valid,Publisher,Subjects,i
0,Classification décimale universelle : édition ...,,9782871303336; 2871303339,Ed du CEFAL,Classification décimale universelle; Indexatio...,0
1,Les interactions dans l'enseignement des langu...,"Cicurel, Francine, 1947-",9782278058327; 2278058320,Didier,didactique--langue étrangère - enseignement; d...,1
2,Histoire de vie et recherche biographique : pe...,,2343190194; 9782343190198,L'Harmattan,Histoires de vie en sociologie; Sciences socia...,2
3,Ce livre devrait me permettre de résoudre le c...,"Mazas, Sylvain, 1980-",9782365350020; 236535002X; 9782365350488; 2365...,Vraoum!,Moyen-Orient; Bandes dessinées autobiographiqu...,3
4,Les années glorieuses : roman /,"Lemaitre, Pierre, 1951-",9782702180815; 2702180817; 9782702183618; 2702...,Calmann-Lévy,France--1945-1975; Roman historique; Roman fra...,4


# Understand the data

In [3]:
# Number of non-null entries in each column of the item catalogue
count_elements = items.notna().sum()
count_elements

Title         15291
Author        12638
ISBN Valid    14568
Publisher     15266
Subjects      13068
i             15291
dtype: int64

In [4]:
# How many interactions does each user have?
# This counts non-null `i` entries per user, so the same item appearing twice is counted twice.
books_per_user = interactions.groupby('u')['i'].agg('count')

# Descriptive statistics of the per-user counts
summary_stats = books_per_user.describe()
print(summary_stats)

count    7838.000000
mean       11.105767
std        16.441875
min         3.000000
25%         3.000000
50%         6.000000
75%        11.000000
max       385.000000
Name: i, dtype: float64


In [5]:
# Number of distinct (non-null) user ids in the interaction log
unique_users = len(interactions['u'].dropna().unique())
print(f"Number of unique users: {unique_users}")

Number of unique users: 7838


In [6]:
# Number of missing entries in each column of the item catalogue
missing_values = items.isna().sum()
missing_values

Title            0
Author        2653
ISBN Valid     723
Publisher       25
Subjects      2223
i                0
dtype: int64

# Train Test Split

In [7]:
# Order the log by user, then by timestamp within each user
interactions = interactions.sort_values(by=["u", "t"], ascending=True)
interactions.head(10)

Unnamed: 0,u,i,t
21035,0,0,1680191000.0
28842,0,1,1680783000.0
3958,0,2,1680801000.0
29592,0,3,1683715000.0
6371,0,3,1683715000.0
41220,0,4,1686569000.0
12217,0,5,1687014000.0
19703,0,6,1687014000.0
64522,0,7,1687014000.0
29380,0,8,1687260000.0


In [8]:
# Percentile rank of each timestamp within its user's history (dense ranking: equal timestamps share a rank)
interactions["pct_rank"] = interactions.groupby("u")["t"].rank(method='dense', pct=True)

# Replace the shuffled original row labels with a clean 0..n-1 index
interactions = interactions.reset_index(drop=True)
interactions.head(10)

Unnamed: 0,u,i,t,pct_rank
0,0,0,1680191000.0,0.04
1,0,1,1680783000.0,0.08
2,0,2,1680801000.0,0.12
3,0,3,1683715000.0,0.16
4,0,3,1683715000.0,0.2
5,0,4,1686569000.0,0.24
6,0,5,1687014000.0,0.28
7,0,6,1687014000.0,0.32
8,0,7,1687014000.0,0.36
9,0,8,1687260000.0,0.4


In [9]:
# Per-user chronological split: rows ranked below 0.8 go to train, rows at or above 0.8 go to test
train_data = interactions.loc[interactions["pct_rank"].lt(0.8)]
test_data = interactions.loc[interactions["pct_rank"].ge(0.8)]

In [10]:
# Row counts of the two splits
print("Training set size:", len(train_data))
print("Testing set size:", len(test_data))


Training set size: 65419
Testing set size: 21628


# 1. Collaborative Filtering
Recommendations based on user-item interactions.


In [11]:
# User x item matrix built from the training rows: 1 where the pivot holds a count, 0 elsewhere
binary_interaction_matrix = (
    train_data
    .pivot_table(index='u', columns='i', values='t', aggfunc='count')
    .notna()
    .astype(int)
)

binary_interaction_matrix

i,0,1,2,3,4,5,6,7,8,9,...,15279,15280,15282,15283,15284,15285,15287,15288,15289,15290
u,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7833,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7834,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7835,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7836,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Pairwise cosine similarity between the users' rows of the binary interaction matrix
from sklearn.metrics.pairwise import cosine_similarity

user_ids = binary_interaction_matrix.index
user_similarity = cosine_similarity(binary_interaction_matrix)

# Label both axes with the user ids so the matrix can be looked up by user
user_similarity_df = pd.DataFrame(data=user_similarity, index=user_ids, columns=user_ids)
user_similarity_df

u,0,1,2,3,4,5,6,7,8,9,...,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837
u,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7834,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
7835,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
7836,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [13]:
k = 10  # number of recommendations kept per user

# Step 1: score every (user, item) pair in a single matrix product -> shape (U, I)
scores = user_similarity_df.to_numpy() @ binary_interaction_matrix.to_numpy()

# Step 2: column positions of each user's k highest scores; argpartition does not order them
top_idx_unsorted = np.argpartition(-scores, k - 1, axis=1)[:, :k]

# Step 3: order those k candidates per user from highest to lowest score
top_scores = np.take_along_axis(scores, top_idx_unsorted, axis=1)
order = np.argsort(-top_scores, axis=1)
top_idx = np.take_along_axis(top_idx_unsorted, order, axis=1)

# Step 4: translate column positions back into item ids
item_labels = binary_interaction_matrix.columns.to_numpy()
top_labels = item_labels[top_idx]

# One row per user, one column per rank (0 = best)
recommendations = pd.DataFrame(
    top_labels,
    index=binary_interaction_matrix.index,
    columns=range(k),
)

# quick peek
print(recommendations.head())

     0    1    2    3    4    5    6    7    8     9
u                                                   
0   13    4   12   15   14   11    8   10    9     5
1   34   30   29   37   31   32   33   36   35  1573
2   46   58   53   49   56   82   64   75   45    67
3  149   40  138  155  128  142  143  156  133   139
4  202  198  191  203  193  201  197  196  199   195


In [14]:
# Collapse each user's ranked item ids into one space-separated string
recommendations_str = recommendations.astype(str).agg(' '.join, axis=1)

# Write the user index plus a single 'recommendation' column to disk
recommendations_str.to_csv('recommendations.csv', header=['recommendation'], index=True)

# Better predictions

In [10]:
from sklearn.decomposition import TruncatedSVD

# Factorise the binary interaction matrix into 50 components
svd = TruncatedSVD(n_components=50, random_state=42)
user_factors = svd.fit_transform(binary_interaction_matrix.to_numpy())
item_factors = svd.components_

# Multiply the factors back together to get approximate interaction scores
approx_scores = user_factors @ item_factors

# Re-attach the user and item labels of the original matrix
approx_scores_df = pd.DataFrame(
    approx_scores,
    index=binary_interaction_matrix.index,
    columns=binary_interaction_matrix.columns,
)

k = 10  # number of recommendations kept per user


def _top_k_items(user_scores):
    """Return the labels of the k largest entries of one user's score row."""
    return user_scores.nlargest(k).index.tolist()


# One list of k item ids per user
recommendations_svd = approx_scores_df.apply(_top_k_items, axis=1)
recommendations_svd.head()

u
0    [611, 46, 794, 4, 8999, 685, 2141, 3407, 2185,...
1    [611, 323, 9281, 5753, 618, 789, 176, 796, 769...
2    [46, 8999, 323, 66, 56, 3055, 2130, 87, 611, 5...
3    [149, 611, 163, 618, 169, 128, 466, 2139, 122,...
4    [323, 424, 976, 4497, 2225, 201, 472, 11332, 4...
dtype: object

# Google Books API helper functions

In [None]:
def get_book_details(isbn, timeout=10):
    """Look up one ISBN in the Google Books volumes API and return selected metadata.

    Parameters
    ----------
    isbn : str
        ISBN to search for; it is sent as the ``isbn:<value>`` query.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 10).

    Returns
    -------
    dict
        On success, a dict with the keys ``title``, ``subtitle``, ``authors``,
        ``publishedDate``, ``pageCount``, ``maturityRating``, ``language``,
        ``textSnippet``, ``description``, ``categories`` and ``thumbnail``,
        taken from the first volume in the response. Fields missing from the
        response are ``None``.
        On failure, ``{"error": <message>}``: either no volume matched, or the
        request raised a ``requests`` exception.
    """
    try:
        response = requests.get(
            "https://www.googleapis.com/books/v1/volumes",
            params={"q": f"isbn:{isbn}"},  # let requests URL-encode the query
            timeout=timeout,  # without a timeout a stalled connection would block forever
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"An error occurred: {e}"}

    # 'items' can be absent or empty when nothing matches
    volumes = data.get('items')
    if not volumes:
        return {"error": "No books found matching that ISBN."}

    volume = volumes[0]
    book_info = volume.get('volumeInfo', {})
    return {
        'title': book_info.get('title'),
        'subtitle': book_info.get('subtitle'),
        'authors': book_info.get('authors'),
        'publishedDate': book_info.get('publishedDate'),
        'pageCount': book_info.get('pageCount'),
        'maturityRating': book_info.get('maturityRating'),
        'language': book_info.get('language'),
        # searchInfo sits next to volumeInfo in the volume resource, not inside it
        'textSnippet': volume.get('searchInfo', {}).get('textSnippet'),
        'description': book_info.get('description'),
        'categories': book_info.get('categories'),
        'thumbnail': book_info.get('imageLinks', {}).get('thumbnail'),
    }

# Example usage: look up a single ISBN (this performs a live HTTP request)
# and show the dict that get_book_details returns.
isbn = "9782369903093"
book_details = get_book_details(isbn)
book_details

{'title': 'Peau',
 'subtitle': None,
 'authors': ['Mieke Versyp'],
 'publishedDate': '2022',
 'pageCount': 0,
 'maturityRating': 'NOT_MATURE',
 'language': 'fr',
 'textSnippet': None,
 'description': "Deux femmes se rencontrent dans un atelier de dessin. Esther, jeune artiste, anime un cours de nu pour adultes. Rita, plus âgée, mère et divorcée, y pose comme modèle vivant. Aussi différentes soient-elles, les deux femmes sont liées en tant que dessinatrice et modèle ; une relation qui tourne autour du fait de regarder et d'être regardée. En dehors de ces séances, chacune mène sa propre vie. Esther rencontre un homme qu'elle pourrait aimer, Rita tente de s'habituer à l'absence de sa fille... Toutes deux cherchent et s'accrochent, luttant contre leur passé et leurs insécurités.",
 'categories': ["Artists' models"],
 'thumbnail': 'http://books.google.com/books/content?id=6AmYzwEACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api'}

In [None]:
# Initialize a list to store the complete metadata
items_complete_list = []

# Fetch the Google Books metadata one row at a time.
# NOTE(review): `limited_items` is not defined in any cell visible in this notebook;
# it has to exist (presumably a subset of `items`) before this cell runs. Confirm.
for _, row in limited_items.iterrows():
    isbn = row['ISBN Valid']
    if pd.isnull(isbn):  # Rows without an ISBN cannot be looked up
        continue
    primary_isbn = isbn.split(';')[0].strip()  # Use the first ISBN if multiple are listed
    book_details = get_book_details(primary_isbn)
    if isinstance(book_details, dict):  # Ensure book_details is a dictionary
        # Failed lookups ({"error": ...}) are kept as well, so every queried row stays traceable
        book_details['i'] = row['i']  # Add the 'i' column value to the metadata
        book_details['isbn'] = primary_isbn  # Add the primary ISBN to the metadata
        book_details['title'] = row['Title']  # Prefer the catalogue title over the API title
        items_complete_list.append(book_details)

# Create a new DataFrame from the collected metadata
items_complete = pd.DataFrame(
    items_complete_list,
    columns=[
        'i', 'isbn', 'title', 'subtitle', 'authors', 'publishedDate',
        'pageCount', 'maturityRating', 'language', 'textSnippet',
        'description', 'categories', 'thumbnail'
    ]
)

# Map 'NOT_MATURE' to 0 and any other rating to 1.
# A missing rating (e.g. a failed lookup) stays missing instead of being counted as mature.
items_complete['maturityRating'] = items_complete['maturityRating'].map(
    lambda rating: np.nan if pd.isna(rating) else int(rating != 'NOT_MATURE')
)

# Display the new DataFrame
items_complete

Unnamed: 0,i,isbn,title,subtitle,authors,publishedDate,pageCount,maturityRating,language,textSnippet,description,categories,thumbnail
0,0,9782871303336,Classification décimale universelle : édition ...,édition abrégée,[UDC Consortium (The Hague)],2012,449,0,fr,,,,
1,1,9782278058327,Les interactions dans l'enseignement des langu...,agir professoral et pratiques de classe,[Francine Cicurel],2011,287,0,fr,,C'est dans l'interaction en classe que s'actua...,[Interaction analysis in education],
2,2,2343190194,Histoire de vie et recherche biographique : pe...,perspectives sociohistoriques,"[Aneta Slowik, Hervé Breton, Gaston Pineau]",2020,311,0,fr,,Depuis la parution en 1918 de l'ouvrage fondat...,[Narrative inquiry (Research method)],http://books.google.com/books/content?id=Q2PMD...
3,3,9782365350020,Ce livre devrait me permettre de résoudre le c...,,[Sylvain Mazas],2012-06-07,200,0,fr,,,,
4,4,9782702180815,Les années glorieuses : roman /,Les années glorieuses,[Pierre Lemaitre],2022,586,0,fr,,"Trois histoires d'amour, un lanceur d'alerte, ...",[French literature],http://books.google.com/books/content?id=f5u3z...
5,5,9782353450428,100 idées pour mieux gérer les troubles de l'a...,,[Francine Lussier],2013-02-11,176,0,fr,,"Chaque jour, les parents et les enseignants so...",[Education],
6,6,9782100806614,La boîte à outils des formateurs : 71 outils...,,"[Fabienne Bouchut, Frédérique Cuisiniez, Isabe...",2020-05-27,200,0,fr,,,,
7,7,9782710121152,L'autorité éducative dans la classe : douze si...,Douze situations pour apprendre à l'exercer,[Bruno Robbes],2010,265,0,fr,,"Tout enseignant, tout éducateur se doit d'exer...",,
8,8,9791097160821,Un autre regard sur le climat /,,[Emma],2019-05-02,96,0,fr,,Une bande-dessinée inédite par la blogueuse la...,,
9,9,9782413043591,Le mirage de la croissance verte /,,[Anthony Auffret],2022,152,0,fr,,,,


In [None]:
# Cell 14: get_book_details function (Python)
# -------------------------------------------
def get_book_details(isbn, timeout=10):
    """Look up one ISBN in the Google Books volumes API and return selected metadata.

    NOTE(review): a function with this name is also defined in an earlier cell;
    whichever definition runs last is the one in effect.

    Parameters
    ----------
    isbn : str
        ISBN to search for; it is sent as the ``isbn:<value>`` query.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 10).

    Returns
    -------
    dict
        On success, a dict with the keys ``title``, ``subtitle``, ``authors``,
        ``publishedDate``, ``pageCount``, ``maturityRating``, ``language``,
        ``textSnippet``, ``description``, ``categories`` and ``thumbnail``,
        taken from the first volume in the response. Fields missing from the
        response are ``None``.
        On failure, ``{"error": <message>}``: either no volume matched, or the
        request raised a ``requests`` exception.
    """
    try:
        response = requests.get(
            "https://www.googleapis.com/books/v1/volumes",
            params={"q": f"isbn:{isbn}"},  # let requests URL-encode the query
            timeout=timeout,  # without a timeout a stalled connection would block forever
        )
        response.raise_for_status()  # Raise an exception for HTTP errors
        data = response.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"An error occurred: {e}"}

    # 'items' can be absent or empty when nothing matches
    volumes = data.get('items')
    if not volumes:
        return {"error": "No books found matching that ISBN."}

    volume = volumes[0]
    book_info = volume.get('volumeInfo', {})
    return {
        'title': book_info.get('title'),
        'subtitle': book_info.get('subtitle'),
        'authors': book_info.get('authors'),
        'publishedDate': book_info.get('publishedDate'),
        'pageCount': book_info.get('pageCount'),
        'maturityRating': book_info.get('maturityRating'),
        'language': book_info.get('language'),
        # searchInfo sits next to volumeInfo in the volume resource, not inside it
        'textSnippet': volume.get('searchInfo', {}).get('textSnippet'),
        'description': book_info.get('description'),
        'categories': book_info.get('categories'),
        'thumbnail': book_info.get('imageLinks', {}).get('thumbnail'),
    }

# Example usage: look up a single ISBN (this performs a live HTTP request).
# NOTE(review): with Jupyter's default display settings, the bare `book_details`
# expression below shows nothing because more code follows it in the same cell.
isbn = "9782369903093"
book_details = get_book_details(isbn)
book_details

# Cell 15: Fetch metadata in parallel batches (Python)
# -----------------------------------------------------
def process_single_row(row):
    """Fetch the Google Books metadata for one catalogue row.

    Reads ``row['ISBN Valid']``, ``row['i']`` and ``row['Title']``. Returns
    ``None`` when the row has no ISBN; otherwise returns the dict produced by
    ``get_book_details`` for the first listed ISBN, extended with the keys
    ``i``, ``isbn`` and ``title`` (the catalogue title replaces the API title).
    """
    raw_isbn = row['ISBN Valid']
    if pd.isnull(raw_isbn):
        return None

    # Several ISBNs may be listed separated by ';' - only the first one is queried
    primary_isbn = raw_isbn.split(';')[0]

    # Short pause so that consecutive requests are not fired back to back
    time.sleep(0.1)
    details = get_book_details(primary_isbn)
    if not isinstance(details, dict):
        return None

    details.update(i=row['i'], isbn=primary_isbn, title=row['Title'])
    return details

def process_items_in_batches(df, batch_size=50):
    """Run ``process_single_row`` over ``df`` in consecutive slices of ``batch_size`` rows.

    Each slice gets its own pool of 5 worker threads, and the next slice starts
    only after the pool for the current one has shut down. Falsy results
    (e.g. ``None``) are dropped. Within a slice the results are collected in
    completion order, which can differ from row order.

    Returns the list of collected results.
    """
    collected = []

    for offset in range(0, len(df), batch_size):
        chunk = df.iloc[offset:offset + batch_size]

        with ThreadPoolExecutor(max_workers=5) as pool:
            pending = [
                pool.submit(process_single_row, record)
                for _, record in chunk.iterrows()
            ]
            outcomes = (finished.result() for finished in as_completed(pending))
            # filter(None, ...) keeps only truthy results
            collected.extend(filter(None, outcomes))

    return collected

# Fetch the metadata for every row of limited_items, five rows per batch.
# NOTE(review): `limited_items` is not defined in any cell visible in this notebook;
# it has to exist (presumably a subset of `items`) before this cell runs. Confirm.
items_complete_list = process_items_in_batches(limited_items, batch_size=5)

# Create a new DataFrame from the collected metadata
items_complete = pd.DataFrame(
    items_complete_list,
    columns=[
        'i', 'isbn', 'title', 'subtitle', 'authors', 'publishedDate',
        'pageCount', 'maturityRating', 'language', 'textSnippet',
        'description', 'categories', 'thumbnail'
    ]
)

# Map 'NOT_MATURE' to 0 and any other rating to 1.
# A missing rating (e.g. a failed lookup such as an HTTP 429) stays missing
# instead of being counted as mature.
items_complete['maturityRating'] = items_complete['maturityRating'].map(
    lambda rating: np.nan if pd.isna(rating) else int(rating != 'NOT_MATURE')
)

# Display the new DataFrame
items_complete


# Cell 16: Show items_complete_list (Python)
# ------------------------------------------
# Raw result dicts, including the {"error": ...} entries of failed lookups
items_complete_list


[{'error': 'An error occurred: 429 Client Error: Too Many Requests for url: https://www.googleapis.com/books/v1/volumes?q=isbn:2343190194',
  'i': 2,
  'isbn': '2343190194',
  'title': 'Histoire de vie et recherche biographique : perspectives sociohistoriques /'},
 {'title': 'Classification décimale universelle : édition abrégée /',
  'subtitle': 'édition abrégée',
  'authors': ['UDC Consortium (The Hague)'],
  'publishedDate': '2012',
  'pageCount': 449,
  'maturityRating': 'NOT_MATURE',
  'language': 'fr',
  'textSnippet': None,
  'description': None,
  'categories': None,
  'thumbnail': None,
  'i': 0,
  'isbn': '9782871303336'},
 {'title': "Ce livre devrait me permettre de résoudre le conflit au Proche-Orient, d'avoir mon diplôme, et de trouver une femme /",
  'subtitle': None,
  'authors': ['Sylvain Mazas'],
  'publishedDate': '2012-06-07',
  'pageCount': 200,
  'maturityRating': 'NOT_MATURE',
  'language': 'fr',
  'textSnippet': None,
  'description': None,
  'categories': None,


In [None]:
import pandas as pd
import asyncio
import aiohttp
from aiolimiter import AsyncLimiter
import aiohttp_client_cache
from tqdm.asyncio import tqdm_asyncio
import nest_asyncio

# Patch asyncio so that asyncio.run() can be called from inside the notebook,
# where an event loop is already running.
nest_asyncio.apply()

# Throughput limits: at most 10 lookups in flight, and a rate limit of 5 per second
MAX_CONCURRENT_TASKS = 10
REQUESTS_PER_SECOND = 5

# Rate limiter (REQUESTS_PER_SECOND per 1-second period) and concurrency cap
# shared by all lookups below
limiter = AsyncLimiter(max_rate=REQUESTS_PER_SECOND, time_period=1)
semaphore = asyncio.Semaphore(MAX_CONCURRENT_TASKS)

# HTTP session with response caching, shared by all lookups below.
# NOTE(review): confirm that CachedSession honours `cache_name` / `expire_after`
# when passed directly; the library may expect a backend object via `cache=` instead.
cache = aiohttp_client_cache.CachedSession(cache_name='books_cache_fast', expire_after=86400)

async def get_book_details(session, isbn, row):
    """Asynchronously look up one catalogue row in the Google Books volumes API.

    NOTE(review): this coroutine reuses the name of the synchronous
    ``get_book_details(isbn)`` defined in earlier cells and replaces it once
    this cell has run.

    Parameters
    ----------
    session
        Object whose ``get(url)`` returns an async context manager yielding a
        response with ``.status`` and an awaitable ``.json()``.
    isbn : str
        Content of the row's ISBN field; when several ISBNs are listed
        separated by ';', only the first one is queried.
    row
        Mapping providing ``row['i']`` and ``row['Title']``.

    Returns
    -------
    dict or None
        ``None`` when ``isbn`` is empty. Otherwise a dict that always carries
        ``i``, ``isbn`` and ``title`` (the catalogue title), plus either the
        metadata fields of the first matching volume or an ``error`` message.
        Any exception raised during the lookup is reported through ``error``
        rather than propagated.
    """
    if not isbn:
        return None

    primary_isbn = isbn.split(';')[0].strip()
    url = f"https://www.googleapis.com/books/v1/volumes?q=isbn:{primary_isbn}"

    def failure(message):
        """Build the error result, keeping the row identifiers for traceability."""
        return {"error": message, "isbn": primary_isbn, "i": row['i'], "title": row['Title']}

    # Both the module-level concurrency cap and the rate limiter gate the request
    async with semaphore:
        async with limiter:
            try:
                async with session.get(url) as response:
                    if response.status != 200:
                        return failure(f"HTTP {response.status}")
                    data = await response.json()

                    # 'items' can be absent or empty when nothing matches
                    volumes = data.get('items')
                    if not volumes:
                        return failure("No books found.")

                    volume = volumes[0]
                    book_info = volume.get('volumeInfo', {})
                    return {
                        'i': row['i'],
                        'isbn': primary_isbn,
                        'title': row['Title'],
                        'subtitle': book_info.get('subtitle'),
                        'authors': book_info.get('authors'),
                        'publishedDate': book_info.get('publishedDate'),
                        'pageCount': book_info.get('pageCount'),
                        'maturityRating': book_info.get('maturityRating'),
                        'language': book_info.get('language'),
                        # searchInfo sits next to volumeInfo in the volume resource, not inside it
                        'textSnippet': volume.get('searchInfo', {}).get('textSnippet'),
                        'description': book_info.get('description'),
                        'categories': book_info.get('categories'),
                        'thumbnail': book_info.get('imageLinks', {}).get('thumbnail')
                    }

            except Exception as e:
                # Best effort: one failed lookup must not abort the whole batch
                return failure(str(e))


async def fetch_all_books(df):
    """Look up every row of ``df`` that has an ISBN and return the non-``None`` results.

    Uses the module-level ``cache`` session for all requests and gathers the
    lookups through ``tqdm_asyncio.gather``.
    """
    async with cache as session:
        pending = []
        for _, record in df.iterrows():
            raw_isbn = record['ISBN Valid']
            # Rows without an ISBN cannot be queried
            if pd.notnull(raw_isbn):
                pending.append(get_book_details(session, raw_isbn, record))

        fetched = await tqdm_asyncio.gather(*pending)
        return [outcome for outcome in fetched if outcome is not None]

# Remove duplicate ISBNs before querying, so each ISBN string is requested only once.
# NOTE(review): `limited_items` is not defined in any cell visible in this notebook;
# it has to exist (presumably a subset of `items`) before this cell runs. Confirm.
limited_items_unique = limited_items.drop_duplicates('ISBN Valid')

# Fetch all
items_complete_list = asyncio.run(fetch_all_books(limited_items_unique))

# Format to DataFrame (failed lookups contribute an 'error' column)
items_complete = pd.DataFrame(items_complete_list)

# Normalize maturityRating: 'NOT_MATURE' -> 0, any other rating -> 1.
# A missing rating (failed lookup) stays missing instead of being counted as mature.
# The column is absent when no lookup succeeded, hence the guard.
if 'maturityRating' in items_complete.columns:
    items_complete['maturityRating'] = items_complete['maturityRating'].map(
        lambda rating: np.nan if pd.isna(rating) else int(rating != 'NOT_MATURE')
    )

# Done
items_complete.head()

 13%|█▎        | 64/492 [00:12<01:22,  5.19it/s]


KeyboardInterrupt: 

In [None]:
from gradio_client import Client

# French book description used as a sample input
sample_description = "Trois histoires d'amour, un lanceur d'alerte, une adolescente égarée, deux processions, Bouddha et Confucius, un journaliste ambitieux, une mort tragique, le chat Joseph, une épouse impossible, le retour du passé, un parfum d'exotisme, une passion soudaine et irrésistible. Et quelques meurtres!!"

# Send the description to the "/predict" endpoint of the Rahmat82/emotions_classifier app
client = Client("Rahmat82/emotions_classifier")
result = client.predict(query=sample_description, api_name="/predict")
print(result)


Loaded as API: https://rahmat82-emotions-classifier.hf.space ✔
{'label': 'joy', 'confidences': [{'label': 'joy', 'confidence': 0.9117047190666199}, {'label': 'anger', 'confidence': 0.5091436505317688}, {'label': 'sadness', 'confidence': 0.5015572309494019}, {'label': 'fear', 'confidence': 0.36836233735084534}, {'label': 'love', 'confidence': 0.3647423982620239}, {'label': 'surprise', 'confidence': 0.22409018874168396}]}
