In [1]:
import pandas as pd
from sqlalchemy import create_engine, select, Table, MetaData
from sqlalchemy.orm import sessionmaker
import yaml
import time
from tqdm import tqdm

In [2]:
def conexion_db(database_url=None):
    """
    Establishes a connection to the SQL database.

    The connection URL is never hard-coded, so credentials stay out of the
    notebook: it is taken from the `database_url` argument or, when that is
    omitted, from the `DATABASE_URL` environment variable.

    Args:
        database_url (str, optional): SQLAlchemy connection URL. Defaults to
            the value of the `DATABASE_URL` environment variable.

    Returns:
        tuple: (engine, session) where:
            - engine: The SQLAlchemy Engine object connected to the database.
            - session: A SQLAlchemy session for executing queries.

    Raises:
        Exception: If no URL is available or the connection to the database fails.
    """
    import os  # local import so this cell does not depend on the import cell

    if database_url is None:
        database_url = os.environ.get('DATABASE_URL')
    if not database_url:
        raise Exception(
            "Error in the connection to the database: no URL given "
            "(pass database_url or set the DATABASE_URL environment variable)"
        )

    try:
        engine = create_engine(database_url)
        # create_engine() does not connect by itself; open (and release) one
        # connection so that a bad URL or password is reported here.
        with engine.connect():
            pass
        session_factory = sessionmaker(bind=engine)
        session = session_factory()
        print("Connection to the database successful")
        return engine, session
    except Exception as exc:
        # Chain the original error so the real cause stays visible.
        raise Exception("Error in the connection to the database") from exc


In [3]:
# Open the engine and session that the following cells pass to the query helpers.
engine, session = conexion_db()

Connection to the database successful


In [4]:
def get_table_names(engine):
    """
    Get the names of all tables in the database.

    The schema is reflected through `engine` into a fresh MetaData object and
    the names of the reflected tables are returned.

    Args:
        engine (sqlalchemy.engine.base.Engine): The SQLAlchemy Engine object connected to the database.

    Returns:
        list: A list of strings with the names of all tables in the database.
    """
    metadata = MetaData()
    metadata.reflect(engine)
    # Materialise the keys view so callers really get the documented list
    # (indexable, sliceable) rather than a dict_keys view.
    return list(metadata.tables.keys())

def get_table(engine, table_name, n_rows=5):
    """
    Get the first n rows of a table.

    Args:
        engine (sqlalchemy.engine.base.Engine): The SQLAlchemy Engine object connected to the database.
        table_name (str): The name of the table to query.
        n_rows (int): The number of rows to retrieve (non-negative).

    Returns:
        pandas.DataFrame: A pandas DataFrame with the first n rows of the table.

    Raises:
        ValueError: If `n_rows` is not an integer value or is negative.
    """
    # Only a plain integer may reach the LIMIT clause.
    row_limit = int(n_rows)
    if row_limit < 0:
        raise ValueError(f"n_rows must be non-negative, got {n_rows!r}")

    # Identifiers cannot be sent as bound parameters, so quote the name and
    # double any embedded double quote (standard SQL identifier escaping).
    quoted_name = '"' + str(table_name).replace('"', '""') + '"'

    return pd.read_sql(f'SELECT * FROM {quoted_name} LIMIT {row_limit}', engine)

In [5]:
# List every table name reflected from the connected database.
print(get_table_names(engine))

dict_keys(['book', 'author', 'publisher', 'genre', 'series', 'awards', 'characters', 'book_author', 'book_publisher', 'book_genre', 'user_book_source', 'book_source', 'user', 'fav_medias', 'media', 'liked_publisher', 'liked_genres', 'liked_series', 'liked_author', 'book_series', 'liked_books', 'fav_books', 'reads_with', 'reading_mean', 'book_awards', 'user_book', 'book_characters'])


In [6]:
# Preview the `book` table (get_table defaults to the first 5 rows).
display(get_table(engine, 'book'))

Unnamed: 0,book_id,book_title,nb_of_pages,book_description,settings,isbn,isbn13,original_title,review_count,one_star_rating,two_star_rating,three_star_rating,four_star_rating,five_star_rating
0,29358729,Forever Pucked,318,"Being engaged to Alex Waters, team captain and...",,1682304183,9781682304181,,1004,81,308,1524,3449,4767
1,5582346,طوق الحمامة في الألفة والألاف,328,كتاب طوق الحمامة أو طوق الحمامة في الألفة والأ...,,9953435022,9789953435022,طوق الح٠ا٠ة [Ṭawq al-Ḥamāmah],706,120,303,1184,1744,1654
2,6316959,Veiled Freedom,432,"Naive and idealistic, relief worker Amy Mallor...",,1414314752,9781414314754,Veiled Freedom,76,13,41,116,192,193
3,11277484,Requiem,336,"Bin Okuma, a celebrated visual artist, has rec...",,1443406899,9781443406895,,231,10,37,202,541,438
4,13115184,Report from Planet Midnight,112,"Infused with feminist, Afro-Caribbean views of...",,1604864974,9781604864977,,71,6,16,99,180,133


In [7]:
def livre_pref_user(user_id, query=None, engine=None):
    """
    Get the titles of the books a user has liked.

    Joins `book` with `liked_books` on `book_id` and keeps the rows whose
    `liked_books.user_id` matches `user_id`.

    Args:
        user_id (int): The ID of the user.
        query: Ignored. Kept only so existing positional calls
            `livre_pref_user(user_id, query, engine)` keep working.
        engine: Database connection accepted by `pandas.read_sql`
            (e.g. the SQLAlchemy Engine). Required.

    Returns:
        pandas.DataFrame: One `book_title` column, one row per liked book.

    Raises:
        TypeError: If `engine` is not provided, or `user_id` cannot be
            converted to an integer.
        ValueError: If `user_id` is a string that is not an integer.
    """
    if engine is None:
        raise TypeError("livre_pref_user() missing required argument: 'engine'")

    # Coerce to int so nothing but a number can be interpolated into the SQL.
    user_id = int(user_id)

    sql = f"""
    SELECT book.book_title
    FROM book
    JOIN liked_books ON book.book_id = liked_books.book_id
    WHERE liked_books.user_id = {user_id}
    """
    return pd.read_sql(sql, engine)

# The helper needs the database engine; its `query` argument is ignored, so pass None.
livre_pref_user(20, None, engine)

Unnamed: 0,book_title
0,Akame ga kill
1,1984


In [None]:
# The users table is not filled in yet, so we simulate it.
# We create 1000 users with random usernames, 2 or 3 favourite books (drawn from the book table), a random number of books read between 0 and 10, and a random reading time between 0 and 1000.
import random
import string

def random_string(length):
    """Build a random string of `length` characters drawn from A-Z and 0-9."""
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

def random_books(n_books, df_books):
    """Sample `n_books` entries, without replacement, from the 'book_title' column of `df_books`."""
    titles = list(df_books['book_title'])
    return random.sample(titles, n_books)

def random_users(n_users, df_books):
    """
    Generate `n_users` simulated user records.

    Each record is a dict with a 10-character random username, 2 or 3
    favourite books, 0 to 5 liked books (both drawn from `df_books`), a number
    of books read between 0 and 10 and a reading time between 0 and 1000.

    Returns:
        list: The generated user dicts, in creation order.
    """
    def _one_user():
        # Keys are evaluated top to bottom, which fixes the order in which the
        # random generator is consumed.
        return {
            'username': random_string(10),
            'favorite_books': random_books(random.randint(2, 3), df_books),
            'liked_books': random_books(random.randint(0, 5), df_books),
            'books_read': random.randint(0, 10),
            'time_read': random.randint(0, 1000)
        }

    return [_one_user() for _ in range(n_users)]

# Load up to 5000 books and up to 200 users from the database.
df_books = get_table(engine, 'book', n_rows=5000)
df_users = get_table(engine, 'user', n_rows=200)
# Simulated-user alternative (disabled): build df_users from random_users instead.
## users = random_users(5000, df_books)
## df_users = pd.DataFrame(users)
# NOTE(review): the `user` table displayed by this cell has no 'favorite_books' or
# 'liked_books' column, while recoUserBased reads both from every row. With this
# df_users the recommendation demo fails with KeyError: 'favorite_books'.
# Presumably those columns must be built from the liked_books / fav_books tables,
# or the simulated users re-enabled — TODO confirm the intended source.
df_users.head()

Unnamed: 0,user_id,username,password,age,gender,nb_book_per_year,nb_book_pleasure,nb_book_work,initiated_by,reading_time,choice_motivation
0,1,diespurmann,mdp,20,M,1 à 5,1 à 5,0,Personne,Avant de dormir,Quatrième de couverture
1,2,juliamaricaoudin29,mdp,19,F,1 à 5,1 à 5,1 à 5,Un professeur,Après-midi,"Recommandations de proches, Réseaux sociau/inf..."
2,3,diane.moneger2,mdp,20,F,Plus de 20,Plus de 20,1 à 5,Un membre de votre famille,Nuit,Quatrième de couverture
3,4,beaulieu.stefan.2004,mdp,20,M,1 à 5,1 à 5,0,Un membre de votre famille,Après-midi,"Recommandations de proches, Réseaux sociau/inf..."
4,5,raxiou80,mdp,20,M,Plus de 20,Plus de 20,1 à 5,moi,Avant de dormir,"Recommandations de proches, Couverture"


In [9]:
def recoUserBased(user, userDF, k=5):
    """
    Recommend books to `user` from what the most similar users liked.

    Similarity with another user is
    `len(read & other_liked) + 1.2 * len(favorites & other_favorites)`,
    where `read` is the union of the user's liked and favourite books.
    Other users are ranked by decreasing similarity and split into four
    consecutive chunks (quartiles). The chunks are scanned in order and the
    scan stops at the first chunk that yields at least one book the user has
    not read; each such book is scored by how many users of that chunk liked it.

    Args:
        user: Row-like object with 'username', 'liked_books' and
            'favorite_books' entries (the last two being iterables of books).
        userDF (pandas.DataFrame): All users, with the same three columns.
        k (int): Currently unused; kept for backward compatibility.

    Returns:
        dict: Book -> number of similar users who liked it, ordered by
        decreasing count. Empty when no other user likes an unread book.
    """
    favorites = set(user['favorite_books'])
    # Favourite books count as already read.
    already_read = set(user['liked_books']) | favorites

    similarity = {}
    liked_by_username = {}
    for _, other in userDF.iterrows():
        name = other['username']
        if name == user['username']:
            continue
        shared_read = len(already_read & set(other['liked_books']))
        shared_favorites = len(favorites & set(other['favorite_books']))
        similarity[name] = shared_read + 1.2 * shared_favorites
        # Remember the liked books now instead of filtering the DataFrame
        # again for every similar user; the first row wins for a repeated
        # username, as a lookup by username would.
        liked_by_username.setdefault(name, other['liked_books'])

    # Most similar users first (ties keep DataFrame order: the sort is stable).
    ranked = sorted(similarity, key=similarity.get, reverse=True)

    recommendations = {}
    total = len(ranked)
    for chunk in range(4):
        # Integer bounds computed as (chunk * total) // 4 so that the four
        # chunks cover every user even when total is not a multiple of 4.
        start = chunk * total // 4
        stop = (chunk + 1) * total // 4
        for name in ranked[start:stop]:
            for book in liked_by_username[name]:
                if book not in already_read:
                    recommendations[book] = recommendations.get(book, 0) + 1
        if recommendations:
            break

    # Most frequently liked books first.
    return dict(sorted(recommendations.items(), key=lambda item: item[1], reverse=True))



In [10]:
# Demo: show the first user's favourite and liked books, then the recommendations.
# NOTE(review): this requires df_users to carry 'favorite_books' and 'liked_books'
# columns; the recorded run of this cell ended with KeyError: 'favorite_books'.
print(f"l'utilisateur 0 a pour livre prefere : {df_users.iloc[0]['favorite_books']}")
print(f"l'utilisateur 0 a pour livre lu : {df_users.iloc[0]['liked_books']}")
print(f"recomandation : {recoUserBased(df_users.iloc[0], df_users, 5)}")

KeyError: 'favorite_books'