# Dataset

In [20]:
from models.csv_loader import CSVLoader
from models.products.product_registry import ProductRegistry
from models.products.product_mapping_row import ProductMappingRow
from models.products.product_row import ProductRow

# Build the product registry from the ProductRow and ProductMappingRow
# records returned by their respective CSV loaders.
product_registry = ProductRegistry(
    CSVLoader(ProductRow).read(),
    CSVLoader(ProductMappingRow).read(),
)

In [21]:
from models.users.user_registry import UserRegistry
from models.users.user_mapping_row import UserMappingRow
from models.users.user_row import UserRow

# Build the user registry from the UserRow and UserMappingRow records
# returned by their respective CSV loaders.
user_registry = UserRegistry(
    CSVLoader(UserRow).read(),
    CSVLoader(UserMappingRow).read(),
)

In [22]:
from models.ratings.rating_registry import RatingRegistry
from models.ratings.rating_row import RatingRow

# Build the rating registry from the RatingRow records; it is handed the
# user and product registries created in the previous cells.
rating_registry = RatingRegistry(
    CSVLoader(RatingRow).read(),
    user_registry,
    product_registry,
)

# Quick sanity check: dataset size and the fields of the first rating.
print("number of ratings", len(rating_registry.ratings))
print("rating", rating_registry.ratings[0].rating)
print("timestamp", rating_registry.ratings[0].timestamp)

number of ratings 932293
rating 5
timestamp 978300760


# Transform Data to Logic Programs

In [18]:
def _progol_title_atom(title):
    """Convert a movie title into a token that can be written as a bare atom.

    The title is lower-cased, the characters "-", " ", "(" and "/" become "_",
    and the characters ) ' : , ? ! . & * $ # é ; are removed. Any other
    character is kept unchanged.

    A result that is empty, or that starts with a digit or an underscore, is
    prefixed with "m": in Prolog syntax a bare token starting with a digit is
    not an atom and one starting with "_" is a variable.
    """
    atom = title.lower().translate(str.maketrans("- (/", "____", ")':,?!.&*$#é;"))
    if not atom or atom[0].isdigit() or atom[0] == "_":
        atom = "m" + atom
    return atom


def _progol_genre_atom(genre):
    """Convert a genre label into an atom / predicate name.

    The label is lower-cased, "-" becomes "_", and "'" and "&" are removed
    (e.g. "Children's" -> "childrens", "Sci-Fi" -> "sci_fi").
    """
    return genre.lower().translate(str.maketrans("-", "_", "'&"))


def generate_progol_program(user_registry, product_registry, rating_registry, output_file_name):
    """Write the whole dataset as one Progol-style logic program.

    The file contains, in this order: mode declarations, determinations,
    background knowledge (between ``:- begin_bg.`` / ``:- end_bg.``), positive
    examples and negative examples. A rating strictly greater than 3 yields a
    positive ``recommend(user, movie)`` example, any other rating a negative
    one. Every rating also yields a ``watched(user, movie)`` background fact.

    For each product three background facts are written: the unary genre fact
    ``<genre>(<pid>).`` (the predicate the mode declarations refer to), plus
    ``m_gender(<pid>,<genre>).`` and ``m_name(<pid>,<title>).``.

    Parameters
    ----------
    user_registry : object with a ``users`` iterable; each user exposes
        ``uid``, ``gender`` and ``age`` (``age`` must be a key of ``age_dic``).
    product_registry : object with a ``products`` iterable; each product
        exposes ``pid``, ``name`` and ``genre``.
    rating_registry : object with a ``ratings`` iterable; each rating exposes
        ``rating``, ``user.uid`` and ``product.pid``.
    output_file_name : str
        Output path without extension; ``.pl`` is appended.

    Returns
    -------
    str
        The path of the written file.

    Raises
    ------
    KeyError
        If a user's ``age`` is not one of the labels in ``age_dic``.
    """
    user_features = ["m", 'f', "under18", "b18to24", "b25to34", "b35to44", "b45to49", "b50to55", "plus56"]
    # NOTE(review): a genre spelled "Film-Noir" is converted to "film_noir",
    # which does not match the declared "filmnoir" predicate below - confirm
    # against the genre labels actually present in the data.
    movie_features = ["action", "adventure", "animation",  "childrens", "comedy", "crime", "documentary", "drama", "fantasy", "filmnoir", "horror", "musical", "mystery", "romance", "sci_fi", "thriller", "western",  "war"]
    both_features = ["watched"]
    age_dic = {"Under 18": "under18", "56+": "plus56", "25-34": "b25to34", "45-49": "b45to49", "50-55": "b50to55", "35-44": "b35to44", "18-24": "b18to24"}

    # Mode declarations
    modeh = "modeh(*, recommend(+user, -movie)).\n"
    modeb_list = [f"modeb(*, {feature}(+user))." for feature in user_features]
    modeb_list += [f"modeb(*, {feature}(+movie))." for feature in movie_features]
    modeb_list += [f"modeb(*, {feature}(+user,-movie))." for feature in both_features]

    # Determinations
    determinations = [f"determination(recommend/2, {f}/1)." for f in user_features + movie_features]
    determinations += [f"determination(recommend/2, {f}/2)." for f in both_features]

    # Background knowledge
    background_knowledge = []
    for user in user_registry.users:
        background_knowledge.append(f"{user.gender.lower()}({user.uid}).")
        background_knowledge.append(f"{age_dic[user.age]}({user.uid}).")
    for product in product_registry.products:
        genre_atom = _progol_genre_atom(product.genre)
        title_atom = _progol_title_atom(product.name)
        # The modes/determinations above declare unary genre predicates, so the
        # matching facts must exist for the learner to be able to use them.
        if genre_atom:
            background_knowledge.append(f"{genre_atom}({product.pid}).")
        background_knowledge.append(f"m_gender({product.pid},{genre_atom}).")
        background_knowledge.append(f"m_name({product.pid},{title_atom}).")

    # One pass over the ratings: watched/2 facts plus positive/negative examples
    watched_facts = []
    positive_examples = []
    negative_examples = []
    for rating in rating_registry.ratings:
        user_id = rating.user.uid
        movie_id = rating.product.pid
        watched_facts.append(f"watched({user_id},{movie_id}).")
        if rating.rating > 3:
            positive_examples.append(f"recommend({user_id},{movie_id}).")
        else:
            negative_examples.append(f"recommend({user_id},{movie_id}).")
    background_knowledge.extend(watched_facts)

    # Combine all parts into a Progol-compatible logic program
    sections = [
        "% Mode Declarations\n" + modeh + "\n".join(modeb_list) + "\n\n",
        "% Determinations\n" + "\n".join(determinations) + "\n\n",
        "% Background Knowledge\n:- begin_bg.\n" + "\n".join(background_knowledge) + "\n:- end_bg.\n\n",
        "% Positive Examples\n:- begin_in_pos.\n" + "\n".join(positive_examples) + "\n:- end_in_pos.\n\n",
        "% Negative Examples\n:- begin_in_neg.\n" + "\n".join(negative_examples) + "\n:- end_in_neg.\n",
    ]

    # Save the Progol logic program to a file with a custom name. The encoding
    # is explicit because titles may keep non-ASCII characters.
    output_file_path = f"{output_file_name}.pl"
    with open(output_file_path, "w", encoding="utf-8") as file:
        file.writelines(sections)

    return output_file_path

# Export the full dataset as a single Progol program; the function appends
# ".pl" to the base name given here and returns the resulting path.
output_file_name = './movie_rec'
output_file = generate_progol_program(
    user_registry,
    product_registry,
    rating_registry,
    output_file_name,
)
print(f"Progol logic program saved as {output_file}")

Progol logic program saved as ./movie_rec.pl


In [38]:
import numpy as np
import os

def generate_progol_program_splitted(user_registry, product_registry, rating_registry, output_file_name):
    
    # Mode declarations
    modeh = f"modeh(*, recommend(+user, -movie)).\n"
    user_features = ["m", 'f', "under18", "b18to24", "b25to34", "b35to44", "b45to49", "b50to55", "plus56"]
    movie_features = ["action", "adventure", "animation",  "childrens", "comedy", "crime", "documentary", "drama", "fantasy", "filmnoir", "horror", "musical", "mystery", "romance", "sci_fi", "thriller", "western",  "war"]
    both_features = ["watched"]
    age_dic = {"Under 18": "under18", "56+": "plus56", "25-34": "b25to34", "45-49": "b45to49", "50-55": "b50to55", "35-44": "b35to44", "18-24": "b18to24"}

    modeb_list = [f"modeb(*, {feature}(+user))." for feature in user_features]
    modeb_list +=[f"modeb(*, {feature}(+movie))." for feature in movie_features]
    modeb_list += [f"modeb(*, {feature}(+user,-movie))." for feature in both_features]

    # Determinations
    determinations = []
    for f in user_features+movie_features:
        determinations.append(f"determination(recommend/2, {f}/1).")
    for f in both_features:
        determinations.append(f"determination(recommend/2, {f}/2).")
    
    # Positive and negative examples
    positive_examples = []
    negative_examples = []
    for rating in rating_registry.ratings:
        score = rating.rating
        user_id = rating.user.uid
        movie_id = rating.product.pid
        if score > 3:
            positive_examples.append(f"recommend({user_id},{movie_id}).")
        else:
            negative_examples.append(f"recommend({user_id},{movie_id}).")
    
    np.random.shuffle(positive_examples)
    np.random.shuffle(negative_examples)
    nb_partitions = 100000
    nb_positive_per_partition = len(positive_examples)//nb_partitions
    nb_negative_per_partition = len(negative_examples)//nb_partitions
    
    for i in range(nb_partitions):
        start_pos = i * nb_positive_per_partition
        end_pos = start_pos + nb_positive_per_partition
        start_neg = i * nb_negative_per_partition
        end_neg = start_neg + nb_negative_per_partition
        
        pos_examples_for_partition = positive_examples[start_pos:end_pos]
        neg_examples_for_partition = negative_examples[start_neg:end_neg]
        
        users_in_partition = set()
        movies_in_partition = set()
        for p_ex in pos_examples_for_partition:
            users_in_partition.add(int(p_ex.split(',')[0].split('(')[-1]))
            movies_in_partition.add(int(p_ex.split(',')[-1].split(')')[0]))
        for p_ex in neg_examples_for_partition:
            users_in_partition.add(int(p_ex.split(',')[0].split('(')[-1]))
            movies_in_partition.add(int(p_ex.split(',')[-1].split(')')[0]))

        # Background knowledge
        background_knowledge = []
        for user in user_registry.users:
            user_id = user.uid
            if user_id in list(users_in_partition):
                user_gender = user.gender
                user_age = user.age
                background_knowledge.append(f"{user_gender.lower()}({user_id}).")
                background_knowledge.append(f"{age_dic[user_age]}({user_id}).")
        for product in product_registry.products:
            movie_id = product.pid
            if movie_id in movies_in_partition:
                movie_name = product.name
                movie_name = movie_name.lower()
                movie_name = movie_name.replace("-","_")
                movie_name = movie_name.replace(" ","_")
                movie_name = movie_name.replace("(","_")
                movie_name = movie_name.replace(")","")
                movie_name = movie_name.replace("'","")
                movie_name = movie_name.replace(":","")
                movie_name = movie_name.replace(",","")
                movie_name = movie_name.replace("?","")
                movie_name = movie_name.replace("!","")
                movie_name = movie_name.replace(".","")
                movie_name = movie_name.replace("&","")
                movie_name = movie_name.replace("*","")
                movie_name = movie_name.replace("/","_")
                movie_name = movie_name.replace("$","")
                movie_name = movie_name.replace("#", "")
                movie_name = movie_name.replace("é", "")
                movie_name = movie_name.replace(";", "")
                if movie_name[0].isdigit():
                    movie_name = "m"+movie_name
                movie_gender = product.genre
                movie_gender = movie_gender.lower()
                movie_gender = movie_gender.replace("-","_")
                movie_gender = movie_gender.replace("'","")
                movie_gender = movie_gender.replace("&","")
                background_knowledge.append(f"{movie_gender}({movie_id}).")
                #background_knowledge.append(f"m_gender({movie_id},{movie_gender}).")
                #background_knowledge.append(f"m_name({movie_id},{movie_name}).")
        """for user in users_in_partition:
            user_rating = rating_registry.find_user_ratings(user)
            user_rating.sort(key=lambda rating: rating.timestamp, reverse=True)
            percentage_recent = 0.2
            len_rating = int(len(user_rating)*percentage_recent)
            user_rating = user_rating[:len_rating]
            for rating in user_rating:
                movie_id = rating.product.pid
                if user_id in users_in_partition and movie_id in movies_in_partition:
                    background_knowledge.append(f"watched({user_id},{movie_id}).") """
        for rating in pos_examples_for_partition:
            user_id = rating.split(',')[0].split('(')[-1]
            movie_id = rating.split(',')[-1].split(')')[0]
            background_knowledge.append(f"watched({user_id},{movie_id}).")
        for rating in neg_examples_for_partition:
            user_id = rating.split(',')[0].split('(')[-1]
            movie_id = rating.split(',')[-1].split(')')[0]
            background_knowledge.append(f"watched({user_id},{movie_id}).")
                
        

        # Combine all parts into a Progol-compatible logic program
        progol_program = "% Mode Declarations\n"
        progol_program += modeh
        progol_program += "\n".join(modeb_list) + "\n\n"

        progol_program += "% Determinations\n"
        progol_program += "\n".join(determinations) + "\n\n"

        progol_program += "% Background Knowledge\n:- begin_bg.\n"
        progol_program += "\n".join(background_knowledge) + "\n:- end_bg.\n\n"

        progol_program += "% Positive Examples\n:- begin_in_pos.\n"
        progol_program += "\n".join(pos_examples_for_partition) + "\n:- end_in_pos.\n\n"

        progol_program += "% Negative Examples\n:- begin_in_neg.\n"
        progol_program += "\n".join(neg_examples_for_partition) + "\n:- end_in_neg.\n"

        # Save the Progol logic program to a file with a custom name
        if not os.path.exists("moviepartitions/"):
            os.makedirs("moviepartitions")
        output_file_path = f"moviepartitions/{output_file_name}_partition{i}.pl"
        with open(output_file_path, "w") as file:
            file.write(progol_program)
        qkfsjqlk

# Export the dataset as per-partition Progol programs; the files are written
# into the "moviepartitions" directory using the base name given here.
output_file_name = './movie_rec'
generate_progol_program_splitted(
    user_registry,
    product_registry,
    rating_registry,
    output_file_name,
)
print(f"Progol logic program saved")

NameError: name 'qkfsjqlk' is not defined

# ILP Learning 

In [62]:
from andante.program import AndanteProgram 
# Build an Andante program from the first generated partition file.
apmovies = AndanteProgram.build_from("moviepartitions/movie_rec_partition0.pl")

# Alternative input programs, kept for switching by hand:
#apmovies = AndanteProgram.build_from("moviepartitions/working.pl")
#apmovies = AndanteProgram.build_from("moviepartitions/trains.pl")
#apmovies = AndanteProgram.build_from("moviepartitions/family.pl")
#apmovies = AndanteProgram.build_from("moviepartitions/minimovie.pl")

# Run induction; being the last expression, its result is shown as the cell output.
apmovies.induce(
    update_knowledge=True,
    logging=True,
    verbose=0,
)

Knowledge object (class: TreeShapedKnowledge)
Clauses:
   recommend(A, B) :- watched(A, B), drama(B).