In [1]:
from models.csv_loader import CSVLoader
from models.products.product_registry import ProductRegistry
from models.products.product_mapping_row import ProductMappingRow
from models.products.product_row import ProductRow

product_registry = ProductRegistry(CSVLoader(ProductRow).read(), CSVLoader(ProductMappingRow).read())

In [2]:
from models.users.user_registry import UserRegistry
from models.users.user_mapping_row import UserMappingRow
from models.users.user_row import UserRow

user_registry = UserRegistry(CSVLoader(UserRow).read(), CSVLoader(UserMappingRow).read())

In [3]:
from models.ratings.rating_registry import RatingRegistry
from models.ratings.rating_row import RatingRow

rating_registry = RatingRegistry(CSVLoader(RatingRow).read(), user_registry, product_registry)

In [4]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import scipy
import matplotlib.pyplot as plt

In [5]:
rating_registry.ratings

[Rating(user=User(eid=0, uid=1, gender='F', age='Under 18'), product=Product(eid=451, pid=1193, name="One Flew Over the Cuckoo's Nest (1975)", genre='Drama'), rating=5, timestamp=978300760),
 Rating(user=User(eid=0, uid=1, gender='F', age='Under 18'), product=Product(eid=1734, pid=661, name='James and the Giant Peach (1996)', genre='Animation'), rating=3, timestamp=978302109),
 Rating(user=User(eid=0, uid=1, gender='F', age='Under 18'), product=Product(eid=1027, pid=914, name='My Fair Lady (1964)', genre='Musical'), rating=3, timestamp=978301968),
 Rating(user=User(eid=0, uid=1, gender='F', age='Under 18'), product=Product(eid=2785, pid=3408, name='Erin Brockovich (2000)', genre='Drama'), rating=4, timestamp=978300275),
 Rating(user=User(eid=0, uid=1, gender='F', age='Under 18'), product=Product(eid=2009, pid=2355, name="Bug's Life, A (1998)", genre='Animation'), rating=5, timestamp=978824291),
 Rating(user=User(eid=0, uid=1, gender='F', age='Under 18'), product=Product(eid=1188, pid=1

In [6]:
# Extracting attributes from the Rating objects
ratings_data = [
    {
        "userid": rating.user.uid,
        "gender": rating.user.gender,
        "age": rating.user.age,
        "productid": rating.product.pid,
        "pname": rating.product.name,
        "pgenre": rating.product.genre,
        "rating": rating.rating,
        "timestamp": rating.timestamp
    }
    for rating in rating_registry.ratings
]

# Converting to DataFrame
ratings = pd.DataFrame(ratings_data)

# Displaying the first few rows of the DataFrame
ratings.head(5)

Unnamed: 0,userid,gender,age,productid,pname,pgenre,rating,timestamp
0,1,F,Under 18,1193,One Flew Over the Cuckoo's Nest (1975),Drama,5,978300760
1,1,F,Under 18,661,James and the Giant Peach (1996),Animation,3,978302109
2,1,F,Under 18,914,My Fair Lady (1964),Musical,3,978301968
3,1,F,Under 18,3408,Erin Brockovich (2000),Drama,4,978300275
4,1,F,Under 18,2355,"Bug's Life, A (1998)",Animation,5,978824291


In [7]:
output_file = "ratings_dataset.xlsx"
ratings.to_excel(output_file, index=False)

print(f"Dataset successfully saved to {output_file}")


Dataset successfully saved to ratings_dataset.xlsx


In [8]:
ratings.shape

(932293, 8)

In [9]:
sample_df = ratings.sample(n=118, random_state=42)

# Saving the sample DataFrame to an Excel file
output_file = "ratings_sample_dataset.xlsx"
sample_df.to_excel(output_file, index=False)

print(f"Sample dataset successfully saved to {output_file}")

Sample dataset successfully saved to ratings_sample_dataset.xlsx


# Everything is encoded in here and working. 

##### Generate balanced partitions and generate progol program for each partiton

In [10]:
import pandas as pd
import numpy as np
import os

def generate_balanced_partitions(file_path, output_dir, num_partitions, use_actor=False):
    # Load the dataset
    data = pd.read_excel(file_path)

    # Define all possible categories for age, genres, and gender
    possible_ages = {
        "Under 18": "under_18",
        "18-24": "b18to24",
        "25-34": "b25to34",
        "35-44": "b35to44",
        "45-49": "b45to49",
        "50-55": "b50to55",
        "56+": "plus56"
    }
    
    possible_genres = {
        "Action": "action",
        "Adventure": "adventure",
        "Animation": "animation",
        "Children's": "childrens",
        "Comedy": "comedy",
        "Crime": "crime",
        "Documentary": "documentary",
        "Drama": "drama",
        "Fantasy": "fantasy",
        "Film-Noir": "filmnoir",
        "Horror": "horror",
        "Musical": "musical",
        "Mystery": "mystery",
        "Romance": "romance",
        "Sci-Fi": "sci_fi",
        "Thriller": "thriller",
        "Western": "western",
        "War": "war"
    }

    possible_genders = ["m", "f"]

    # Split the data into positive and negative examples
    positive_data = data[data['rating'] > 3].copy()
    negative_data = data[data['rating'] <= 3].copy()

    # Shuffle the data
    positive_data = positive_data.sample(frac=1, random_state=42).reset_index(drop=True)
    negative_data = negative_data.sample(frac=1, random_state=42).reset_index(drop=True)

    # Determine the size of each partition
    pos_partition_size = len(positive_data) // num_partitions
    neg_partition_size = len(negative_data) // num_partitions

    # Ensure that the partitions are balanced
    partitions = []
    for i in range(num_partitions):
        pos_start = i * pos_partition_size
        pos_end = pos_start + pos_partition_size
        neg_start = i * neg_partition_size
        neg_end = neg_start + neg_partition_size

        # Handle remainders by distributing them to the partitions
        if i == num_partitions - 1:
            pos_end = len(positive_data)
            neg_end = len(negative_data)

        partition = pd.concat([
            positive_data.iloc[pos_start:pos_end],
            negative_data.iloc[neg_start:neg_end]
        ]).reset_index(drop=True)
        
        partitions.append(partition)

    # Generate Prolog programs for each partition
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        
    for i, partition in enumerate(partitions):
        output_file_name = f"{output_dir}/prolog_program_partition_{i+1}"
        generate_progol_program_for_partition(partition, possible_ages, possible_genres, possible_genders, output_file_name, use_actors=use_actor)

def generate_progol_program_for_partition(partition, possible_ages, possible_genres, possible_genders, output_file_name, use_actors=False):
    mapping_dict_product_to_kg = {}
    mapping_dict_kg_to_product = {}
    with open("results/ml1m/preprocessed/pgpr/mappings/product_mapping.txt", "r") as file:
        next(file)  # Skip the header
        for line in file:
            rating_id, new_id = line.strip().split("\t")
            mapping_dict_product_to_kg[str(new_id)] = str(rating_id)
            mapping_dict_kg_to_product[str(rating_id)] = str(new_id)
            
    # Prepare containers for Prolog facts
    positive_examples = []
    negative_examples = []
    background_knowledge = set()

    # Track the present predicates
    present_ages = set()
    present_genres = set()
    present_genders = set()
    if use_actors:
        relations = pd.read_csv("results/ml1m/preprocessed/kg_final.txt", sep="\t", header=0)
        movies_actors = relations[relations['relation']==4]
        movies_directors = relations[relations['relation']==9]


    # Process each row in the partition
    for _, row in partition.iterrows():
        user_id = f"u{row['userid']}"
        movie_id = f"m{row['productid']}"
        rating = row['rating']
        age_group = possible_ages.get(row['age'], "").lower()  # Replace age group with corresponding Prolog predicate
        gender = row['gender'].lower()  # Normalize gender
        genre = possible_genres.get(row['pgenre'], "").lower()  # Replace genre with corresponding Prolog predicate

        # Generate positive and negative examples
        if rating > 3:
            positive_examples.append(f"recommend({user_id}, {movie_id}).")
        else:
            negative_examples.append(f"recommend({user_id}, {movie_id}).")

        # Background knowledge: user attributes
        if age_group:
            background_knowledge.add(f"{age_group}({user_id}).")
            present_ages.add(age_group)
        if gender in possible_genders:
            background_knowledge.add(f"{gender}({user_id}).")
            present_genders.add(gender)

        # Background knowledge: movie genre
        if genre:
            background_knowledge.add(f"{genre}({movie_id}).")
            present_genres.add(genre)
        if use_actors:    
            # Add the director and actor informations
            movie_mapping = int(mapping_dict_product_to_kg[movie_id[1:]])
            movies_actor = movies_actors[movies_actors['entity_head']==movie_mapping]['entity_tail']
            for m_actor in movies_actor:
                background_knowledge.add(f"movie_actor({movie_id},{m_actor}).")
    
    # use a part of the recommendations as watched background knowledge
    np.random.shuffle(positive_examples)
    np.random.shuffle(negative_examples)
    pos_l = len(positive_examples)
    neg_l = len(negative_examples)
    watched = positive_examples[:int(pos_l*0.5)] + negative_examples[:int(neg_l*0.5)]
    positive_examples = positive_examples[int(pos_l*0.5):]
    negative_examples = negative_examples[int(neg_l*0.5):]
    watched = [m.replace('recommend','watched') for m in watched]
    background_knowledge = background_knowledge.union(set(watched))
    similar = [f"similar(A,B):- watched(A,X), {genre}(X), {genre}(Y), watched(B,Y)." for genre in present_genres]
    background_knowledge = background_knowledge.union(set(similar))
    
    
    

    # Mode declarations
    modeh_declaration = "modeh(*, recommend(+user, +movie))."
    modeb_declarations = [
        f"modeb(*, {age}(+user))." for age in present_ages
    ] + [
        f"modeb(*, watched(+user, +movie))."
    ] + [
        f"modeb(*, {gender}(+user))." for gender in present_genders
    ] + [
        f"modeb(*, {genre}(+movie))." for genre in present_genres
    ] + [
        f"modeb(*, similar(+user, +user))."
    ]
    if use_actors:
        modeb_declarations.append(f"modeb(*, movie_actor(+movie, -actor)).")
    
    modeb_declarations = [declaration for declaration in modeb_declarations if declaration]  # Remove empty strings

    # Determinations
    determinations = [
        f"determination(recommend/2, {age}/1)." for age in present_ages
    ] + [
        f"determination(recommend/2, watched/2)."
    ] + [
        f"determination(recommend/2, {gender}/1)." for gender in present_genders
    ] + [
        f"determination(recommend/2, {genre}/1)." for genre in present_genres
    ] + [
        f"determination(recommend/2, similar/2)."
    ]
    if use_actors:
        determinations.append(f"determination(recommend/2, movie_actor/2).")
    
    determinations = [determination for determination in determinations if determination]  # Remove empty strings

    # Combine all parts into a Progol-compatible logic program
    progol_program = "% Mode Declarations\n"
    progol_program += modeh_declaration + "\n"
    progol_program += "\n".join(modeb_declarations) + "\n\n"

    progol_program += "% Determinations\n"
    progol_program += "\n".join(determinations) + "\n\n"

    progol_program += "% Background Knowledge\n:- begin_bg.\n"
    progol_program += "\n".join(sorted(background_knowledge)) + "\n:- end_bg.\n\n"

    progol_program += "% Positive Examples\n:- begin_in_pos.\n"
    progol_program += "\n".join(positive_examples) + "\n:- end_in_pos.\n\n"

    progol_program += "% Negative Examples\n:- begin_in_neg.\n"
    progol_program += "\n".join(negative_examples) + "\n:- end_in_neg.\n"

    # Save the Progol logic program to a file with a custom name
    output_file_path = f"{output_file_name}.pl"
    with open(output_file_path, "w") as file:
        file.write(progol_program)

    print(f"Progol logic program saved as {output_file_path}")

# Usage example:
num_partitions = 5
file_path = 'ratings_sample_dataset.xlsx'  # Replace with your actual file path
output_dir = './prolog_partitions_'+str(num_partitions)  # Directory to save the partitions
generate_balanced_partitions(file_path, output_dir, num_partitions, use_actor=True)


KeyError: '3160'

In [4]:
from andante.program import AndanteProgram 
apmovies = AndanteProgram.build_from("../prolog_partitions_six/prolog_program_partition_5.pl")
H = apmovies.induce(update_knowledge=True, logging=True, verbose=0)
H.clauses

OrderedSet([recommend(A, B) :- b45to49(A)., recommend(A, B) :- b18to24(A), drama(B)., recommend(A, B) :- crime(B)., recommend(A, B) :- horror(B)., recommend(A, B) :- b25to34(A), f(A)., recommend(A, B) :- adventure(B).])

# Query 
#### Lance une requête pour déterminer si B est recommendé à A en utilisant les règles et les faits définis.

In [5]:
apmovies.query("recommend(A,B).")

(True,
       0      1      2    3      4      5      6      7      8      9   ...  \
 A  m1348  m1259  m2987  m47  u1778  u1778  u1778  u1778  u1778  u1778  ...   
 B  m1348  m1259  m2987  m47   m924    m17  m1960  m1178  m1619   m337  ...   
 
       38     39    40    41     42     43     44    45     46     47  
 A  u2777  u2777  u621  u621   u621   u621   u621  u621  u3539  u5127  
 B  m1619   m337  m924   m17  m1960  m1178  m1619  m337  u3539  u5127  
 
 [2 rows x 48 columns])

#### Test with new user 

In [7]:
def add_new_user_to_existing_program(file_path, new_user_id, new_user_age, new_user_gender, new_movie_id, user_movie_genre, rating_movie):
    # Lire le fichier existant
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Vérifier si l'utilisateur existe déjà dans le fichier
    user_exists = any(f"{new_user_id}" in line for line in lines)

    if user_exists:
        print(f"L'utilisateur {new_user_id} existe déjà dans le programme.")
        return

    # Variables pour garder les lignes modifiées
    new_lines = []
    bg_inserted = False
    pos_inserted = False
    neg_inserted = False

    for line in lines:
        # Ajouter les nouvelles données dans la bonne section
        if ":- end_bg." in line and not bg_inserted:
            # Ajouter les nouvelles données de background knowledge avant la fin de la section bg
            new_lines.append(f"{new_user_age}({new_user_id}).\n")
            new_lines.append(f"{new_user_gender}({new_user_id}).\n")
            new_lines.append(f"{user_movie_genre}({new_movie_id}).\n")
            bg_inserted = True

        if ":- end_in_pos." in line and not pos_inserted and rating_movie > 3:
            # Ajouter la recommandation positive si le rating est > 3 avant la fin de la section pos
            new_lines.append(f"recommend({new_user_id}, {new_movie_id}).\n")
            pos_inserted = True

        if ":- end_in_neg." in line and not neg_inserted and rating_movie <= 3:
            # Ajouter la recommandation négative si le rating est <= 3 avant la fin de la section neg
            new_lines.append(f"recommend({new_user_id}, {new_movie_id}).\n")
            neg_inserted = True

        # Ajouter la ligne originale
        new_lines.append(line)

    # Sauvegarder les modifications dans le fichier
    with open(file_path, 'w') as file:
        file.writelines(new_lines)
    
    print(f"Le programme logique a été mis à jour et sauvegardé dans {file_path}")

# Exemple d'utilisation
file_path = '../prolog_partitions_six/prolog_program_partition_5.pl'

new_user_id = "u777"
new_user_age = "b25to34"  # Groupe d'âge: 25-34
new_user_gender = "m"  # Sexe: Masculin
new_movie_id = "m777"
user_movie_genre = "action"
rating_movie = 4

add_new_user_to_existing_program(file_path, new_user_id, new_user_age, new_user_gender, new_movie_id, user_movie_genre, rating_movie)


L'utilisateur u777 existe déjà dans le programme.


In [None]:
from andante.program import AndanteProgram

# Chemin vers le fichier Prolog
file_path = 'prolog_partitions_six/prolog_program_partition_5.pl'

# Ajouter un nouvel utilisateur au programme
new_user_id = "u777"
new_user_age = "b25to34"  # Groupe d'âge: 25-34
new_user_gender = "m"  # Sexe: Masculin
new_movie_id = "m777"
user_movie_genre = "action"
rating_movie = 4

#add_new_user_to_existing_program(file_path, new_user_id, new_user_age, new_user_gender, new_movie_id, user_movie_genre, rating_movie)

# Charger le programme Prolog avec les nouvelles données
ap = AndanteProgram.build_from(file_path)

# Définir les paramètres pour l'inférence
ap.set('verbose', 1)
ap.set('h', 100)

# Générer les règles si elles ne sont pas déjà générées
induced_rules = ap.induce(update_knowledge=True, logging=True, verbose=0)

# Recommander un film pour le nouvel utilisateur
result, df = ap.query(f"recommend({new_user_id}, Movie).")

# Afficher les recommandations
if result:
    print(f"Recommandations pour {new_user_id}:")
    print(df)
else:
    print(f"Aucune recommandation trouvée pour {new_user_id}.")


L'utilisateur u777 existe déjà dans le programme.





h 0
Atom recommend(u777, Movie)
Candidates {recommend(A, B) :- crime(B)., recommend(A, B) :- adventure(B)., recommend(A, B) :- b18to24(A), drama(B)., recommend(A, B) :- b25to34(A), f(A)., recommend(A, B) :- b45to49(A)., recommend(A, B) :- horror(B).}
Match {recommend(A, B) :- crime(B)., recommend(A, B) :- adventure(B)., recommend(A, B) :- b18to24(A), drama(B)., recommend(A, B) :- b25to34(A), f(A)., recommend(A, B) :- b45to49(A)., recommend(A, B) :- horror(B).}
Clause recommend(A, B) :- crime(B).
Substitution {A: u777, B: Movie}
Atoms [crime(Movie)]

h 1
Atom crime(Movie)
Candidates {crime(m47).}
Match {crime(m47).}
Clause crime(m47).
Substitution {A: u777, B: m47, Movie: m47}
Atoms []

h 2
Atom recommend(u777, Movie)
Clause recommend(A, B) :- adventure(B).
Substitution {A: u777, B: Movie}
Atoms [adventure(Movie)]

h 3
Atom adventure(Movie)
Candidates {adventure(m2987)., adventure(m1259).}
Match {adventure(m2987)., adventure(m1259).}
Clause adventure(m2987).
Substitution {A: u777, B: m

In [8]:
# Define new user and rule
new_user = "john"  # Assume 'John' is the new user
new_rule_for_user = f"recommend({new_user}, X)."  # Rule specific to the new user

# Query the system
success, substitutions = apmovies.query(new_rule_for_user)

# Display result
if success:
    print(f"The rule was satisfied for {new_user}. Movies recommended:")
    print(substitutions)
else:
    print(f"No movie recommendations found for {new_user}.")



The rule was satisfied for john. Movies recommended:
     0      1      2      3
X  m47  m1348  m1259  m2987


In [21]:
apmovies = AndanteProgram.build_from("../prolog_partitions_six/prolog_program_partition_5.pl")

# Perform learning or querying as usual
induced_hypotheses = apmovies.induce(update_knowledge=True, logging=True, verbose=0)
induced_hypotheses

Knowledge object (class: TreeShapedKnowledge)
Clauses:
   recommend(A, B) :- b45to49(A).
   recommend(A, B) :- b18to24(A), drama(B).
   recommend(A, B) :- crime(B).
   recommend(A, B) :- horror(B).
   recommend(A, B) :- b25to34(A), f(A).
   recommend(A, B) :- adventure(B).

In [20]:
from andante.program import AndanteProgram 
# Generate or load an AndanteProgram instance
apmovies = AndanteProgram.build_from("../prolog_partitions_six/prolog_program_partition_5.pl")

# Perform learning or querying as usual
induced_hypotheses = apmovies.induce(update_knowledge=True, logging=True, verbose=0)
apmovies.results = induced_hypotheses
print(apmovies.results) 
hgffd
apmovies.query("recommend(A, B).")

# Save the AndanteProgram to a file
apmovies.save("saved_andante_program.pkl")

# Later, load the AndanteProgram from the saved file
loaded_apmovies = AndanteProgram.load("saved_andante_program.pkl")

# query or further using the loaded program without re-inducing
loaded_apmovies.query("recommend(A, B).")


None


NameError: name 'hgffd' is not defined

In [19]:
# 1. Save the AndanteProgram instance
apmovies.save("saved_andante_program.pkl")

# 2. Load the AndanteProgram instance
loaded_apmovies = AndanteProgram.load("saved_andante_program.pkl")

# 3. Access attributes such as `results`
print("Results from loaded program:")
print(loaded_apmovies.results)

# 4. Use the `query` method on the loaded program
success, substitutions = loaded_apmovies.query("recommend(User, Movie).")

# 5. Display the query results
if success:
    print("Query successful. Substitutions found:")
    print(substitutions)
else:
    print("Query failed. No substitutions found.")

AndanteProgram saved to saved_andante_program.pkl
Results from loaded program:
None
Query successful. Substitutions found:
          0      1      2    3      4      5      6      7      8      9   \
User   m1348  m1259  m2987  m47  u1778  u1778  u1778  u1778  u1778  u1778   
Movie  m1348  m1259  m2987  m47   m924    m17  m1960  m1178  m1619   m337   

       ...     38     39    40    41     42     43     44    45     46     47  
User   ...  u2777  u2777  u621  u621   u621   u621   u621  u621  u3539  u5127  
Movie  ...  m1619   m337  m924   m17  m1960  m1178  m1619  m337  u3539  u5127  

[2 rows x 48 columns]


# Combine rules from different partitions 

In [2]:
import collections
from andante.program import AndanteProgram
from andante.collections import OrderedSet
from andante.logic_concepts import Clause

from andante.knowledge import TreeShapedKnowledge
# Define the directory containing the Prolog partition files
prolog_directory = "prolog_partitions_six"

# List of partition file names
partition_files = [
    f"../{prolog_directory}/prolog_program_partition_{i+1}.pl"
    for i in range(6)  # Assuming 6 partitions, adjust as needed
]

# Initialize an OrderedSet to hold all unique rules
all_rules = OrderedSet()

# Iterate over each partition file and induce rules
for partition_file in partition_files:
    print(f"Processing {partition_file}...")
    # Build the AndanteProgram from the current partition file
    ap = AndanteProgram.build_from(partition_file)
    
    # Induce rules and update knowledge
    induced_knowledge = ap.induce(update_knowledge=True, logging=True, verbose=0)
    
    # If induced_knowledge is a TreeShapedKnowledge, extract its clauses
    if isinstance(induced_knowledge, TreeShapedKnowledge):
        for clause in induced_knowledge.clauses:
            if isinstance(clause, Clause):
                all_rules.add(clause)
    else:
        print(f"Unexpected type for induced_rules: {type(induced_knowledge)}")

# Output the combined rules
for rule in all_rules:
    print(rule)

# Optionally, save the combined rules to a file
with open("ilp_movie_recommendation/combined_rules.txt", "w") as f:
    for rule in all_rules:
        f.write(str(rule) + "\n")

print("Combined rules saved to combined_rules.txt")

Processing ../prolog_partitions_six/prolog_program_partition_1.pl...
Processing ../prolog_partitions_six/prolog_program_partition_2.pl...
Processing ../prolog_partitions_six/prolog_program_partition_3.pl...
Processing ../prolog_partitions_six/prolog_program_partition_4.pl...
Processing ../prolog_partitions_six/prolog_program_partition_5.pl...
Processing ../prolog_partitions_six/prolog_program_partition_6.pl...
recommend(A, B) :- b25to34(A), comedy(B).
recommend(A, B) :- crime(B).
recommend(A, B) :- f(A).
recommend(A, B) :- b25to34(A), action(B).
recommend(A, B) :- b45to49(A).
recommend(A, B) :- b18to24(A), drama(B).
recommend(A, B) :- horror(B).
recommend(A, B) :- b25to34(A), f(A).
recommend(A, B) :- adventure(B).
recommend(A, B) :- m(A), drama(B).
Combined rules saved to combined_rules.txt


# Apply Union with normalization and unification

In [None]:
import collections
from andante.program import AndanteProgram
from andante.collections import OrderedSet
from andante.logic_concepts import Clause, Atom, Variable, Predicate
from andante.knowledge import TreeShapedKnowledge

# Define the directory containing the Prolog partition files
prolog_directory = "prolog_partitions_six"

# List of partition file names
partition_files = [
    f"{prolog_directory}/prolog_program_partition_{i+1}.pl"
    for i in range(6)  # Assuming 6 partitions, adjust as needed
]

# Initialize an OrderedSet to hold all unique rules
all_rules = OrderedSet()

# Function to normalize and unify clauses
def normalize_clause(clause):
    # Sort the literals in the body of the clause for consistent ordering
    sorted_body = sorted(clause.body, key=lambda atom: str(atom))

    # Standardize variable names: use a consistent naming scheme, e.g., A, B, C...
    var_mapping = {}
    new_body = []
    new_head = clause.head

    for atom in sorted_body:
        new_terms = []
        for term in atom:
            if isinstance(term, Variable):
                if term not in var_mapping:
                    var_mapping[term] = Variable(chr(ord('A') + len(var_mapping)))
                new_terms.append(var_mapping[term])
            else:
                new_terms.append(term)
        new_body.append(Atom(atom.predicate, new_terms))

    # Apply the same mapping to the head of the clause
    if clause.head:
        new_head_terms = []
        for term in clause.head.terms:
            if isinstance(term, Variable):
                new_head_terms.append(var_mapping.get(term, term))
            else:
                new_head_terms.append(term)
        new_head = Atom(clause.head.predicate, new_head_terms)

    # Return the normalized clause
    return Clause(new_head, new_body)

# Iterate over each partition file and induce rules
for partition_file in partition_files:
    print(f"Processing {partition_file}...")
    # Build the AndanteProgram from the current partition file
    ap = AndanteProgram.build_from(partition_file)
    
    # Induce rules and update knowledge
    induced_knowledge = ap.induce(update_knowledge=True, logging=True, verbose=0)
    
    # If induced_knowledge is a TreeShapedKnowledge, extract its clauses
    if isinstance(induced_knowledge, TreeShapedKnowledge):
        for clause in induced_knowledge.clauses:
            if isinstance(clause, Clause):
                normalized_clause = normalize_clause(clause)
                all_rules.add(normalized_clause)
    else:
        print(f"Unexpected type for induced_rules: {type(induced_knowledge)}")

# Function to check for redundancy and remove duplicates
def remove_redundancy(rules):
    unique_rules = OrderedSet()
    for rule in rules:
        if rule not in unique_rules:
            unique_rules.add(rule)
    return unique_rules

# Remove redundancy from all_rules
all_rules = remove_redundancy(all_rules)

# Output the combined, normalized, and unique rules
for rule in all_rules:
    print(rule)

# Optionally, save the combined rules to a file
with open("combined_rules.txt", "w") as f:
    for rule in all_rules:
        f.write(str(rule) + "\n")

print("Combined, normalized, and unique rules saved to combined_rules_normalized.txt")

Processing prolog_partitions_six/prolog_program_partition_1.pl...


SyntaxError: Failed to parse rule <compoundterm = word __ '(' __ term __ (',' __ term __)* ')'> (prolog_program_partition_1.pl, line 43)