In [None]:
# Put the parent of the current working directory on sys.path
# (presumably so the project-local `models` package imports below resolve — confirm the notebook's location).
import sys
import os
sys.path.append(os.path.dirname(os.getcwd()))

from models.csv_loader import CSVLoader
from models.products.product_registry import ProductRegistry
from models.products.product_mapping_row import ProductMappingRow
from models.products.product_row import ProductRow

# Build the product registry from two CSVLoader reads: one typed by ProductRow, one by ProductMappingRow.
# NOTE(review): which files the loaders open is decided inside CSVLoader and is not visible in this notebook.
product_registry = ProductRegistry(CSVLoader(ProductRow).read(), CSVLoader(ProductMappingRow).read())

In [None]:
# CSVLoader is not imported in this cell; it comes from the product-registry cell, which must run first.
from models.users.user_registry import UserRegistry
from models.users.user_mapping_row import UserMappingRow
from models.users.user_row import UserRow

# Build the user registry from two CSVLoader reads: one typed by UserRow, one by UserMappingRow.
user_registry = UserRegistry(CSVLoader(UserRow).read(), CSVLoader(UserMappingRow).read())

In [None]:
# Requires CSVLoader, user_registry and product_registry from the preceding cells.
from models.ratings.rating_registry import RatingRegistry
from models.ratings.rating_row import RatingRow

# Build the rating registry from the RatingRow CSV read plus the two registries built earlier
# (presumably so each rating can be linked to its user and product — confirm in RatingRegistry).
rating_registry = RatingRegistry(CSVLoader(RatingRow).read(), user_registry, product_registry)

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import scipy
import matplotlib.pyplot as plt

In [None]:
# Show the registry's `ratings` attribute as the cell output.
# NOTE(review): this renders the whole collection; displaying only a few items would keep the notebook light.
rating_registry.ratings

In [None]:
# Flatten every Rating object into one plain record: user attributes (uid, gender, age),
# product attributes (pid, name, genre) and the rating value with its timestamp.
ratings_data = [
    {
        "userid": rating.user.uid,
        "gender": rating.user.gender,
        "age": rating.user.age,
        "productid": rating.product.pid,
        "pname": rating.product.name,
        "pgenre": rating.product.genre,
        "rating": rating.rating,
        "timestamp": rating.timestamp
    }
    for rating in rating_registry.ratings
]

# Build the DataFrame in one step from the list of records (one row per rating)
ratings = pd.DataFrame(ratings_data)

# Display the first five rows as the cell output
ratings.head(5)

In [None]:
# Persist the full ratings table to an Excel workbook in the current working directory,
# without writing the DataFrame index as a column.
output_file = "ratings_dataset.xlsx"
ratings.to_excel(output_file, index=False)

print(f"Dataset successfully saved to {output_file}")


In [None]:
# (number of rows, number of columns) of the ratings DataFrame
ratings.shape

In [None]:
# To resume from the saved dataset instead of rebuilding `ratings`, reload it first:
#     ratings = pd.read_excel('ratings_dataset.xlsx')

# Draw a reproducible random sample (fixed seed) of at most 500 ratings. Capping the size at
# len(ratings) avoids the ValueError that DataFrame.sample raises when n exceeds the number
# of rows (sampling without replacement); with 500 or more rows the result is unchanged.
sample_df = ratings.sample(n=min(500, len(ratings)), random_state=42)

# Saving the sample DataFrame to an Excel file
output_file = "ratings_sample_dataset.xlsx"
sample_df.to_excel(output_file, index=False)

print(f"Sample dataset successfully saved to {output_file}")

# Everything is encoded in here and working. 

##### Generate balanced partitions and a Progol program for each partition

In [None]:
import pandas as pd
import numpy as np
import os

def generate_balanced_partitions(file_path, output_dir, num_partitions, use_actor=False):
    """Split a ratings spreadsheet into class-balanced partitions and write one logic program per partition.

    Rows with ``rating > 3`` are the positive class and rows with ``rating <= 3`` the
    negative class. Each class is shuffled with a fixed seed (42) and cut into
    ``num_partitions`` consecutive chunks of equal size; the last partition also
    receives whatever remains after the integer division. Partition ``k`` (1-based)
    is handed to ``generate_progol_program_for_partition`` and ends up in
    ``<output_dir>/prolog_program_partition_<k>.pl``.

    Parameters:
    - file_path: path of the Excel file to load with ``pd.read_excel``.
    - output_dir: directory for the generated programs (created when missing).
    - num_partitions: number of partitions to build.
    - use_actor: forwarded as ``use_actors`` to the program generator.
    """
    ratings_df = pd.read_excel(file_path)

    # Dataset label -> predicate name, for age groups and genres
    possible_ages = {
        "Under 18": "under_18",
        "18-24": "b18to24",
        "25-34": "b25to34",
        "35-44": "b35to44",
        "45-49": "b45to49",
        "50-55": "b50to55",
        "56+": "plus56"
    }

    possible_genres = {
        "Action": "action",
        "Adventure": "adventure",
        "Animation": "animation",
        "Children's": "childrens",
        "Comedy": "comedy",
        "Crime": "crime",
        "Documentary": "documentary",
        "Drama": "drama",
        "Fantasy": "fantasy",
        "Film-Noir": "filmnoir",
        "Horror": "horror",
        "Musical": "musical",
        "Mystery": "mystery",
        "Romance": "romance",
        "Sci-Fi": "sci_fi",
        "Thriller": "thriller",
        "Western": "western",
        "War": "war"
    }

    possible_genders = ["m", "f"]

    def _shuffled(frame):
        # Deterministic shuffle: same seed for both classes, fresh 0..n-1 index
        return frame.sample(frac=1, random_state=42).reset_index(drop=True)

    liked = _shuffled(ratings_df[ratings_df['rating'] > 3].copy())
    disliked = _shuffled(ratings_df[ratings_df['rating'] <= 3].copy())

    liked_chunk = len(liked) // num_partitions
    disliked_chunk = len(disliked) // num_partitions
    last_index = num_partitions - 1

    partitions = []
    for index in range(num_partitions):
        # The final partition absorbs the remainder of both classes
        is_last = index == last_index
        liked_stop = len(liked) if is_last else (index + 1) * liked_chunk
        disliked_stop = len(disliked) if is_last else (index + 1) * disliked_chunk

        partitions.append(
            pd.concat([
                liked.iloc[index * liked_chunk:liked_stop],
                disliked.iloc[index * disliked_chunk:disliked_stop],
            ]).reset_index(drop=True)
        )

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # One program file per partition, numbered from 1
    for number, partition in enumerate(partitions, start=1):
        generate_progol_program_for_partition(
            partition,
            possible_ages,
            possible_genres,
            possible_genders,
            f"{output_dir}/prolog_program_partition_{number}",
            use_actors=use_actor,
        )

def generate_progol_program_for_partition(partition, possible_ages, possible_genres, possible_genders, output_file_name, use_actors=False, seed=None):
    """Write a Progol-style logic program (``<output_file_name>.pl``) for one partition of ratings.

    Every row yields a ``recommend(u<userid>, m<productid>).`` example, positive when
    ``rating > 3`` and negative otherwise. After shuffling, the first half of the
    positive and of the negative examples is converted into ``watched/2`` background
    facts and removed from the example lists. Background knowledge also contains the
    age, gender and genre facts found in the partition, one ``similar/2`` rule per
    genre present, and the ``share_interest/2`` rule.

    Parameters:
    - partition (pandas.DataFrame): rows with the columns ``userid``, ``productid``,
      ``rating``, ``age``, ``gender`` and ``pgenre``.
    - possible_ages (dict): maps an ``age`` value to a predicate name; unmapped values add no fact.
    - possible_genres (dict): maps a ``pgenre`` value to a predicate name; unmapped values add no fact.
    - possible_genders (collection of str): lower-cased gender values allowed as predicates.
    - output_file_name (str): output path without extension; ".pl" is appended.
    - use_actors (bool): when true, the product mapping and knowledge-graph files are read
      (hard-coded relative paths) and ``movie_actor/2`` facts, their mode/determination
      declarations and an actor-based ``similar/2`` rule are emitted. When false, neither
      file is opened.
    - seed (int or None): when given, the example shuffle uses a private
      ``numpy.random.RandomState(seed)`` and is reproducible; when None, the global
      NumPy random state is used, as before.

    Raises:
    - KeyError: with ``use_actors`` set, when a product id is absent from the mapping file.
    """
    actors_by_movie = {}
    product_to_kg = {}
    if use_actors:
        # Tab-separated file with a header line: <product id> <knowledge-graph id>
        with open("../results/ml1m/preprocessed/pgpr/mappings/product_mapping.txt", "r") as file:
            next(file)  # Skip the header
            for line in file:
                rating_id, new_id = line.strip().split("\t")
                product_to_kg[str(rating_id)] = str(new_id)

        relations = pd.read_csv("../results/ml1m/preprocessed/kg_final.txt", sep="\t", header=0)
        # Relation id 4 is used as the movie -> actor relation — TODO confirm against the KG relation mapping
        movies_actors = relations[relations['relation'] == 4]
        # Index the actor triples once so that rows do not rescan the whole frame
        for head, tail in zip(movies_actors['entity_head'], movies_actors['entity_tail']):
            actors_by_movie.setdefault(head, []).append(tail)

    # Prepare containers for Prolog facts
    positive_examples = []
    negative_examples = []
    background_knowledge = set()

    # Track the predicates that actually occur, so only those get declarations
    present_ages = set()
    present_genres = set()
    present_genders = set()

    # Process each row in the partition
    for _, row in partition.iterrows():
        user_id = f"u{row['userid']}"
        movie_id = f"m{row['productid']}"
        rating = row['rating']
        age_group = possible_ages.get(row['age'], "").lower()  # "" when the age label is unknown
        gender = row['gender'].lower()  # Normalize gender
        genre = possible_genres.get(row['pgenre'], "").lower()  # "" when the genre label is unknown

        # Generate positive and negative examples
        if rating > 3:
            positive_examples.append(f"recommend({user_id}, {movie_id}).")
        else:
            negative_examples.append(f"recommend({user_id}, {movie_id}).")

        # Background knowledge: user attributes
        if age_group:
            background_knowledge.add(f"{age_group}({user_id}).")
            present_ages.add(age_group)
        if gender in possible_genders:
            background_knowledge.add(f"{gender}({user_id}).")
            present_genders.add(gender)

        # Background knowledge: movie genre
        if genre:
            background_knowledge.add(f"{genre}({movie_id}).")
            present_genres.add(genre)

        if use_actors:
            # Translate the product id (movie_id without its "m" prefix) to its KG entity id
            movie_mapping = int(product_to_kg[movie_id[1:]])
            for m_actor in actors_by_movie.get(movie_mapping, ()):
                background_knowledge.add(f"movie_actor({movie_id},a{m_actor}).")

    # Use half of the examples of each class as `watched` background knowledge
    shuffle = np.random.shuffle if seed is None else np.random.RandomState(seed).shuffle
    shuffle(positive_examples)
    shuffle(negative_examples)
    pos_split = len(positive_examples) // 2
    neg_split = len(negative_examples) // 2
    watched = [
        example.replace('recommend', 'watched')
        for example in positive_examples[:pos_split] + negative_examples[:neg_split]
    ]
    positive_examples = positive_examples[pos_split:]
    negative_examples = negative_examples[neg_split:]
    background_knowledge.update(watched)

    similar = [f"similar(A,B):- watched(A,X), {genre}(X), {genre}(Y), watched(B,Y)." for genre in present_genres]
    if use_actors:
        # Actor facts are stored as movie_actor/2, so the rule must use that predicate;
        # it is only emitted when those facts exist.
        similar.append("similar(A,B):- watched(A,X), movie_actor(X,Z), watched(B,Y), movie_actor(Y,Z).")
    share_interest = "share_interest(A,B):- similar(A,X), watched(X,B)."
    background_knowledge.update(similar)
    background_knowledge.add(share_interest)

    # Mode declarations
    modeh_declaration = "modeh(*, recommend(+user, +movie))."
    modeb_declarations = (
        [f"modeb(*, {age}(+user))." for age in present_ages]
        + ["modeb(*, watched(+user, +movie))."]
        + [f"modeb(*, {gender}(+user))." for gender in present_genders]
        + [f"modeb(*, {genre}(+movie))." for genre in present_genres]
        + ["modeb(*, similar(+user, +user))."]
    )
    if use_actors:
        modeb_declarations.append("modeb(*, movie_actor(+movie, -actor)).")
        # NOTE(review): share_interest/2 is defined in every program but only declared
        # when use_actors is set — confirm this coupling is intended.
        modeb_declarations.append("modeb(*, share_interest(+user, -movie)).")

    # Determinations
    determinations = (
        [f"determination(recommend/2, {age}/1)." for age in present_ages]
        + ["determination(recommend/2, watched/2)."]
        + [f"determination(recommend/2, {gender}/1)." for gender in present_genders]
        + [f"determination(recommend/2, {genre}/1)." for genre in present_genres]
        + ["determination(recommend/2, similar/2)."]
    )
    if use_actors:
        determinations.append("determination(recommend/2, movie_actor/2).")
        determinations.append("determination(recommend/2, share_interest/2).")

    # Combine all parts into a Progol-compatible logic program
    progol_program = "% Mode Declarations\n"
    progol_program += modeh_declaration + "\n"
    progol_program += "\n".join(modeb_declarations) + "\n\n"

    progol_program += "% Determinations\n"
    progol_program += "\n".join(determinations) + "\n\n"

    # Background knowledge is sorted so the section is stable for a given set of facts
    progol_program += "% Background Knowledge\n:- begin_bg.\n"
    progol_program += "\n".join(sorted(background_knowledge)) + "\n:- end_bg.\n\n"

    progol_program += "% Positive Examples\n:- begin_in_pos.\n"
    progol_program += "\n".join(positive_examples) + "\n:- end_in_pos.\n\n"

    progol_program += "% Negative Examples\n:- begin_in_neg.\n"
    progol_program += "\n".join(negative_examples) + "\n:- end_in_neg.\n"

    # Save the Progol logic program to a file with a custom name
    output_file_path = f"{output_file_name}.pl"
    with open(output_file_path, "w") as file:
        file.write(progol_program)

    print(f"Progol logic program saved as {output_file_path}")

# Example run: build ten partitions from the sampled dataset, with actor facts enabled.
num_partitions = 10
use_actor = True
ext = '' if not use_actor else "actors_"  # directory-name infix marking the actor variant
file_path = 'ratings_sample_dataset.xlsx'  # Replace with your actual file path
output_dir = f"../prolog_partitions_{ext}{num_partitions}"  # Directory to save the partitions
generate_balanced_partitions(file_path, output_dir, num_partitions, use_actor=use_actor)


In [9]:
from andante.program import AndanteProgram 
from coverage import evaluate_clause_coverage
# Load partition 5 of the actor-enabled programs and copy its knowledge BEFORE induction,
# because induce() is called with update_knowledge=True just below.
apmovies = AndanteProgram.build_from("../prolog_partitions_actors_10/prolog_program_partition_5.pl")
background_knowledge = apmovies.knowledge.copy()
H = apmovies.induce(update_knowledge=True, logging=True, verbose=0)
# NOTE(review): this bare expression is not the last line of the cell, so its value is not displayed.
H.clauses

positive_examples =apmovies.examples['pos']
negative_examples = apmovies.examples['neg']
induced_clauses = H
# Score every induced clause on its own, each time against a fresh copy of the pre-induction knowledge.
results = []
for clause in induced_clauses:
    coverage_score = evaluate_clause_coverage(clause, positive_examples, negative_examples, background_knowledge.copy())
    results.append((clause, coverage_score))
for clause, score in results:
    print(f'Clause: {clause}, Coverage: {score}')
# NOTE(review): despite the "Total coverage" label, only the FIRST induced clause is passed here,
# evaluated against a copy of the program's current knowledge — confirm that this measures
# the coverage of the whole hypothesis as intended.
total_coverage = evaluate_clause_coverage(next(induced_clauses.__iter__()), positive_examples, negative_examples, apmovies.knowledge.copy())
print(f'Total coverage: {total_coverage}')


Clause: recommend(A, B) :- f(A), comedy(B)., Coverage: {'positive_entailed': 2, 'positive_not_entailed': 12, 'negative_entailed': 0, 'negative_not_entailed': 11, 'score': 2, 'm_estimate': 0.6428571428571429}
Clause: recommend(A, B) :- b18to24(A), f(A)., Coverage: {'positive_entailed': 2, 'positive_not_entailed': 12, 'negative_entailed': 0, 'negative_not_entailed': 11, 'score': 2, 'm_estimate': 0.6428571428571429}
Clause: recommend(A, B) :- under_18(A)., Coverage: {'positive_entailed': 1, 'positive_not_entailed': 13, 'negative_entailed': 0, 'negative_not_entailed': 11, 'score': 1, 'm_estimate': 0.5833333333333334}
Clause: recommend(A, B) :- b18to24(A), action(B)., Coverage: {'positive_entailed': 2, 'positive_not_entailed': 12, 'negative_entailed': 0, 'negative_not_entailed': 11, 'score': 2, 'm_estimate': 0.6428571428571429}
Clause: recommend(A, B) :- filmnoir(B)., Coverage: {'positive_entailed': 1, 'positive_not_entailed': 13, 'negative_entailed': 0, 'negative_not_entailed': 11, 'score'

# Query 
#### Lance une requête pour déterminer si B est recommandé à A en utilisant les règles et les faits définis.

In [None]:
# Ask the program for every (A, B) binding that satisfies recommend/2; the result is the cell output.
apmovies.query("recommend(A,B).")

###### Test with John

In [5]:
# Define a user constant and the query asking which movies are recommended to that user
new_user = "john"  # constant naming the new user in the query
new_rule_for_user = f"recommend({new_user}, X)."  # a query (goal) with X left free, not a new rule

# Run the query; the call returns a success flag and the bindings found for X
success, substitutions = apmovies.query(new_rule_for_user)

# Display result
if success:
    print(f"The rule was satisfied for {new_user}. Movies recommended:")
    print(substitutions)
else:
    print(f"No movie recommendations found for {new_user}.")

The rule was satisfied for john. Movies recommended:
     0      1      2      3
X  m47  m2987  m1259  m1348


#### Save and Load Model 
##### Note: set `apmovies.results` to the induced rules before calling `apmovies.save(...)`, so that the results are stored together with the model

In [3]:
from andante.program import AndanteProgram 

# Build an AndanteProgram from one generated partition file
apmovies = AndanteProgram.build_from("../prolog_partitions_six/prolog_program_partition_5.pl")

# Induce a hypothesis; update_knowledge=True asks the program to update its own knowledge as well
induced_hypotheses = apmovies.induce(update_knowledge=True, logging=True, verbose=0)

# 1. Attach the induced hypothesis explicitly, then save the AndanteProgram instance
apmovies.results = induced_hypotheses
apmovies.save("saved_andante_program.pkl")

# 2. Load the AndanteProgram instance
# NOTE(review): the .pkl extension suggests pickle — only load files from a trusted source.
loaded_apmovies = AndanteProgram.load("saved_andante_program.pkl")

# 3. Access attributes such as `results`
print("Results from loaded program:")
print(loaded_apmovies.results)

# 4. Use the `query` method on the loaded program (both arguments left as free variables)
success, substitutions = loaded_apmovies.query("recommend(User, Movie).")

# 5. Display the query results
if success:
    print("Query successful. Substitutions found:")
    print(substitutions)
else:
    print("Query failed. No substitutions found.")

AndanteProgram saved to saved_andante_program.pkl
Results from loaded program:
Knowledge object (class: TreeShapedKnowledge)
Clauses:
   recommend(A, B) :- b45to49(A).
   recommend(A, B) :- b18to24(A), drama(B).
   recommend(A, B) :- crime(B).
   recommend(A, B) :- horror(B).
   recommend(A, B) :- b25to34(A), f(A).
   recommend(A, B) :- adventure(B).
Query successful. Substitutions found:
          0      1      2      3      4      5      6      7      8      9   \
User   u3539  m2987  m1259  u2777  u2777  u2777  u2777  u2777  u2777  u1433   
Movie  u3539  m2987  m1259  m1178  m1960    m17   m337  m1619   m924  m1178   

       ...     38     39     40     41     42     43     44     45     46   47  
User   ...  u4411  u1778  u1778  u1778  u1778  u1778  u1778  m1348  u5127  m47  
Movie  ...   m924  m1178  m1960    m17   m337  m1619   m924  m1348  u5127  m47  

[2 rows x 48 columns]


###### Il faut rajouter le nouveau profil utilisateur au BK dans un programme Andante ---> le système doit ensuite s'en servir pour lui inférer une recommandation

In [10]:
# Hand-written example of a background-knowledge section: facts for the new user u777 and
# movie m777, followed by user/movie facts and six recommend/2 rules.
# NOTE(review): `text` is not referenced by any later cell visible in this notebook.
text = """
:- begin_bg.
user(u777).
b18to24(u777).
f(u777).
movie(m777).
drama(m777).
action(m1374).
action(m1676).
adventure(m1259).
adventure(m2987).
b18to24(u1433).
b18to24(u1778).
b18to24(u2777).
b18to24(u3418).
b18to24(u4103).
b18to24(u4411).
b18to24(u621).
b25to34(u2840).
b25to34(u3123).
b25to34(u3539).
b25to34(u4138).
b25to34(u4560).
b25to34(u4607).
b25to34(u5077).
b35to44(u1197).
b45to49(u5127).
b50to55(u4981).
comedy(m19).
comedy(m2596).
comedy(m2888).
comedy(m3909).
comedy(m691).
crime(m47).
drama(m1178).
drama(m1619).
drama(m17).
drama(m1960).
drama(m337).
drama(m924).
f(u1433).
f(u3418).
f(u3539).
horror(m1348).
m(u1197).
m(u1778).
m(u2777).
m(u2840).
m(u3123).
m(u4083).
m(u4103).
m(u4138).
m(u4411).
m(u4560).
m(u4607).
m(u4981).
m(u5077).
m(u5127).
m(u621).
plus56(u4083).
sci_fi(m1206).
recommend(A, B) :- b45to49(A).
recommend(A, B) :- b18to24(A), drama(B).
recommend(A, B) :- crime(B).
recommend(A, B) :- horror(B).
recommend(A, B) :- b25to34(A), f(A).
recommend(A, B) :- adventure(B).
:- end_bg.
"""

### Add new user profile, rules to Bk - method

In [4]:
def add_user_profile_tobk(user_profile, loadedmovies):
    """
    Render a user profile, a program's results and its knowledge as one background-knowledge section.

    Parameters:
    - user_profile (dict): profile values under the keys "user_id", "age_group", "gender",
      "movie_id", "movie_genre" and "rating"; a missing key is rendered as ``None``.
    - loadedmovies: object exposing two iterables, ``results`` (the induced rules) and
      ``knowledge`` (the background knowledge); each item is rendered with ``str()``.

    Returns:
    - str: text delimited by ":- begin_bg." and ":- end_bg.". It holds the six profile facts
      and the rules from ``results`` (all indented by four spaces), then the items of
      ``knowledge`` (not indented), one per line, with a trailing newline.
    """
    profile_keys = ("user_id", "age_group", "gender", "movie_id", "movie_genre", "rating")
    user_id, age_group, gender, movie_id, movie_genre, rating = (
        user_profile.get(key) for key in profile_keys
    )

    # Opening marker followed by the facts describing the user and the rated movie
    lines = [
        ":- begin_bg.",
        f"    user({user_id}).",
        f"    {age_group}({user_id}).",
        f"    {gender}({user_id}).",
        f"    movie({movie_id}).",
        f"    {movie_genre}({movie_id}).",
        f"    rating({rating}).",
    ]

    # Induced rules first (indented), then the program's own knowledge (flush left)
    lines.extend(f"    {rule!s}" for rule in loadedmovies.results)
    lines.extend(f"{fact!s}" for fact in loadedmovies.knowledge)

    lines.append(":- end_bg.")
    return "\n".join(lines) + "\n"

### Define user profile

In [14]:
# Example profile used to test add_user_profile_tobk
user_profile = {
    "user_id": "u777",
    "age_group": "b18to24",  # Age group: 18-24
    "gender": "f",  # Gender: female
    "movie_id": "m777",
    "movie_genre": "drama",
    "rating": 4
}


### Add the user profile and the induced rules to the BK (BK recovered from the loaded model)

In [None]:
# Load the saved AndanteProgram instance
loaded_apmovies = AndanteProgram.load("saved_andante_program.pkl")

# Render the user profile together with the loaded program's rules and knowledge.
# NOTE(review): this text is not passed to the query below, which runs against the loaded
# program as saved — the profile facts for this user are therefore not part of that query.
user_background_profile = add_user_profile_tobk(user_profile, loaded_apmovies)

# Build the query "which movies X are recommended to this user?"
user_id = user_profile["user_id"]
query_rule_for_user = f"recommend({user_id}, X)."  # Query for movie recommendations

# Query the system using the induced clauses to get recommendations for the new user
success, substitutions = loaded_apmovies.query(query_rule_for_user)

# Display result using the induced clauses
if success:
    print(f"The rule was satisfied for {user_id}. Movies recommended:")
    print(substitutions)
else:
    # Report the user that was actually queried (user_id), not `new_user` from another cell
    print(f"No movie recommendations found for {user_id}.")



In [None]:
# Render the profile facts, the induced rules and the loaded knowledge as one background-knowledge text
user_background_profile = add_user_profile_tobk(user_profile, loaded_apmovies)
# Uncomment to inspect the generated text: print(user_background_profile)
# Build the query for the profile's user id
user_id = user_profile["user_id"]
query_rule_for_user = f"recommend({user_id}, X)."

# Build a second program from the generated text so the new user's facts are part of its knowledge.
# NOTE(review): other cells pass a file path to build_from; here it receives program text — confirm both are supported.
ap2 = AndanteProgram.build_from(user_background_profile)
ap2.results = loaded_apmovies.results
ap2.query(query_rule_for_user)

Recommandations trouvées :
    0      1      2      3      4    5     6      7     8     9      10
X  m47  m2987  m1259  m1178  m1960  m17  m337  m1619  m777  m924  m1348
Explication des règles activées :
Matched rule(s): recommend(A, B) :- b45to49(A)., recommend(A, B) :- b18to24(A), drama(B)., recommend(A, B) :- crime(B)., recommend(A, B) :- horror(B)., recommend(A, B) :- b25to34(A), f(A)., recommend(A, B) :- adventure(B).
Matched rule(s): recommend(A, B) :- b45to49(A)., recommend(A, B) :- b18to24(A), drama(B)., recommend(A, B) :- crime(B)., recommend(A, B) :- horror(B)., recommend(A, B) :- b25to34(A), f(A)., recommend(A, B) :- adventure(B).
Matched rule(s): recommend(A, B) :- b45to49(A)., recommend(A, B) :- b18to24(A), drama(B)., recommend(A, B) :- crime(B)., recommend(A, B) :- horror(B)., recommend(A, B) :- b25to34(A), f(A)., recommend(A, B) :- adventure(B).
Matched rule(s): recommend(A, B) :- b45to49(A)., recommend(A, B) :- b18to24(A), drama(B)., recommend(A, B) :- crime(B)., recom

# Combine rules from different partitions 

In [None]:
import collections
from andante.program import AndanteProgram
from andante.collections import OrderedSet
from andante.logic_concepts import Clause

from andante.knowledge import TreeShapedKnowledge
# Directory containing the partition programs.
# NOTE(review): this path is relative to the working directory, whereas other cells use "../prolog_partitions_six".
prolog_directory = "prolog_partitions_six"

# Partition files are numbered from 1
partition_files = [
    f"{prolog_directory}/prolog_program_partition_{i+1}.pl"
    for i in range(6)  # Assuming 6 partitions, adjust as needed
]

# Collect the rules of all partitions in an OrderedSet
# (presumably de-duplicating while keeping insertion order — confirm in andante.collections)
all_rules = OrderedSet()

# Induce a hypothesis per partition and pool the resulting clauses
for partition_file in partition_files:
    print(f"Processing {partition_file}...")
    # Build the AndanteProgram from the current partition file
    ap = AndanteProgram.build_from(partition_file)

    # Induce rules and update knowledge
    induced_knowledge = ap.induce(update_knowledge=True, logging=True, verbose=0)

    # Only a TreeShapedKnowledge result is unpacked; any other type is reported and skipped
    if isinstance(induced_knowledge, TreeShapedKnowledge):
        for clause in induced_knowledge.clauses:
            if isinstance(clause, Clause):
                all_rules.add(clause)
    else:
        print(f"Unexpected type for induced_rules: {type(induced_knowledge)}")

# Output the combined rules
for rule in all_rules:
    print(rule)

# Save the combined rules, one per line
with open("combined_rules.txt", "w") as f:
    for rule in all_rules:
        f.write(str(rule) + "\n")

print("Combined rules saved to combined_rules.txt")

Processing prolog_partitions_six/prolog_program_partition_1.pl...
Processing prolog_partitions_six/prolog_program_partition_2.pl...
Processing prolog_partitions_six/prolog_program_partition_3.pl...
Processing prolog_partitions_six/prolog_program_partition_4.pl...
Processing prolog_partitions_six/prolog_program_partition_5.pl...
Processing prolog_partitions_six/prolog_program_partition_6.pl...
recommend(A, B) :- b25to34(A), comedy(B).
recommend(A, B) :- crime(B).
recommend(A, B) :- f(A).
recommend(A, B) :- b25to34(A), action(B).
recommend(A, B) :- b45to49(A).
recommend(A, B) :- b18to24(A), drama(B).
recommend(A, B) :- horror(B).
recommend(A, B) :- b25to34(A), f(A).
recommend(A, B) :- adventure(B).
recommend(A, B) :- m(A), drama(B).
Combined rules saved to combined_rules.txt


# Apply Union with normalization and unification

In [None]:
import collections
from andante.program import AndanteProgram
from andante.collections import OrderedSet
from andante.logic_concepts import Clause, Atom, Variable, Predicate
from andante.knowledge import TreeShapedKnowledge

# Directory containing the partition programs (relative to the working directory)
prolog_directory = "prolog_partitions_six"

# Partition files are numbered from 1
partition_files = [
    f"{prolog_directory}/prolog_program_partition_{i+1}.pl"
    for i in range(6)  # Assuming 6 partitions, adjust as needed
]

# Start from an empty rule collection: this rebinds `all_rules`, discarding any earlier contents
all_rules = OrderedSet()

# Function to normalize and unify clauses
def normalize_clause(clause):
    """Return a copy of ``clause`` with a sorted body and canonically renamed variables.

    Variables are renamed A, B, C, ... in order of first appearance, scanning the head
    first and then the body atoms sorted by their string form. Because the head is
    scanned first, every head variable receives a fresh name even when it does not
    occur in the body. Leaving such variables untouched would let them collide with a
    freshly assigned name: ``recommend(A, B) :- crime(B).`` would collapse into
    ``recommend(A, A) :- crime(A).``.

    Parameters:
    - clause: object with ``head`` (an atom or a falsy value) and ``body`` (iterable of atoms).

    Returns:
    - Clause: a new clause built from the renamed head and the renamed, sorted body.

    NOTE(review): the body is ordered by the atoms' original string form, so two atoms
    with the same predicate are ordered by their original variable names.
    """
    var_mapping = {}

    def fresh_name(index):
        # A..Z, then A1..Z1, A2..Z2, ... so names stay valid beyond 26 variables
        letter = chr(ord('A') + index % 26)
        return letter if index < 26 else f"{letter}{index // 26}"

    def canonical(term):
        # Non-variable terms (e.g. constants) are kept as they are
        if not isinstance(term, Variable):
            return term
        if term not in var_mapping:
            var_mapping[term] = Variable(fresh_name(len(var_mapping)))
        return var_mapping[term]

    # Head first, so its variables get the leading names
    new_head = clause.head
    if clause.head:
        new_head = Atom(clause.head.predicate, [canonical(term) for term in clause.head.terms])

    # Sort the literals in the body of the clause for consistent ordering
    sorted_body = sorted(clause.body, key=lambda atom: str(atom))
    # Body atoms are iterated directly to obtain their terms, as in the original code
    new_body = [Atom(atom.predicate, [canonical(term) for term in atom]) for atom in sorted_body]

    return Clause(new_head, new_body)

# Induce a hypothesis per partition; pool the clauses after normalizing each one, so that
# clauses differing only in body order or variable names can be recognized as the same rule.
for partition_file in partition_files:
    print(f"Processing {partition_file}...")
    # Build the AndanteProgram from the current partition file
    ap = AndanteProgram.build_from(partition_file)

    # Induce rules and update knowledge
    induced_knowledge = ap.induce(update_knowledge=True, logging=True, verbose=0)

    # Only a TreeShapedKnowledge result is unpacked; any other type is reported and skipped
    if isinstance(induced_knowledge, TreeShapedKnowledge):
        for clause in induced_knowledge.clauses:
            if isinstance(clause, Clause):
                normalized_clause = normalize_clause(clause)
                all_rules.add(normalized_clause)
    else:
        print(f"Unexpected type for induced_rules: {type(induced_knowledge)}")

# Function to check for redundancy and remove duplicates
def remove_redundancy(rules):
    """Copy ``rules`` into a new OrderedSet, adding each rule only if it is not already present.

    Parameters:
    - rules: iterable of rules.

    Returns:
    - OrderedSet: the rules in the order they were first encountered.
    """
    deduplicated = OrderedSet()
    for candidate in rules:
        if candidate in deduplicated:
            continue  # already kept an equal rule
        deduplicated.add(candidate)
    return deduplicated

# Remove redundancy from all_rules
all_rules = remove_redundancy(all_rules)

# Output the combined, normalized, and unique rules
for rule in all_rules:
    print(rule)

# Save the rules, one per line, to the file named in the message below. The cell used to
# write to "combined_rules.txt", which overwrote the non-normalized rules saved by the
# previous section and did not match the reported file name.
with open("combined_rules_normalized.txt", "w") as f:
    for rule in all_rules:
        f.write(str(rule) + "\n")

print("Combined, normalized, and unique rules saved to combined_rules_normalized.txt")