# Dataset

In [2]:
from models.csv_loader import CSVLoader
from models.products.product_registry import ProductRegistry
from models.products.product_mapping_row import ProductMappingRow
from models.products.product_row import ProductRow

# Build the product registry from the rows CSVLoader reads for products
# and for the product mapping.
product_registry = ProductRegistry(
    CSVLoader(ProductRow).read(),
    CSVLoader(ProductMappingRow).read(),
)

In [3]:
from models.users.user_registry import UserRegistry
from models.users.user_mapping_row import UserMappingRow
from models.users.user_row import UserRow

# Build the user registry from the rows CSVLoader reads for users
# and for the user mapping.
user_registry = UserRegistry(
    CSVLoader(UserRow).read(),
    CSVLoader(UserMappingRow).read(),
)

In [4]:
from models.ratings.rating_registry import RatingRegistry
from models.ratings.rating_row import RatingRow

# Build the rating registry from the rating rows; it is also handed the
# user and product registries created in the previous cells.
rating_registry = RatingRegistry(
    CSVLoader(RatingRow).read(),
    user_registry,
    product_registry,
)

# Transform Data to Logic Programs

In [18]:
# Translation tables used to turn free-text titles/genres into bare atoms.
# Separator characters become "_"; every other listed character is dropped.
_MOVIE_NAME_TABLE = str.maketrans({
    "-": "_", " ": "_", "(": "_", "/": "_",
    ")": None, "'": None, ":": None, ",": None, "?": None, "!": None,
    ".": None, "&": None, "*": None, "$": None, "#": None, "é": None,
})
_GENRE_TABLE = str.maketrans({"-": "_", "'": None, "&": None})


def _movie_name_atom(name):
    """Lower-case a movie title and strip/replace punctuation so it can be written as an atom."""
    atom = name.lower().translate(_MOVIE_NAME_TABLE)
    # Names starting with a digit get an "m" prefix. The same prefix is used
    # when nothing is left after sanitizing, instead of failing on atom[0].
    if not atom or atom[0].isdigit():
        atom = "m" + atom
    return atom


def _genre_atom(genre):
    """Lower-case a genre label and strip/replace the punctuation handled for genres."""
    return genre.lower().translate(_GENRE_TABLE)


def generate_progol_program(user_registry, product_registry, rating_registry, output_file_name):
    """Write a Progol-style logic program describing users, movies and ratings.

    The program contains mode declarations, determinations, background
    knowledge (user/1, gender/2, age/2, movie/1, m_gender/2, m_name/2,
    watched/2) and positive/negative ``recommend/2`` examples. A rating
    with a score above 3 is a positive example, any other a negative one.

    Args:
        user_registry: object whose ``users`` iterable yields items with
            ``uid``, ``gender`` and ``age`` attributes.
        product_registry: object whose ``products`` iterable yields items
            with ``pid``, ``name`` and ``genre`` attributes.
        rating_registry: object whose ``ratings`` iterable yields items with
            ``user`` (having ``uid``), ``product`` (having ``pid``) and
            ``rating`` attributes.
        output_file_name: output path without extension; ``.pl`` is appended.

    Returns:
        The path of the written file (``output_file_name`` + ``".pl"``).

    Raises:
        KeyError: if a user's ``age`` is not one of the known age brackets.
    """
    # Mode declarations
    # NOTE(review): the head mode types the second argument as m_name while
    # the examples below carry movie ids -- confirm which one is intended.
    modeh = "modeh(*, recommend(+user, -m_name)).\n"
    feature_modes = [
        ("gender", "user", "value"),
        ("age", "user", "value"),
        ("m_gender", "movie", "value"),
        ("m_name", "movie", "value"),
        ("watched", "user", "movie"),
    ]
    age_dic = {
        "Under 18": "under18",
        "56+": "plus56",
        "25-34": "b25to34",
        "45-49": "b45to49",
        "50-55": "b50to55",
        "35-44": "b35to44",
        "18-24": "b18to24",
    }

    modeb_list = [
        f"modeb(*, {feature}(+{in_type}, -{out_type}))."
        for feature, in_type, out_type in feature_modes
    ]
    modeb_list.append("modeb(*,user(+userid)).")
    modeb_list.append("modeb(*,movie(+movieid)).")

    # Determinations
    determinations = ["determination(recommend/2, user/1)."]
    determinations.extend(
        f"determination(recommend/2, {feature}/2)." for feature, _, _ in feature_modes
    )

    # Background knowledge
    background_knowledge = []
    for user in user_registry.users:
        # NOTE(review): gender is written verbatim; if the data holds
        # capitalised values they would read as variables -- confirm.
        background_knowledge.append(f"user({user.uid}).")
        background_knowledge.append(f"gender({user.uid},{user.gender}).")
        background_knowledge.append(f"age({user.uid},{age_dic[user.age]}).")
    for product in product_registry.products:
        background_knowledge.append(f"movie({product.pid}).")
        background_knowledge.append(f"m_gender({product.pid},{_genre_atom(product.genre)}).")
        background_knowledge.append(f"m_name({product.pid},{_movie_name_atom(product.name)}).")

    # watched/2 facts and the positive/negative examples come from the same
    # ratings, so they are collected in a single pass.
    watched_facts = []
    positive_examples = []
    negative_examples = []
    for rating in rating_registry.ratings:
        # Use the ids (not the user/product objects) so the examples refer
        # to the same constants as the background knowledge.
        user_id = rating.user.uid
        movie_id = rating.product.pid
        watched_facts.append(f"watched({user_id},{movie_id}).")
        example = f"recommend({user_id},{movie_id})."
        if rating.rating > 3:
            positive_examples.append(example)
        else:
            negative_examples.append(example)
    background_knowledge.extend(watched_facts)

    # Combine all parts into a Progol-compatible logic program
    progol_program = "".join([
        "% Mode Declarations\n",
        modeh,
        "\n".join(modeb_list) + "\n\n",
        "% Determinations\n",
        "\n".join(determinations) + "\n\n",
        "% Background Knowledge\n:- begin_bg.\n",
        "\n".join(background_knowledge) + "\n:- end_bg.\n\n",
        "% Positive Examples\n:- begin_in_pos.\n",
        "\n".join(positive_examples) + "\n:- end_in_pos.\n\n",
        "% Negative Examples\n:- begin_in_neg.\n",
        "\n".join(negative_examples) + "\n:- end_in_neg.\n",
    ])

    # Save the Progol logic program to a file with a custom name
    output_file_path = f"{output_file_name}.pl"
    with open(output_file_path, "w") as file:
        file.write(progol_program)

    return output_file_path

# Example run: write the Progol program for the registries loaded earlier
# in this notebook and report where it was saved.
output_file_name = './movie_rec'
output_file = generate_progol_program(
    user_registry=user_registry,
    product_registry=product_registry,
    rating_registry=rating_registry,
    output_file_name=output_file_name,
)
print(f"Progol logic program saved as {output_file}")

Progol logic program saved as ./movie_rec.pl


# Horizontal Partitioning

In [22]:
import os

import pandas as pd
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit

# Load the dataset
file_path = 'C:/Users/yakaichi/OneDrive - Université de Namur/Bureau/Data/pre_processed_data/smallestaftercorrelationstudies2_intubationrecords_dataset.xlsx'
data = pd.read_excel(file_path)

# Get the target column (assumed to be named 'complications')
target_column = 'complications'

# Specify the number of splits
num_splits = 6  # You can adjust this depending on how many files you want

# Use StratifiedKFold so the saved parts form a real partition: its test
# folds are pairwise disjoint, together cover every row, and each keeps the
# class balance of the target. (StratifiedShuffleSplit draws an independent
# random sample per split, so the files would overlap and miss rows.)
# The variable keeps its original name in case later cells refer to it.
sss = StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=42)

# Directory to save the splits
directory = 'C:/Users/yakaichi/OneDrive - Université de Namur/Bureau/Data/pre_processed_data/programs_parts3/'
# Create the output directory if needed so the writes below cannot fail on it.
os.makedirs(directory, exist_ok=True)

# Split and save the datasets: the test indices of fold i become part i+1.
for i, (_, test_index) in enumerate(sss.split(data, data[target_column])):
    split_df = data.iloc[test_index]
    split_df.to_excel(f'{directory}horizontal_split_dataset_{i+1}.xlsx', index=False)

print("Balanced datasets have been saved successfully.")

ModuleNotFoundError: No module named 'sklearn'

# ILP Learning 

In [21]:
from andante.program import AndanteProgram 
# Build an Andante program from the Progol file; "movie_rec.pl" is the name
# the generation cell writes to (as ./movie_rec.pl).
apmovies = AndanteProgram.build_from(
    "movie_rec.pl",
)
# Run induction on the loaded program. Kept as the cell's last expression so
# the notebook displays whatever induce() returns.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so the
# call was stopped by hand -- presumably it is long-running; confirm.
apmovies.induce(
    update_knowledge=True,
    logging=True,
    verbose=0,
)

KeyboardInterrupt: 