In [5]:
import pandas as pd
from pyswip import Prolog

In [3]:
def save_to_file(strings, filename):
    """Write a list of strings to a file, separated by newlines.

    The file is opened in write mode, so any previous content of
    `filename` is replaced. No newline is added after the last string.

    Args:
        strings: iterable of `str` to write.
        filename: path of the file to create or overwrite.
    """
    with open(filename, mode='w') as handle:
        text = '\n'.join(strings)
        handle.write(text)

### Creazione della knowledge base

In [None]:
def _prolog_atom(value):
    """Render `value` as a single-quoted Prolog atom.

    Backslashes and single quotes are escaped so that text containing an
    apostrophe (e.g. "Schindler's List") does not end the atom early and
    break the syntax of the generated file.
    """
    escaped = str(value).replace('\\', '\\\\').replace("'", "\\'")
    return f"'{escaped}'"


# Every fact is collected in a single list and written with ONE call at the
# end: save_to_file opens the file in 'w' mode, so calling it once per row
# would truncate facts.pl each time and keep only the facts of the last row.
facts = [":-style_check(-discontiguous)."]

# facts about movies
df = pd.read_csv('dataset/movies_v2.csv')

for row in df.itertuples():
    movie = f"movie({_prolog_atom(row.id)})"
    facts.extend([
        f"title({movie}, {_prolog_atom(row.title)}).",
        f"rating({movie}, {_prolog_atom(row.rating)}).",
        f"genre({movie}, {_prolog_atom(row.genre)}).",
        f"year({movie}, {row.year}).",
        f"score({movie}, {row.score}).",
        f"votes({movie}, {row.votes}).",
        f"country({movie}, {_prolog_atom(row.country)}).",
        f"budget({movie}, {row.budget}).",
        f"gross({movie}, {row.gross}).",
        f"company({movie}, {_prolog_atom(row.company)}).",
        f"runtime({movie}, {row.runtime}).",
        # relations between movies and artists
        f"director({movie}, {_prolog_atom(row.director)}).",
        f"writer({movie}, {_prolog_atom(row.writer)}).",
        f"star({movie}, {_prolog_atom(row.star)}).",
    ])

# facts about artists
df = pd.read_csv('dataset/combined_v3.csv')

for row in df.itertuples():
    artist = f"artist({_prolog_atom(row.name)})"
    facts.extend([
        f"birthYear({artist}, {row.birthYear}).",
        f"deathYear({artist}, {row.deathYear}).",
        f"knownForTitle({artist}, {_prolog_atom(row.knownForTitle)}).",
        f"profession1({artist}, {_prolog_atom(row.profession1)}).",
        f"profession2({artist}, {_prolog_atom(row.profession2)}).",
        f"profession3({artist}, {_prolog_atom(row.profession3)}).",
    ])

# Single write: directive + movie facts + artist facts.
save_to_file(facts, 'facts.pl')

### Feature engineering con Prolog

In [None]:
# Create the pyswip Prolog interface and consult the facts file
# ('facts.pl', the file written by the knowledge-base cell).
prolog = Prolog()
prolog.consult('facts.pl')

In [19]:
# List of Prolog clauses (as text) that the cell later writes to clauses.pl.
clauses = []

# num_films_artist/2: number of (movie, role) matches where Artist appears as
# star, director or writer.
clauses.append(("num_films_artist(Artist, Count) :-\n"
                "\tfindall(Movie, (star(movie(Movie), Artist) ; director(movie(Movie), Artist) ; writer(movie(Movie), Artist)), Movies),\n"
                "\tlength(Movies, Count).\n"))

# num_films_directed/2: number of movies directed by Director.
clauses.append(("num_films_directed(Director, Count) :-\n"
                "\tfindall(Movie, director(movie(Movie), Director), Movies),\n"
                "\tlength(Movies, Count).\n"))

# num_films_starred/2: number of movies in which Star is the star.
clauses.append(("num_films_starred(Star, Count) :-\n"
                "\tfindall(Movie, star(movie(Movie), Star), Movies),\n"
                "\tlength(Movies, Count).\n"))

# num_films_written/2: number of movies written by Writer.
clauses.append(("num_films_written(Writer, Count) :-\n"
                "\tfindall(Movie, writer(movie(Movie), Writer), Movies),\n"
                "\tlength(Movies, Count).\n"))

# avg_budget_directed/2: mean budget of the movies directed by Director.
# The `Count > 0` guard makes the predicate fail (instead of raising an
# arithmetic error on division by zero) when the director has no movies,
# consistently with the other averages defined in this cell.
clauses.append(("avg_budget_directed(Director, AvgBudget) :-\n"
                "\tfindall(Budget, (director(movie(Movie), Director), budget(movie(Movie), Budget)), Budgets),\n"
                "\tlength(Budgets, Count),\n"
                "\tCount > 0,\n"
                "\tsum_list(Budgets, TotalBudget),\n"
                "\tAvgBudget is TotalBudget / Count.\n"))

# avg_gross_directed/2: mean gross of the movies directed by Director
# (same empty-list guard as avg_budget_directed/2).
clauses.append(("avg_gross_directed(Director, AvgGross) :-\n"
                "\tfindall(Gross, (director(movie(Movie), Director), gross(movie(Movie), Gross)), Grosses),\n"
                "\tlength(Grosses, Count),\n"
                "\tCount > 0,\n"
                "\tsum_list(Grosses, TotalGross),\n"
                "\tAvgGross is TotalGross / Count.\n"))

# gross_to_budget_ratio/2: Gross / Budget, defined only for a positive budget.
clauses.append(("gross_to_budget_ratio(Movie, Ratio) :-\n"
                "\tgross(movie(Movie), Gross),\n"
                "\tbudget(movie(Movie), Budget),\n"
                "\tBudget > 0, % Evita la divisione per zero\n"
                "\tRatio is (Gross / Budget).\n"))

# film_success/2: Success = 0.7 * (Gross / MaxGross) + 0.3 * (Gross / Budget).
# Fails when the maximum gross or the movie budget is not positive.
clauses.append(("film_success(Movie, Success) :-\n"
                "\tmax_gross(MaxGross),\n"
                "\tMaxGross > 0,\n"
                "\tgross(movie(Movie), Gross),\n"
                "\tbudget(movie(Movie), Budget),\n"
                "\tBudget > 0,\n"
                "\tgross_to_budget_ratio(Movie, Ratio),\n"
                "\tNormalizedGross is Gross / MaxGross,\n"
                "\tSuccess is 0.7 * NormalizedGross + 0.3 * Ratio.\n"))

# max_gross/1: largest Gross value among all gross/2 facts.
clauses.append(("max_gross(MaxGross) :-\n"
                "\tfindall(Gross, gross(movie(_), Gross), Grosses),\n"
                "\tmax_list(Grosses, MaxGross).\n"))

# film_success_category/2: 'basso' when Success =< 0.4, 'medio' when
# 0.4 < Success =< 0.7, 'alto' when Success > 0.7.
# NOTE(review): these three clauses call film_success_normalized/2, which is
# not defined anywhere in this cell (only film_success/2 is). Unless it is
# defined elsewhere, the call cannot be resolved -- confirm the intended
# normalisation and add the missing predicate.
clauses.append(("film_success_category(Movie, 'basso') :-\n"
                "\tfilm_success_normalized(Movie, Success),\n"
                "\tSuccess =< 0.4.\n"))

clauses.append(("film_success_category(Movie, 'medio') :-\n"
                "\tfilm_success_normalized(Movie, Success),\n"
                "\tSuccess > 0.4,\n"
                "\tSuccess =< 0.7.\n"))

clauses.append(("film_success_category(Movie, 'alto') :-\n"
                "\tfilm_success_normalized(Movie, Success),\n"
                "\tSuccess > 0.7.\n"))

# avg_weighted_score/1: mean of the adjusted scores produced by weighted_score/2.
# weighted_score/2 takes the bare movie id as its first argument (it wraps it
# in movie(...) itself), so the generator must use `_` and not `movie(_)`,
# otherwise no solution is ever found.
clauses.append(("avg_weighted_score(AvgScore) :-\n"
                "\tfindall(Score, weighted_score(_, Score), Scores),\n"
                "\tlength(Scores, Count),\n"
                "\tCount > 0, % Assicurati che ci siano score da calcolare\n"
                "\tsum_list(Scores, TotalScore),\n"
                "\tAvgScore is TotalScore / Count.\n"))

# std_dev_weighted_score/1: sample standard deviation (n - 1 denominator) of
# the adjusted scores. The deviations are taken from the mean of the weighted
# scores (avg_weighted_score/1), not from the mean of the raw scores.
clauses.append(("std_dev_weighted_score(StdDev) :-\n"
                "\tavg_weighted_score(AvgScore),\n"
                "\tfindall(Score, weighted_score(_, Score), Scores),\n"
                "\tlength(Scores, Count),\n"
                "\tCount > 1,\n"
                "\tsum_list(Scores, TotalScore),\n"
                "\tsum_of_squares(Scores, TotalScore, AvgScore, SumSq),\n"
                "\tVariance is SumSq / (Count - 1),\n"
                "\tStdDev is sqrt(Variance).\n"))

# sum_of_squares/4: SumSq is the sum of (X - AvgScore)^2 over the list.
# The recursive call comes first so that SumSqRest is bound before it is used
# in the arithmetic (`is/2` needs a fully instantiated right-hand side).
# The second argument is unused and kept only so existing callers keep working.
clauses.append(("sum_of_squares([], _, _, 0).\n"
                "sum_of_squares([Score|Rest], TotalScore, AvgScore, SumSq) :-\n"
                "\tsum_of_squares(Rest, TotalScore, AvgScore, SumSqRest),\n"
                "\tSumSq is SumSqRest + (Score - AvgScore)^2.\n"))

# max_votes/1: largest Votes value among all votes/2 facts.
clauses.append(("max_votes(MaxVotes) :-\n"
                "\tfindall(Votes, votes(movie(_), Votes), VotesList),\n"
                "\tmax_list(VotesList, MaxVotes).\n"))

def _score_band_clauses(predicate, avg_predicate, std_dev_predicate):
    """Build the 'bassa' / 'media' / 'alta' clauses of a score-banding predicate.

    Each generated clause reads the mean and the standard deviation through
    the two given predicates, reads the raw score/2 of the movie, and
    compares the score against mean -/+ one standard deviation.

    Returns:
        A list with the three clause texts, in the order bassa, media, alta.
    """
    preamble = (f"\t{avg_predicate}(AvgScore),\n"
                f"\t{std_dev_predicate}(StdDev),\n"
                "\tscore(movie(Movie), Score),\n")
    band_tests = (
        ("bassa", "\tScore =< AvgScore - StdDev.\n"),
        ("media", "\tScore > AvgScore - StdDev,\n"
                  "\tScore =< AvgScore + StdDev.\n"),
        ("alta", "\tScore > AvgScore + StdDev.\n"),
    )
    return [f"{predicate}(Movie, '{band}') :-\n{preamble}{test}"
            for band, test in band_tests]


# film_quality/2: band of the raw score relative to the raw-score statistics.
clauses.extend(_score_band_clauses("film_quality", "avg_score", "std_dev_score"))

clauses.extend([
    # avg_score/1: mean of all score/2 values (fails when there are none).
    ("avg_score(AvgScore) :-\n"
     "\tfindall(Score, score(movie(_), Score), Scores),\n"
     "\tlength(Scores, Count),\n"
     "\tCount > 0,\n"
     "\tsum_list(Scores, TotalScore),\n"
     "\tAvgScore is TotalScore / Count.\n"),
    # std_dev_score/1: sample standard deviation (n - 1 denominator) of all
    # score/2 values; needs at least two scores.
    ("std_dev_score(StdDev) :-\n"
     "\tavg_score(AvgScore),\n"
     "\tfindall(Score, score(movie(_), Score), Scores),\n"
     "\tlength(Scores, Count),\n"
     "\tCount > 1,\n"
     "\tsum_list(Scores, TotalScore),\n"
     "\tsum_of_squares(Scores, TotalScore, AvgScore, SumSq),\n"
     "\tVariance is SumSq / (Count - 1),\n"
     "\tStdDev is sqrt(Variance).\n"),
])

# film_quality_category/2: same banding, against the weighted-score statistics.
# NOTE(review): the value being compared is still the raw score/2, not the
# result of weighted_score/2 -- confirm whether that is intended.
clauses.extend(_score_band_clauses("film_quality_category",
                                   "avg_weighted_score",
                                   "std_dev_weighted_score"))

# avg_votes/1: mean of all votes/2 values (fails when there are none).
clauses.append(("avg_votes(AvgVotes) :-\n"
                "\tfindall(Votes, votes(movie(_), Votes), VotesList),\n"
                "\tlength(VotesList, Count),\n"
                "\tCount > 0,\n"
                "\tsum_list(VotesList, TotalVotes),\n"
                "\tAvgVotes is TotalVotes / Count.\n"))

# std_dev_votes/1: sample standard deviation (n - 1 denominator) of the votes.
# sum_of_squares is defined in this cell with four arguments
# (List, Total, Avg, SumSq), so it is called with all four here; the previous
# three-argument call referred to a predicate that does not exist.
clauses.append(("std_dev_votes(StdDevVotes) :-\n"
                "\tavg_votes(AvgVotes),\n"
                "\tfindall(Votes, votes(movie(_), Votes), VotesList),\n"
                "\tlength(VotesList, Count),\n"
                "\tCount > 1,\n"
                "\tsum_list(VotesList, TotalVotes),\n"
                "\tsum_of_squares(VotesList, TotalVotes, AvgVotes, SumSq),\n"
                "\tVariance is SumSq / (Count - 1),\n"
                "\tStdDevVotes is sqrt(Variance).\n"))

# vote_category/2: 'poco' when Votes =< mean - std dev, 'medio' when within
# one std dev of the mean, 'molto' when Votes > mean + std dev.
clauses.append(("vote_category(Movie, 'poco') :-\n"
                "\tavg_votes(AvgVotes),\n"
                "\tstd_dev_votes(StdDevVotes),\n"
                "\tvotes(movie(Movie), Votes),\n"
                "\tVotes =< AvgVotes - StdDevVotes.\n"))

clauses.append(("vote_category(Movie, 'medio') :-\n"
                "\tavg_votes(AvgVotes),\n"
                "\tstd_dev_votes(StdDevVotes),\n"
                "\tvotes(movie(Movie), Votes),\n"
                "\tVotes > AvgVotes - StdDevVotes,\n"
                "\tVotes =< AvgVotes + StdDevVotes.\n"))

clauses.append(("vote_category(Movie, 'molto') :-\n"
                "\tavg_votes(AvgVotes),\n"
                "\tstd_dev_votes(StdDevVotes),\n"
                "\tvotes(movie(Movie), Votes),\n"
                "\tVotes > AvgVotes + StdDevVotes.\n"))

# weighted_score/2: score of the movie adjusted by +/- 0.3 (clamped to the
# 0..10 range) depending on its vote_category/2 and film_quality/2:
#   - 'molto' votes: raise an 'alta' score, lower a 'bassa' one;
#   - 'poco' votes:  lower an 'alta' score, raise a 'bassa' one;
#   - 'medio' votes or 'media' quality: score unchanged.
# The clause must end with a full stop: without it the generated clauses.pl
# ends in a syntax error and the clause is never loaded.
clauses.append(("weighted_score(Movie, AdjustedScore) :-\n"
    "\t% Ottieni lo score iniziale del film\n"
    "\tscore(movie(Movie), Score),\n"

    "\t% Ottieni la categoria di voto del film\n"
    "\tvote_category(Movie, VoteCategory),\n"

    "\t% Ottieni la qualita del film\n"
    "\tfilm_quality(Movie, Quality),\n"

    "\t% Aggiusta lo score in base alla categoria di voto e alla qualità\n"
    "\t(\n"
        "\t\tVoteCategory = 'molto',\n"
        "\t\t(\n"
            "\t\t\tQuality = 'alta' -> AdjustedScore is min(10, Score + 0.3) ; % Alza leggermente lo score se qualità alta\n"
            "\t\t\tQuality = 'bassa' -> AdjustedScore is max(0, Score - 0.3) ; % Abbassa leggermente lo score se qualità bassa\n"
            "\t\t\tQuality = 'media' -> AdjustedScore is Score % Lascia invariato lo score se qualità media\n"
        "\t\t);\n"
        "\t\tVoteCategory = 'poco',\n"
        "\t\t(\n"
            "\t\t\tQuality = 'alta' -> AdjustedScore is max(0, Score - 0.3) ; % Abbassa leggermente lo score se qualità alta\n"
            "\t\t\tQuality = 'bassa' -> AdjustedScore is min(10, Score + 0.3) ; % Alza leggermente lo score se qualità bassa\n"
            "\t\t\tQuality = 'media' -> AdjustedScore is Score % Lascia invariato lo score se qualità media\n"
        "\t\t);\n"
        "\t\tVoteCategory = 'medio',\n"
        "\t% Lascia invariato lo score se il voto è medio\n"
        "\t\tAdjustedScore is Score\n"
    "\t).\n"))

# Write every rule collected in `clauses` to clauses.pl.
save_to_file(clauses, 'clauses.pl')