In [11]:
import pandas as pd
from pyswip import Prolog
import numpy as np

In [12]:
def save_to_file(strings, filename):
    """Append Prolog source text to ``filename``.

    The items of ``strings`` are joined with newlines and appended to the
    file (mode ``'a'``), so successive calls accumulate in the same file.

    Args:
        strings: iterable of str, one fact / clause / directive per item.
        filename: path of the file to append to; it is created if missing.

    The chunk written by a call always ends with a newline. Without it the
    first string of the next call is glued onto the last string of this one
    (e.g. ``artist('a').artist('b').``), and Prolog rejects that because the
    ``.`` that ends a clause must be followed by layout.
    """
    text = '\n'.join(strings)
    # Only add the terminator when it is missing, so callers that already end
    # their last string with '\n' get exactly the output they had before.
    if text and not text.endswith('\n'):
        text += '\n'
    with open(filename, 'a') as f:
        f.write(text)

### Creazione della knowledge base

In [14]:
FACTS_FILE = 'facts.pl'


def _prolog_atom(value):
    """Return ``value`` rendered as a single-quoted Prolog atom.

    Backslashes and single quotes are escaped, so text such as
    ``Ocean's Eleven`` cannot terminate the atom early and corrupt the file.
    """
    text = str(value).replace('\\', '\\\\').replace("'", "\\'")
    return f"'{text}'"


# save_to_file() appends, so start from an empty file: re-running this cell
# would otherwise duplicate every fact and skew the counts and averages that
# the Prolog rules derive from them.
with open(FACTS_FILE, 'w'):
    pass

# Movie facts.
df = pd.read_csv('../dataset/movies_v2.csv')

# The directive silences warnings about clauses of one predicate not being
# contiguous (facts are grouped per movie, not per predicate).
kb_lines = [":-style_check(-discontiguous)."]

# NOTE(review): numeric columns are interpolated verbatim; a missing value
# would be written as `nan` — confirm the CSV has no gaps in these columns.
for row in df.itertuples():
    movie = f"movie({row.id})"
    kb_lines.extend([
        f"title({movie}, {_prolog_atom(row.title)}).",
        f"rating({movie}, {_prolog_atom(row.rating)}).",
        f"genre({movie}, {_prolog_atom(row.genre)}).",
        f"year({movie}, {row.year}).",
        f"score({movie}, {row.score}).",
        f"votes({movie}, {row.votes}).",
        f"country({movie}, {_prolog_atom(row.country)}).",
        f"budget({movie}, {row.budget}).",
        f"gross({movie}, {row.gross}).",
        f"company({movie}, {_prolog_atom(row.company)}).",
        f"runtime({movie}, {row.runtime}).",
        # relations between a movie and its artists
        f"director({movie}, {_prolog_atom(row.director)}).",
        f"star({movie}, {_prolog_atom(row.star)}).",
    ])

# Unique values of the 'director' column.
df_directors = df['director'].unique()
# Unique values of the 'star' column.
df_stars = df['star'].unique()

# People that appear both as a director and as a star.
df_common = pd.Series(np.intersect1d(df_directors, df_stars))

# Artist facts: one per person in the intersection.
kb_lines.extend(f"artist({_prolog_atom(name)})." for name in df_common)

# Write everything in one go, one clause per line, with a final newline so that
# every clause-terminating '.' is followed by layout.
save_to_file(['\n'.join(kb_lines) + '\n'], FACTS_FILE)

### Feature engineering con Prolog

In [19]:
clauses = []

# Rule: number of films in which a given actor is the star.
# findall/3 takes the star/2 goal alone and length/2 is a separate goal of the
# body; with the parentheses misplaced, length/2 became findall's third
# argument and the unbound Movies was called as a goal.
clauses.append(("num_films_by_star(Star, Count) :-\n"
                "\tfindall(Movie, star(movie(Movie), Star), Movies),\n"
                "\tlength(Movies, Count).\n\n"))

# Rule: mean budget of the films starring a given actor.
# `N > 0` makes the rule fail for an actor with no films instead of raising an
# evaluation error on 0 / 0 (same guard as avg_score/1).
clauses.append(("average_budget_by_star(Star, AvgBudget) :-\n"
                "\tfindall(Budget, (star(movie(ID), Star), budget(movie(ID), Budget)), Budgets),\n"
                "\tlength(Budgets, N),\n"
                "\tN > 0,\n"
                "\tsum_list(Budgets, Total),\n"
                "\tAvgBudget is Total / N.\n\n"))

# Rule: mean score of the films starring a given actor (same empty-list guard).
clauses.append(("average_score_by_star(Star, AvgScore) :-\n"
                "\tfindall(Score, (star(movie(ID), Star), score(movie(ID), Score)), Scores),\n"
                "\tlength(Scores, N),\n"
                "\tN > 0,\n"
                "\tsum_list(Scores, Total),\n"
                "\tAvgScore is Total / N.\n\n"))

# Rule: sample standard deviation (N - 1 denominator) of the budgets of the
# films starring a given actor.
# The values are collected and `N > 1` is checked before anything is divided:
# with a single film N - 1 is 0, so the rule now fails instead of raising a
# division-by-zero error (same guard as std_dev_score/1).
clauses.append(("std_dev_budget_by_star(Star, StdDev) :-\n"
                "\tfindall(Budget, (star(movie(ID), Star), budget(movie(ID), Budget)), Budgets),\n"
                "\tlength(Budgets, N),\n"
                "\tN > 1,\n"
                "\taverage_budget_by_star(Star, AvgBudget),\n"
                "\tsum_list(Budgets, Total),\n"
                "\tsum_of_squares(Budgets, Total, AvgBudget, SumSq),\n"
                "\tVariance is SumSq / (N - 1),\n"
                "\tStdDev is sqrt(Variance).\n\n"))

# Rule: sample standard deviation of the scores of the films starring a given
# actor (same structure and guard as the budget rule).
clauses.append(("std_dev_score_by_star(Star, StdDev) :-\n"
                "\tfindall(Score, (star(movie(ID), Star), score(movie(ID), Score)), Scores),\n"
                "\tlength(Scores, N),\n"
                "\tN > 1,\n"
                "\taverage_score_by_star(Star, AvgScore),\n"
                "\tsum_list(Scores, Total),\n"
                "\tsum_of_squares(Scores, Total, AvgScore, SumSq),\n"
                "\tVariance is SumSq / (N - 1),\n"
                "\tStdDev is sqrt(Variance).\n\n"))

# Rule sum_of_squares/4: sum of the squared deviations of a list of values from
# Avg. The second argument (Total) is only passed through; it is kept so the
# arity used by the callers does not change.
# The recursive call runs first so that SumSqRest is bound when the sum for the
# current element is evaluated; evaluating `SumSq + ...` up front raised an
# instantiation error because SumSq is the (still unbound) output.
clauses.append(("sum_of_squares([], _, _, 0).\n"
                "sum_of_squares([Value|Rest], Total, Avg, SumSq) :-\n"
                "\tsum_of_squares(Rest, Total, Avg, SumSqRest),\n"
                "\tSumSq is SumSqRest + (Value - Avg)^2.\n\n"))

# Rule: number of films directed by a given director.
# findall/3 takes the director/2 goal alone and length/2 is a separate goal of
# the body; with the parentheses misplaced, length/2 became findall's third
# argument and the unbound Movies was called as a goal.
clauses.append(("num_films_by_director(Director, Count) :-\n"
                "\tfindall(Movie, director(movie(Movie), Director), Movies),\n"
                "\tlength(Movies, Count).\n\n"))

# Rule: mean budget of the films directed by a given director.
# `N > 0` makes the rule fail for a director with no films instead of raising
# an evaluation error on 0 / 0 (same guard as avg_score/1).
clauses.append(("average_budget_by_director(Director, AvgBudget) :-\n"
                "\tfindall(Budget, (director(movie(ID), Director), budget(movie(ID), Budget)), Budgets),\n"
                "\tlength(Budgets, N),\n"
                "\tN > 0,\n"
                "\tsum_list(Budgets, Total),\n"
                "\tAvgBudget is Total / N.\n\n"))

# Rule: mean score of the films directed by a given director (same empty-list guard).
clauses.append(("average_score_by_director(Director, AvgScore) :-\n"
                "\tfindall(Score, (director(movie(ID), Director), score(movie(ID), Score)), Scores),\n"
                "\tlength(Scores, N),\n"
                "\tN > 0,\n"
                "\tsum_list(Scores, Total),\n"
                "\tAvgScore is Total / N.\n\n"))

# Rule: sample standard deviation (N - 1 denominator) of the budgets of the
# films directed by a given director.
# The values are collected and `N > 1` is checked before anything is divided:
# with a single film N - 1 is 0, so the rule now fails instead of raising a
# division-by-zero error (same guard as std_dev_score/1).
clauses.append(("std_dev_budget_by_director(Director, StdDev) :-\n"
                "\tfindall(Budget, (director(movie(ID), Director), budget(movie(ID), Budget)), Budgets),\n"
                "\tlength(Budgets, N),\n"
                "\tN > 1,\n"
                "\taverage_budget_by_director(Director, AvgBudget),\n"
                "\tsum_list(Budgets, Total),\n"
                "\tsum_of_squares(Budgets, Total, AvgBudget, SumSq),\n"
                "\tVariance is SumSq / (N - 1),\n"
                "\tStdDev is sqrt(Variance).\n\n"))

# Rule: sample standard deviation of the scores of the films directed by a
# given director (same structure and guard as the budget rule).
clauses.append(("std_dev_score_by_director(Director, StdDev) :-\n"
                "\tfindall(Score, (director(movie(ID), Director), score(movie(ID), Score)), Scores),\n"
                "\tlength(Scores, N),\n"
                "\tN > 1,\n"
                "\taverage_score_by_director(Director, AvgScore),\n"
                "\tsum_list(Scores, Total),\n"
                "\tsum_of_squares(Scores, Total, AvgScore, SumSq),\n"
                "\tVariance is SumSq / (N - 1),\n"
                "\tStdDev is sqrt(Variance).\n\n"))

# The four rules below compute a mean / standard-deviation ratio.
# `StdDev > 0` makes a rule fail when every value is identical (deviation 0)
# instead of raising a division-by-zero error, in the same way
# film_profit_index/2 guards its divisor with `Budget > 0`.

# Rule: mean / standard deviation of the budgets of a director's films.
clauses.append(("director_budget_index(Director, Ratio) :-\n"
                "\tstd_dev_budget_by_director(Director, StdDev),\n"
                "\tStdDev > 0,\n"
                "\taverage_budget_by_director(Director, AvgBudget),\n"
                "\tRatio is AvgBudget / StdDev.\n"))

# Rule: mean / standard deviation of the scores of a director's films.
clauses.append(("director_score_index(Director, Ratio) :-\n"
                "\tstd_dev_score_by_director(Director, StdDev),\n"
                "\tStdDev > 0,\n"
                "\taverage_score_by_director(Director, AvgScore),\n"
                "\tRatio is AvgScore / StdDev.\n"))

# Rule: mean / standard deviation of the budgets of an actor's films.
clauses.append(("star_budget_index(Star, Ratio) :-\n"
                "\tstd_dev_budget_by_star(Star, StdDev),\n"
                "\tStdDev > 0,\n"
                "\taverage_budget_by_star(Star, AvgBudget),\n"
                "\tRatio is AvgBudget / StdDev.\n"))

# Rule: mean / standard deviation of the scores of an actor's films.
clauses.append(("star_score_index(Star, Ratio) :-\n"
                "\tstd_dev_score_by_star(Star, StdDev),\n"
                "\tStdDev > 0,\n"
                "\taverage_score_by_star(Star, AvgScore),\n"
                "\tRatio is AvgScore / StdDev.\n"))


# Rule: profit index of a film, gross / budget (fails when the budget is not positive).
clauses.append(("film_profit_index(Movie, Ratio) :-\n"
                "\tgross(movie(Movie), Gross),\n"
                "\tbudget(movie(Movie), Budget),\n"
                "\tBudget > 0,\n"
                "\tRatio is (Gross / Budget).\n"))

# Rule: commercial-success index of a film, profit index * log(gross).
# `Gross > 0` keeps log/1 inside its domain; a film with zero gross makes the
# rule fail instead of raising an evaluation error.
clauses.append(("film_success_index(Movie, Ratio) :-\n"
                "\tfilm_profit_index(Movie, ProfitIndex),\n"
                "\tgross(movie(Movie), Gross),\n"
                "\tGross > 0,\n"
                "\tRatio is ProfitIndex * log(Gross).\n"))

# Rule: cult index of a film, score * log(votes).
# score/2 facts are keyed by movie(Id) like every other fact, so the lookup
# must wrap Movie in movie/1; with the bare id it never matched any fact.
# `Votes > 0` keeps log/1 inside its domain.
clauses.append(("film_cult_index(Movie, Ratio) :-\n"
                "\tvotes(movie(Movie), Votes),\n"
                "\tVotes > 0,\n"
                "\tscore(movie(Movie), Score),\n"
                "\tRatio is Score * log(Votes).\n"))

# Rule: mean score over every film in the knowledge base.
# The body goals are listed one per entry and joined into the clause text;
# `N > 0` makes the rule fail when there are no score/2 facts.
avg_score_goals = [
    "findall(Score, score(movie(_), Score), Scores),",
    "length(Scores, N),",
    "N > 0,",
    "sum_list(Scores, TotalScore),",
    "AvgScore is TotalScore / N.",
]
clauses.append("avg_score(AvgScore) :-\n\t" + "\n\t".join(avg_score_goals) + "\n")

# Rule: sample standard deviation (N - 1 denominator) of the score over every
# film; it needs at least two scores (`N > 1`) and relies on sum_of_squares/4.
std_dev_score_goals = [
    "avg_score(AvgScore),",
    "findall(Score, score(movie(_), Score), Scores),",
    "length(Scores, N),",
    "N > 1,",
    "sum_list(Scores, TotalScore),",
    "sum_of_squares(Scores, TotalScore, AvgScore, SumSq),",
    "Variance is SumSq / (N - 1),",
    "StdDev is sqrt(Variance).",
]
clauses.append("std_dev_score(StdDev) :-\n\t" + "\n\t".join(std_dev_score_goals) + "\n")

# Rules: quality band of a film, decided by where its score falls relative to
# the global mean and standard deviation of the scores:
#   'bassa' (low)    score =< mean - std
#   'media' (medium) mean - std < score =< mean + std
#   'alta'  (high)   score > mean + std
# Every band shares the same three lookup goals and differs only in the final
# comparison(s), so the three clauses are generated from one table.
quality_lookup_goals = [
    "avg_score(AvgScore),",
    "std_dev_score(StdDev),",
    "score(movie(Movie), Score),",
]
quality_bands = [
    ("bassa", ["Score =< AvgScore - StdDev."]),
    ("media", ["Score > AvgScore - StdDev,", "Score =< AvgScore + StdDev."]),
    ("alta", ["Score > AvgScore + StdDev."]),
]
for band_label, band_tests in quality_bands:
    band_goals = quality_lookup_goals + band_tests
    clauses.append(f"film_quality(Movie, '{band_label}') :-\n\t"
                   + "\n\t".join(band_goals) + "\n")


# Persist the rules. save_to_file() appends, so empty the file first: otherwise
# every re-run of this cell adds a second copy of each rule and Prolog answers
# each query once per copy.
with open('clauses.pl', 'w'):
    pass
save_to_file(clauses, 'clauses.pl')