In [1]:
# IMPORTS

import os
from datetime import datetime

import pandas as pd
from pyswip import Prolog

In [2]:
# CONSTANTS

# Path of the Prolog file that receives the generated facts.
FACTS_FILE_PATH = 'kb/facts.pl'
# Path of the Prolog file that receives the generated rules.
RULES_FILE_PATH = 'kb/rules.pl'

In [3]:
def write_on_file(path, s):
    """
    Write the string ``s`` to the file at ``path``, replacing previous content.

    Missing parent directories are created first, and the text is always
    encoded as UTF-8 so the result does not depend on the platform's default
    encoding.

    Args:
        path: Destination file path.
        s: Text to write.
    """
    parent_dir = os.path.dirname(path)
    if parent_dir:
        # The output directory may not exist yet (e.g. on a fresh checkout).
        os.makedirs(parent_dir, exist_ok=True)
    # Plain 'w' is enough: the file is only written, never read back here.
    with open(path, 'w', encoding='utf-8') as file:
        file.write(s)

In [3]:
# KNOWLEDGE BASE CREATION

def create_facts_from_datasets():
    """
    Build the Prolog facts file from the 311 requests and sub-borough datasets.

    Reads ``datasets/311-2023-05-v2.csv`` and ``datasets/subboroughs-ny.csv``,
    emits one fact per attribute for every request and every sub-borough, and
    writes the result to ``FACTS_FILE_PATH``.

    Text values are written as single-quoted Prolog atoms with embedded
    backslashes and single quotes escaped, so a value such as ``O'Brien``
    cannot break the syntax of the generated file.
    """
    def quote_atom(value):
        # Escape the backslash first, otherwise the backslash introduced for
        # the quote would itself be doubled.
        escaped = str(value).replace('\\', '\\\\').replace("'", "\\'")
        return f"'{escaped}'"

    def convert_to_prolog_date(s):
        # A missing timestamp (NaN read by pandas, or the literal 'unknown')
        # is represented by the atom `unknown`.
        if pd.isna(s) or s == 'unknown':
            return 'unknown'
        date = datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
        return f"date({date.day},{date.month},{date.year})"

    # (predicate name, dataset column) for every per-sub-borough value, in
    # the order in which the facts are emitted.
    subborough_predicates = (
        ('property_crime_rate', 'Property Crime Rate'),
        ('violent_crime_rate', 'Violent Crime Rate'),
        ('low_income_pop', 'Low Income Population'),
        ('mediumlow_income_pop', 'Medium-Low Income Population'),
        ('medium_income_pop', 'Medium Income Population'),
        ('mediumhigh_income_pop', 'Medium-High Income Population'),
        ('high_income_pop', 'High Income Population'),
        ('veryhigh_income_pop', 'Very High Income Population'),
        ('poverty_rate', 'Poverty Rate'),
        ('hispanic_pop', 'Hispanic Population'),
        ('asian_pop', 'Asian Population'),
        ('black_pop', 'Black Population'),
        ('white_pop', 'White Population'),
        ('pop_density', 'Population Density'),
        ('pop_aged_65', 'Population Aged 65+'),
        ('nys_born_people', 'NYS Born People'),
        ('foreign_born_people', 'Foreign Born People'),
        ('disabled_people', 'Disabled People'),
        ('unemployment_rate', 'Unemployment Rate'),
        ('carfree_commuters', 'Car-Free Commuters'),
        ('families_with_children', 'Families with Children'),
        ('people_with_bachelor', 'People O25 with Bachelor'),
        ('people_without_diploma', 'People O25 without Diploma'),
    )

    df_requests = pd.read_csv('datasets/311-2023-05-v2.csv')
    df_subboroughs = pd.read_csv('datasets/subboroughs-ny.csv')

    # Collect the pieces in a list and join once at the end instead of
    # growing one large string with += on every row.
    chunks = [":-style_check(-discontiguous).\n"]

    for _, row in df_requests.iterrows():
        request = f"request({row['Unique Key']})"
        subborough = f"subborough({quote_atom(row['Sub-Borough Area'])})"
        chunks.append(
            f"created_date({request}, {convert_to_prolog_date(row['Created Date'])}).\n"
            f"closed_date({request}, {convert_to_prolog_date(row['Closed Date'])}).\n"
            f"status({request}, {quote_atom(row['Status'])}).\n"
            f"complaint_type({request}, {quote_atom(row['Complaint Type'])}).\n"
            f"location_type({request}, {quote_atom(row['Location Type'])}).\n"
            f"agency({request}, {quote_atom(row['Agency'])}).\n"
            f"channel_type({request}, {quote_atom(row['Channel Type'])}).\n"
            f"incident_location_lat({request}, {row['Latitude']}).\n"
            f"incident_location_long({request}, {row['Longitude']}).\n"
            f"incident_address({request}, {quote_atom(row['Incident Address'])}).\n"
            f"incident_borough({request}, {quote_atom(row['Borough'])}).\n"
            f"incident_subborough({request}, {subborough}).\n"
        )

    for _, row in df_subboroughs.iterrows():
        subborough = f"subborough({quote_atom(row['Sub-Borough Area'])})"
        chunks.extend(
            f"{predicate}({subborough}, {row[column]}).\n"
            for predicate, column in subborough_predicates
        )

    write_on_file(FACTS_FILE_PATH, "".join(chunks))

In [4]:
# Generate the Prolog facts file from the CSV datasets.
create_facts_from_datasets()

In [None]:
def define_rules():
    """
    Write the Prolog rules of the knowledge base to ``RULES_FILE_PATH``.

    The rules cover: request counts per sub-borough and their average,
    day differences between dates, closed / immediately-closed requests,
    white-majority / white-minority sub-boroughs and a racial diversity index.

    The rules rely on the predicates emitted into the facts file
    (``incident_subborough/2``, ``status/2``, ``created_date/2``,
    ``closed_date/2``, ``property_crime_rate/2``, ``white_pop/2``, ...).
    """
    clauses = [
        # Number of requests located in the given sub-borough.
        ("count_requests_per_subborough(subborough(S), Count) :-\n"
         "findall(Request, (incident_subborough(request(Request), subborough(S))), Requests),\n"
         "length(Requests, Count).\n"),

        # `subborough(...)` only ever appears as a term inside other facts,
        # so it cannot be called as a goal. Enumerate the sub-boroughs from
        # a predicate that has exactly one fact per sub-borough instead.
        ("known_subborough(S) :-\n"
         "property_crime_rate(subborough(S), _).\n"),

        # Mean number of requests per sub-borough; fails (rather than
        # raising a division-by-zero error) when no sub-borough is known.
        ("average_requests_per_subborough(Average) :-\n"
         "findall(Count, (known_subborough(S), count_requests_per_subborough(subborough(S), Count)), Counts),\n"
         "length(Counts, NumberOfSubboroughs),\n"
         "NumberOfSubboroughs > 0,\n"
         "sum_list(Counts, Total),\n"
         "Average is Total / NumberOfSubboroughs.\n"),

        # Gregorian calendar date -> Julian Day Number (integer arithmetic).
        # The former D + M*30 + Y*365 approximation gave, for instance,
        # 0 days between January 31st and February 1st.
        ("date_to_julian(date(D, M, Y), Julian) :-\n"
         "Shift is (14 - M) // 12,\n"
         "YearAdj is Y + 4800 - Shift,\n"
         "MonthAdj is M + 12 * Shift - 3,\n"
         "Julian is D + (153 * MonthAdj + 2) // 5 + 365 * YearAdj\n"
         "          + YearAdj // 4 - YearAdj // 100 + YearAdj // 400 - 32045.\n"),

        ("days_between(date(D1, M1, Y1), date(D2, M2, Y2), Days) :-\n"
         "date_to_julian(date(D1, M1, Y1), Julian1),\n"
         "date_to_julian(date(D2, M2, Y2), Julian2),\n"
         "Days is Julian2 - Julian1.\n"),

        ("is_closed(request(R)) :-\n"
         "status(request(R), 'closed').\n"),

        # The backslash is doubled so that Python emits the Prolog operator
        # \= without relying on an invalid string escape sequence.
        ("days_to_close(request(R), Days) :-\n"
         "created_date(request(R), CreatedDate),\n"
         "closed_date(request(R), ClosedDate),\n"
         "ClosedDate \\= unknown,\n"
         "days_between(CreatedDate, ClosedDate, Days).\n"),

        ("is_closed_immediately(request(R)) :-\n"
         "is_closed(request(R)),\n"
         "days_to_close(request(R), Days),\n"
         "Days =:= 0.\n"),

        ("majority_white(subborough(S)) :-\n"
         "white_pop(subborough(S), Wh),\n"
         "Wh > 60.\n"),

        ("minority_white(subborough(S)) :-\n"
         "white_pop(subborough(S), Wh),\n"
         "Wh < 20.\n"),

        # 100 minus the root-mean-square deviation of the four group
        # percentages from an even 25% split.
        ("racial_diversity_index(subborough(S), Index) :-\n"
         "white_pop(subborough(S), Wh),\n"
         "black_pop(subborough(S), Bl),\n"
         "asian_pop(subborough(S), As),\n"
         "hispanic_pop(subborough(S), Hs),\n"
         "IdealPercentage is 25.0,\n"
         "DeviationWh is (Wh - IdealPercentage) ** 2,\n"
         "DeviationBl is (Bl - IdealPercentage) ** 2,\n"
         "DeviationAs is (As - IdealPercentage) ** 2,\n"
         "DeviationHs is (Hs - IdealPercentage) ** 2,\n"
         "SumOfDeviations is DeviationWh + DeviationBl + DeviationAs + DeviationHs,\n"
         "Index is 100 - sqrt(SumOfDeviations / 4).\n"),
    ]

    write_on_file(RULES_FILE_PATH, "".join(clauses))

In [21]:
# Write the Prolog rules file.
define_rules()

In [None]:
# Load the generated knowledge base, then print the solutions of a few
# sample queries against it.
prolog = Prolog()

for kb_path in (FACTS_FILE_PATH, RULES_FILE_PATH):
    prolog.consult(kb_path)

sample_queries = (
    "count_requests_per_subborough(subborough('queens village'), Count)",
    "is_closed_immediately(request(R))",
    "days_to_close(request(R), Days)",
    "majority_white(subborough(S))",
    "minority_white(subborough(S))",
    "racial_diversity_index(subborough(S), Index)",
)

for goal in sample_queries:
    solutions = list(prolog.query(goal))
    print(solutions)