This notebook file contains all the code used for the experiments that were done. Data and results can be found in the officially used data folder. This file is a cleaned-up version of the file the experiments were run in. I kept that file accessible in the legacy code folder as "tree_and_server_legacy.ipynb", but this file contains all the useful code with none of the errant test functions and print statements. I also added more comments to this file for readability.

In [None]:
import pandas as pd
import numpy as np
import csv
import subprocess
import time
import pickle
import json
from sklearn.metrics import classification_report
import re
from openpyxl import Workbook
# For below import to work move convert_report2excel from support files to the same directory as this file
from convert_report2excel import convert_report2excel

# To run this code it is also important that you clone a local copy of the github project https://github.com/martaannaj/RecommenderServer

In [None]:
# Below functions evolved out of earlier versions that were created when first encountering this project

def create_tree_train_set(training_set:dict, tsvfilename:str, path_to_server_dir: str):
    """
    Write the tag keys of every training object to a TSV file and build a tree from it.

    Meant for use in the final experiments.

    training_set is iterated element by element, and every element must offer
    ``.keys()`` (e.g. the pois["tags"] column of a pandas geodata object, where
    each entry is a dict of tags). Note that, despite the ``dict`` annotation,
    the argument is treated as a collection of such dicts. One TSV row holding
    the keys is written per element; elements without any keys are skipped.

    tsvfilename is the file the rows are written to. It should stay consistent
    throughout the experiment, as all corresponding files will be named after it.
    It may be relative (to the current working directory) or absolute.

    path_to_server_dir should be the path to a copy of RecommenderServer from
    GitHub: https://github.com/martaannaj/RecommenderServer. The build command
    is executed with this directory as its working directory.

    Raises subprocess.CalledProcessError if the build command exits with a
    non-zero status, so that a failed tree build does not go unnoticed.
    """
    import os  # Local import: only needed to resolve the absolute TSV path

    # newline="" is what the csv module requires; without it every row ends in
    # "\r\r\n" on Windows. UTF-8 keeps non-ASCII tag keys intact on every platform.
    with open(tsvfilename, "w", newline="", encoding="utf-8") as tsvfile:
        tsv_writer = csv.writer(tsvfile, delimiter='\t')
        for obj in training_set:
            keys = list(obj.keys())
            if keys:
                # Making sure the row isn't empty, even though it should never be
                tsv_writer.writerow(keys)

    # The server command runs inside path_to_server_dir, so it needs the
    # absolute location of the TSV that was just written.
    tsv_path = os.path.abspath(tsvfilename)

    # This is the code to query the RecommenderServer to create a tree
    subprocess.run(
        ['RecommenderServer', 'build-tree', 'from-tsv', tsv_path],
        cwd=path_to_server_dir,
        check=True,
    )

def multiquery(tsvfilename: str, path_to_server_dir: str, query_list: list[list[str]], n:int = 1,
               max_retries: int = 10, retry_delay: float = 1.0) -> list[str]:
    """
    Opens a server and queries it for every property list in query_list without closing the server
    Stores the query results in order

    The server is started with ``RecommenderServer serve <tsvfilename>.schemaTree.typed.pb``
    using path_to_server_dir as working directory, and is always shut down
    again before this function returns or raises.

    Every entry of query_list may be either a list of property names or an
    already JSON-encoded list (a string); both are sent as the "properties"
    field of the request body.

    n is the amount of tags it should predict for every property list, for our experiments we will keep this 1
    Exactly n entries are added to the result per query: a missing or null
    recommendation is stored as the string "None" so that the output stays
    aligned with the input.

    max_retries is the number of extra attempts per query when the server does
    not answer with valid JSON (e.g. because it is still starting up), and
    retry_delay is the pause in seconds between those attempts.

    Raises RuntimeError if a query still has no valid answer after all retries.
    """
    import urllib.request  # Local import: plain HTTP client from the standard library

    url = "http://localhost:8080/recommender"
    # The server runs on this machine, so never route the request through a system proxy
    opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))

    # Open the server
    # Once again you need the path to your recommenderServer
    open_server = subprocess.Popen(['RecommenderServer', 'serve', tsvfilename + '.schemaTree.typed.pb'], cwd= path_to_server_dir)
    response_list = []

    try:
        for property_list in query_list:
            if not isinstance(property_list, str):
                property_list = json.dumps(property_list)
            body = '{"properties": ' + property_list + ',"types":[]}'
            request = urllib.request.Request(
                url,
                data=body.encode("utf-8"),
                headers={"Content-Type": "application/json"},
                method="POST",
            )

            # This checks if the result is actually there, and gives it some time if it's not
            last_error = None
            for attempt in range(max_retries + 1):
                try:
                    with opener.open(request, timeout=30) as response:
                        parsed = json.loads(response.read().decode("utf-8"))
                    break
                except (OSError, ValueError) as err:
                    # OSError covers refused connections, HTTP errors and timeouts;
                    # ValueError covers a body that is not valid JSON.
                    last_error = err
                    if attempt < max_retries:
                        time.sleep(retry_delay)
            else:
                raise RuntimeError(
                    "No valid response from the RecommenderServer for query " + property_list
                ) from last_error

            # Taking the n first recommendations out of the servers' response
            recommendations = parsed.get("recommendations") or []
            picked = [
                "None" if rec["property"] is None else rec["property"]
                for rec in recommendations[:n]
            ]
            # Even if there is no recommendation, we still need a result in the list for the alignment
            picked += ["None"] * (n - len(picked))
            response_list.extend(picked)
    finally:
        # Finally, we terminate the server (also on errors) and wait for it to
        # exit, so that a following call can start a fresh server.
        open_server.terminate()
        try:
            open_server.wait(timeout=10)
        except subprocess.TimeoutExpired:
            open_server.kill()
            open_server.wait()

    return response_list




In [None]:
def _unpickle(path):
    """Return the object stored in the pickle file at path."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# The training set is used to create the tree
trainingloaded = _unpickle('trainingset')
# The test questions are used for querying, the test answers for checking
questions = _unpickle('testset_questions')
answers = _unpickle('testset_answer')

# Location of the RecommenderServer folder
path_to_server_dir = "Put the path to RecommenderServer here"
# Name of the TSV file used throughout the experiment
# It has to be a valid filename ending in .tsv
testtsv = "Your filename here.tsv"

In [None]:
# Write the training tags to the TSV file and build the tree from it
create_tree_train_set(
    training_set=trainingloaded["tags"],
    tsvfilename=testtsv,
    path_to_server_dir=path_to_server_dir,
)
# The resulting tree file has to be moved to the RecommenderServer folder
# before any of the following cells are run

In [None]:
# Running this cell can take a while

# Every test question is turned into a JSON string, ready to be sent to the server
questions_multi = list(map(json.dumps, questions))

full_results = []

# The server is queried in batches of 100, one multiquery call per batch
batch_start = 0
while batch_start < len(questions_multi):
    batch_end = batch_start + 100
    # Printing the batch boundaries gives a quick view of the progress
    print(batch_start, batch_end)
    full_results.append(
        multiquery(testtsv, path_to_server_dir, questions_multi[batch_start:batch_end])
    )
    batch_start = batch_end

In [None]:
# Flatten the per-batch result lists into one list, keeping the original order
all_results = [prediction for batch in full_results for prediction in batch]

In [None]:
# Set to True if you want to save your results
SAVE_RESULTS = False
RESULTS_FILE = 'predicted_answers_schematree_test'

if SAVE_RESULTS:
    with open(RESULTS_FILE, 'wb') as fb:
        pickle.dump(full_results, fb)
else:
    # Otherwise previous results are loaded to check the classification report
    # NOTE: pickle must only be used on files you created yourself
    with open(RESULTS_FILE, 'rb') as fb:
        full_results = pickle.load(fb)

# Keep the flat list in sync with full_results, so that the report cells
# below also work on results that were just loaded from disk
all_results = [prediction for batch in full_results for prediction in batch]

In [None]:
# Text version of the classification report; classes without predictions score 0
report_text = classification_report(answers, all_results, zero_division=0)
print(report_text)

In [None]:
# Below code utilizes convert_report2excel to put the classification report into excel for easier lookup

workbook = Workbook()
workbook.remove(workbook.active) # Delete default sheet.

# The predictions are cut to the number of answers instead of a hard-coded
# length, so this cell keeps working when the size of the test set changes
report = classification_report(
    answers,
    all_results[:len(answers)],
    zero_division=0,
    output_dict=True
)

workbook = convert_report2excel(
    workbook=workbook,
    report=report,
    sheet_name = "schematree2_report"
)
workbook.save("schematree2_report.xlsx")