# Mandatory Requirements

In [13]:
import pandas as pd 
import requests
import pymongo
from pymongo import MongoClient

def send_http_request(url: str, body=None, method="POST", timeout=None):
    """Send an HTTP request and return the decoded response payload.

    Args:
        url: Target URL.
        body: Optional JSON-serialisable payload, sent as the JSON body.
        method: HTTP verb handed to ``requests.request`` (default ``"POST"``).
        timeout: Optional timeout in seconds forwarded to ``requests.request``.
            The default ``None`` waits indefinitely, which is the behaviour the
            function had before this parameter existed.

    Returns:
        * the parsed JSON when the server answers 200 with a JSON content type
          and the payload can be decoded;
        * the raw response text for any other 200 answer;
        * the caught ``requests.RequestException`` instance when the request
          itself fails (the exception is returned, not raised);
        * ``None`` for every other status code. Statuses other than 204 are
          additionally reported on stdout.
    """
    try:
        response = requests.request(method, url, json=body, timeout=timeout)
        # Only a plain 200 carries a payload we try to decode.
        if response.status_code == 200:
            content_type = response.headers.get('Content-Type')
            if content_type and 'application/json' in content_type:
                try:
                    return response.json()
                except ValueError:
                    # Every JSON decoding error raised by ``response.json()``
                    # derives from ValueError, including
                    # ``requests.exceptions.JSONDecodeError`` on releases that
                    # define it. Catching ValueError therefore also works on
                    # older releases where that attribute does not exist.
                    return response.text
            return response.text

        if response.status_code != 204:
            print(f"Request exited with status code {response.status_code}: {response.reason}")
    except requests.RequestException as error:
        # Best-effort helper: hand the error back so the caller can display it.
        return error
    return None

Constant mapping between exercise names and their root derivation trees:

In [2]:
# Maps each exercise name to the model ids of its root derivation trees.
# Insertion order is kept, so iteration follows the order written here.
name_to_model_ids = dict(
    Courses=["JDKw8yJZF5fiP3jv3", "PSqwzYAfW9dFAa9im"],
    ProductionLine_v2_v3=["aTwuoJgesSd8hXXEP", "bNCCf9FMRZoxqobfX"],
    Train=["QxGnrFQnXPGh2Lh8C"],
    SocialNetwork=["dkZH6HJNQNLLDX6Aj"],
    TrashFOL=["sDLK7uBCbgZon3znd"],
    ClassroomFOL=["YH3ANm7Y5Qe5dSYem"],
    TrashRL=["PQAJE67kz8w5NWJuM"],
    ClassroomRL=["zRAn69AocpkmxXZnW"],
    Graphs=["gAeD3MTGCCv8YNTaK"],
    LTS=["zoEADeCW2b2suJB2k"],
    ProductionLine_v1=["jyS8Bmceejj9pLbTW"],
    CV=["JC8Tij8o8GZb99gEJ"],
    TrashLTL=["9jPK8KBWzjFmBx4Hb"],
)


# Initialize Alloy4fun System

## Populate Alloy4fun Database With Public Data

In [6]:
from populate_database import populate_database
# Fill the Alloy4fun database with the public data. The run captured in this
# notebook printed one "Downloading <id>.json" line per model followed by
# "Generating Links".
populate_database()

Downloading zoEADeCW2b2suJB2k.json
Downloading PSqwzYAfW9dFAa9im.json
Downloading PQAJE67kz8w5NWJuM.json
Downloading WGdhwKZnCu7aKhXq9.json
Downloading QxGnrFQnXPGh2Lh8C.json
Downloading sDLK7uBCbgZon3znd.json
Downloading YH3ANm7Y5Qe5dSYem.json
Downloading zRAn69AocpkmxXZnW.json
Downloading JC8Tij8o8GZb99gEJ.json
Downloading dkZH6HJNQNLLDX6Aj.json
Downloading 9jPK8KBWzjFmBx4Hb.json
Downloading jyS8Bmceejj9pLbTW.json
Downloading aTwuoJgesSd8hXXEP.json
Downloading JDKw8yJZF5fiP3jv3.json
Downloading FwCGymHmbqcziisH5.json
Downloading bNCCf9FMRZoxqobfX.json
Downloading gAeD3MTGCCv8YNTaK.json
Generating Links


This populate script also generates Meteor links for every model using their real ids. Assuming Meteor is running, one can access the Social Network public model through the link http://localhost:3000/dkZH6HJNQNLLDX6Aj . Private models can be accessed by appending the word 'original' to the link, such as http://localhost:3000/dkZH6HJNQNLLDX6Ajoriginal .

## Setup Hint System

In [3]:
# HIGENA GRAPH ENDPOINTS (optional, disabled). Uncomment to enable:
# for (name,ids) in name_to_model_ids.items():
#     print(send_http_request(url="http://localhost:8080/hint/higena-setup",  body=ids))

# SPECASSISTANT GRAPH ENDPOINTS
# The first call hits the debug-drop-db endpoint (presumably it resets the hint
# database before the setup is rebuilt; confirm against the API).
send_http_request(url="http://localhost:8080/hint/debug-drop-db", method="GET")

# WARNING: This will take several minutes (shortest record is 11 min)
for name, ids in name_to_model_ids.items():
    # Progress marker printed on the same line as the answer of the request.
    print(f"Requesting setup for {name}", end="... ")
    print(
        send_http_request(
            url=f"http://localhost:8080/hint/specassistant-setup?prefix={name}",
            body=ids,
            method="GET",
        )
    )

# NOTE: Execution times are stored by the api application within its database

Requesting setup for Courses... Setup completed for Courses with model_ids [JDKw8yJZF5fiP3jv3, PSqwzYAfW9dFAa9im]
Requesting setup for ProductionLine_v2_v3... Setup completed for ProductionLine_v2_v3 with model_ids [aTwuoJgesSd8hXXEP, bNCCf9FMRZoxqobfX]
Requesting setup for Train... Setup completed for Train with model_ids [QxGnrFQnXPGh2Lh8C]
Requesting setup for SocialNetwork... Setup completed for SocialNetwork with model_ids [dkZH6HJNQNLLDX6Aj]
Requesting setup for TrashFOL... Setup completed for TrashFOL with model_ids [sDLK7uBCbgZon3znd]
Requesting setup for ClassroomFOL... Setup completed for ClassroomFOL with model_ids [YH3ANm7Y5Qe5dSYem]
Requesting setup for TrashRL... Setup completed for TrashRL with model_ids [PQAJE67kz8w5NWJuM]
Requesting setup for ClassroomRL... Setup completed for ClassroomRL with model_ids [zRAn69AocpkmxXZnW]
Requesting setup for Graphs... Setup completed for Graphs with model_ids [gAeD3MTGCCv8YNTaK]
Requesting setup for LTS... Setup completed for LTS wit

# Choose a policy

In this notebook we choose the same policy for every graph for convenience, but this does not have to be the case.

In [5]:
# Rule used to compute the policy of every graph.
policy = "MIN-TED"

# This will take around a minute to complete (shortest record is 27 seconds)
send_http_request(
    url=f"http://localhost:8080/hint/compute-all-policies-for-rule?rule={policy}",
    method="POST",
)

'Policy computed'

# Database Study

In [6]:
# Database basic requirements
# NOTE(review): the wildcard import hides which names this module contributes.
# Presumably it supplies the pipeline builders called by the cells below
# (get_graph_id_dict_pipeline, get_graph_node_statistics,
# get_popular_nodes_pipeline, get_min_solutions_pipeline) - confirm and
# consider importing them explicitly.
from a4f_mongo_pipelines import *

# Connection string and database name used by every MongoClient(mongo_uri) cell.
mongo_uri = "mongodb://localhost:27017/"
database_name = "meteor"

## Data Gathering

### Get GraphId Maps

A graph is created for each command of each exercise. We need each one of their ids to iterate over them.

In [16]:
# Build the exercise name -> graph ids map from the "Graph" collection.
# The client is used as a context manager so the connection is closed even
# when the aggregation raises.
with MongoClient(mongo_uri) as client:
    db = client[database_name]
    graph_collection = db["Graph"]

    name_to_graph_ids = {
        doc["_id"]: doc["graph_ids"]
        for doc in graph_collection.aggregate(get_graph_id_dict_pipeline())
    }

# Summary: first graph id and number of graphs per exercise.
for name, lst in name_to_graph_ids.items():
    # An empty id list is shown as "[]" instead of failing on lst[0].
    preview = f'[{lst[0]}, ...]' if lst else '[]'
    print(f'{name:22}: {preview} (size = {len(lst):2})')


Courses               : [66798a6ccc9df62b59a8327b, ...] (size = 15)
CV                    : [66798cf3cc9df62b59a904c1, ...] (size =  4)
TrashLTL              : [66798d02cc9df62b59a908d5, ...] (size = 19)
ClassroomRL           : [66798c78cc9df62b59a8e5cd, ...] (size = 14)
TrashRL               : [66798c56cc9df62b59a8df83, ...] (size =  9)
Train                 : [66798b21cc9df62b59a883b2, ...] (size = 10)
Graphs                : [66798cadcc9df62b59a8f333, ...] (size =  7)
LTS                   : [66798ccecc9df62b59a8fbb5, ...] (size =  6)
ClassroomFOL          : [66798c23cc9df62b59a8d2d2, ...] (size = 14)
SocialNetwork         : [66798b63cc9df62b59a89c9c, ...] (size =  8)
TrashFOL              : [66798c05cc9df62b59a8ce6a, ...] (size =  9)
ProductionLine_v1     : [66798ce8cc9df62b59a90296, ...] (size =  4)
ProductionLine_v2_v3  : [66798ae7cc9df62b59a86b02, ...] (size = 10)


### Get Graph Statistics

Gather the amount of valid/invalid formulas/submissions integrated in each graph. 

In [17]:
# Aggregate the per-graph node statistics into a DataFrame.
# The context manager closes the connection even if the aggregation fails.
with MongoClient(mongo_uri) as client:
    db = client[database_name]
    node_collection = db["Node"]

    data = list(node_collection.aggregate(get_graph_node_statistics()))

graph_stats_df = pd.DataFrame(data)

graph_stats_df

Unnamed: 0,name,valid_formulas,invalid_formulas,valid_submissions,invalid_submissions
0,CV-Inv1OK,7,42,77,118
1,CV-Inv2OK,16,63,52,98
2,CV-Inv3OK,6,109,26,172
3,CV-Inv4OK,1,197,1,314
4,ClassroomFOL-inv10OK,3,30,147,43
...,...,...,...,...,...
124,TrashRL-inv5Ok,10,75,155,193
125,TrashRL-inv6Ok,12,78,172,192
126,TrashRL-inv7Ok,25,108,135,268
127,TrashRL-inv8Ok,13,6,159,14


### Get Popular Node Data Frames

WARNING: Requires GraphId Maps

In [None]:
# Number of entries kept per exercise (only the first ones returned by the pipeline).
POPULAR_NODE_LIMIT = 30

# One DataFrame per exercise, built from the popular-nodes pipeline.
# The context manager closes the connection even if an aggregation fails.
with MongoClient(mongo_uri) as client:
    db = client[database_name]
    node_collection = db["Node"]

    name_to_pop_dfs = {}

    for (name, graph_ids) in name_to_graph_ids.items():
        data = list(node_collection.aggregate(get_popular_nodes_pipeline(graph_ids)))[:POPULAR_NODE_LIMIT]
        name_to_pop_dfs[name] = pd.DataFrame(data)

for name, frame in name_to_pop_dfs.items():
    print(name + ":")
    print(frame, end="\n\n")

### Get Min Solutions Data Frames

WARNING: Requires GraphId Maps

In [None]:
# One DataFrame per exercise, built from the min-solutions pipeline.
# The context manager closes the connection even if an aggregation fails.
with MongoClient(mongo_uri) as client:
    db = client[database_name]
    node_collection = db["Node"]

    name_to_min_sol_dfs = {}

    for (name, graph_ids) in name_to_graph_ids.items():
        data = list(node_collection.aggregate(get_min_solutions_pipeline(graph_ids)))
        name_to_min_sol_dfs[name] = pd.DataFrame(data)

for name, frame in name_to_min_sol_dfs.items():
    print(name + ":")
    print(frame, end="\n\n")

## Data Frame Persistence

### Write As Multiple Csvs

General Statistics

In [None]:
# Persist the general statistics as a ';'-separated CSV, replacing any existing file.
graph_stats_df.to_csv(
    path_or_buf="graph_stats.csv",
    sep=';',
    float_format='%g',
    mode='w',
    index=False,
)

Popular Formulas

In [None]:
# One "<exercise>.popularity.csv" file per exercise.
for name, df_ in name_to_pop_dfs.items():
    df_.to_csv(
        path_or_buf=f"{name}.popularity.csv",
        sep=';',
        float_format='%g',
        mode='w',
        index=False,
    )

Solution Formulas

In [None]:
# One "<exercise>.solution.csv" file per exercise.
for name, df_ in name_to_min_sol_dfs.items():
    df_.to_csv(
        path_or_buf=f"{name}.solution.csv",
        sep=';',
        float_format='%g',
        mode='w',
        index=False,
    )

### Write as Sheets of a Single XLSX File

WARNING: Requires Every DataFrame

In [None]:
import xlsxwriter  # not referenced by name; fails early if the engine used below is missing


def _write_titled_frame(writer, sheet_name, frame, title, start_row, title_format):
    """Write ``title`` on ``start_row`` with ``frame`` right below it; return the next free row.

    The title is merged across the columns of ``frame``. The sheet is created by
    ``to_excel`` on first use and then looked up through ``writer.sheets``.
    """
    frame.to_excel(excel_writer=writer, sheet_name=sheet_name, startrow=start_row + 1, index=False)
    sheet = writer.sheets[sheet_name]

    # merge_range expects the index of the last COLUMN, so use the frame's
    # column count rather than its row count.
    last_col = frame.shape[1] - 1
    if last_col > 0:
        sheet.merge_range(start_row, 0, start_row, last_col, title, title_format)
    else:
        # A single cell cannot be merged; write the title directly.
        sheet.write(start_row, 0, title, title_format)

    # title row + header row + data rows + one blank separator row
    return start_row + frame.shape[0] + 3


with pd.ExcelWriter('db_study.xlsx', engine='xlsxwriter') as writer:
    workbook = writer.book
    text_wrap = workbook.add_format({'text_wrap': True, 'valign': 'top'})
    bold = workbook.add_format({'bold': True})

    # One sheet per exercise: popular formulas first, valid formulas below them.
    for name in sorted(name_to_model_ids):
        row = _write_titled_frame(
            writer, name, name_to_pop_dfs[name],
            "The 30 most frequent formulas", 0, bold,
        )
        _write_titled_frame(
            writer, name, name_to_min_sol_dfs[name],
            "The valid formulas ordered by their frequency", row, bold,
        )

        sheet = writer.sheets[name]
        sheet.set_column(0, 0, 15)
        sheet.set_column(1, 1, 100, text_wrap)
        sheet.set_column(2, 2, 15)
        sheet.set_column(3, 3, 100, text_wrap)
        sheet.set_column(4, 4, 27)

    graph_stats_df.to_excel(excel_writer=writer, sheet_name="General Statistics", index=False)
    stats_sheet = writer.sheets["General Statistics"]
    stats_sheet.set_column(0, 0, 30)
    stats_sheet.set_column(1, 4, 20)

# Policy Study

Read the targeted formulas

In [32]:
exercise = "dkZH6HJNQNLLDX6Aj" # Social Network

# Read the targeted formulas once, grouped by predicate. A stable sort keeps
# the formulas of a predicate in the order they appear in the file.
input_data = (
    pd.read_csv('formulas.csv', delimiter=';')
    .sort_values(by='Predicate', kind='mergesort')
    .reset_index()
)

# Request body: predicate -> list of formulas written for that predicate.
body = dict()

for predicate, formula in zip(input_data['Predicate'], input_data['Formula']):
    body.setdefault(predicate, []).append(formula)

input_data

Unnamed: 0,index,Predicate,Formula
0,0,inv1,all p: Photo |some u: User | u -> p in posts
1,1,inv1,all p: Photo | p in User . posts
2,2,inv1,"all p: Photo, u: User | p in u . posts"
3,3,inv5,"all i: Influencer, u: User | i in u . follows"
4,4,inv5,all u: User | Influencer in u . follows
5,5,inv5,all u: User | u . follows in Influencer


Collect the result of testing each formula for each policy

In [33]:
# Ask the study endpoint to test every policy on the formulas of `body`.
response = requests.request(
    "POST",
    f"http://localhost:8080/study/test-all-policies-on-formulas?model_id={exercise}",
    json=body,
)
# Fail with the HTTP error itself instead of a confusing JSON decoding error
# when the server does not answer successfully.
response.raise_for_status()

output = response.json()
df = pd.DataFrame(output)
df

Unnamed: 0,policy,formula,predicate,hint,nextFormula,normalizedFormula
0,POPULARITY,all p: Photo |some u: User | u -> p in posts,inv1,Keep going! Instead of using inclusion operato...,(all ref0:(one Photo)|(some ((User <: posts) ....,(all ref0:(one Photo)|(some ref1:(one User)|((...
1,POPULARITY,all p: Photo | p in User . posts,inv1,Keep going! Consider adding a unique quantifie...,(all ref0:(one Photo)|(one ref1:(one User)|(re...,(all ref0:(one Photo)|(ref0 in (User . (User <...
2,POPULARITY,"all p: Photo, u: User | p in u . posts",inv1,One step away from the solution! Instead of us...,(all ref0:(one Photo)|(some ref1:(one User)|(r...,"(all ref0:(one Photo),ref1:(one User)|(ref0 in..."
3,POPULARITY,"all i: Influencer, u: User | i in u . follows",inv5,Keep going! Consider adding a implication oper...,"(all ref0:(one Influencer),ref1:(one User)|((r...","(all ref0:(one Influencer),ref1:(one User)|(re..."
4,POPULARITY,all u: User | Influencer in u . follows,inv5,Near a solution! Consider adding a difference ...,(all ref0:(one User)|((Influencer - ref0) in (...,(all ref0:(one User)|(Influencer in (ref0 . (U...
...,...,...,...,...,...,...
91,MIN-TED,all p: Photo | p in User . posts,inv1,Keep going! Consider adding a unique quantifie...,(all ref0:(one Photo)|(one ref1:(one User)|(re...,(all ref0:(one Photo)|(ref0 in (User . (User <...
92,MIN-TED,"all p: Photo, u: User | p in u . posts",inv1,One step away from the solution! Instead of us...,(all ref0:(one Photo)|(one ref1:(one User)|(re...,"(all ref0:(one Photo),ref1:(one User)|(ref0 in..."
93,MIN-TED,"all i: Influencer, u: User | i in u . follows",inv5,Near a solution! Consider adding a difference ...,"(all ref0:(one Influencer),ref1:(one (User - r...","(all ref0:(one Influencer),ref1:(one User)|(re..."
94,MIN-TED,all u: User | Influencer in u . follows,inv5,Near a solution! Consider adding a difference ...,(all ref0:(one User)|((Influencer - ref0) in (...,(all ref0:(one User)|(Influencer in (ref0 . (U...


Group duplicate hints, each policy hit is aggregated in a matrix.

In [34]:
# Merge entries that share (predicate, formula, nextFormula). Each merged entry
# keeps the fields of the first occurrence (minus 'policy') and gets one
# `<policy name>: True` flag per policy that produced it.
condensed_output = dict()
for obj in output:
    # The same key expression is used for lookup and insertion, so an entry
    # without 'nextFormula' is grouped under None instead of raising.
    key = (obj['predicate'], obj['formula'], obj.get('nextFormula'))
    entry = condensed_output.get(key)
    if entry is None:
        entry = {field: value for field, value in obj.items() if field != 'policy'}
        condensed_output[key] = entry
    # No notebook-level `policy` variable is assigned here, so the policy
    # chosen earlier in the notebook is left untouched.
    entry[obj['policy']] = True

df = pd.DataFrame(list(condensed_output.values()))
df

Unnamed: 0,formula,predicate,hint,nextFormula,normalizedFormula,POPULARITY,BALANCED-TEDCOMP,MIN-COMPLEXITY,MINMAX-TED,MIN-TED,...,BALANCED-TEDCOMPxPOPULARITY,MINMAX-COMP,MAX-FREQ,COMPxPOPULARITY,TEDxPOPULARITY,Arrival,MAXIMIN-FREQ,COMPxArrival,BALANCED-TEDCOMPxArrival,MIN-ONE
0,all p: Photo |some u: User | u -> p in posts,inv1,Keep going! Instead of using inclusion operato...,(all ref0:(one Photo)|(some ((User <: posts) ....,(all ref0:(one Photo)|(some ref1:(one User)|((...,True,True,True,,,...,,,,,,,,,,
1,all p: Photo | p in User . posts,inv1,Keep going! Consider adding a unique quantifie...,(all ref0:(one Photo)|(one ref1:(one User)|(re...,(all ref0:(one Photo)|(ref0 in (User . (User <...,True,,,True,True,...,,,,,,,,,,
2,"all p: Photo, u: User | p in u . posts",inv1,One step away from the solution! Instead of us...,(all ref0:(one Photo)|(some ref1:(one User)|(r...,"(all ref0:(one Photo),ref1:(one User)|(ref0 in...",True,,True,,,...,,,,,,,,,,
3,"all i: Influencer, u: User | i in u . follows",inv5,Keep going! Consider adding a implication oper...,"(all ref0:(one Influencer),ref1:(one User)|((r...","(all ref0:(one Influencer),ref1:(one User)|(re...",True,,,,,...,,,,,,,,,,
4,all u: User | Influencer in u . follows,inv5,Near a solution! Consider adding a difference ...,(all ref0:(one User)|((Influencer - ref0) in (...,(all ref0:(one User)|(Influencer in (ref0 . (U...,True,True,,True,True,...,True,True,True,True,True,True,True,,,
5,all u: User | u . follows in Influencer,inv5,Keep going! You can use variables to help spec...,"(all ref0:(one Influencer),ref1:(one User)|((r...",(all ref0:(one User)|((ref0 . (User <: follows...,True,,True,,,...,,,True,,,True,True,True,True,
6,all p: Photo |some u: User | u -> p in posts,inv1,One step away from the solution! Instead of us...,(all ref0:(one Photo)|(one ref1:(one User)|((r...,(all ref0:(one Photo)|(some ref1:(one User)|((...,,,,True,True,...,True,,,True,True,,,,,
7,all p: Photo | p in User . posts,inv1,Keep going! Consider adding a conjunction oper...,"((all ref0:(one Photo),ref1:(one User),ref2:(o...",(all ref0:(one Photo)|(ref0 in (User . (User <...,,True,,,,...,,,,,,True,,True,,True
8,"all p: Photo, u: User | p in u . posts",inv1,One step away from the solution! Instead of us...,(all ref0:(one Photo)|(one ref1:(one User)|(re...,"(all ref0:(one Photo),ref1:(one User)|(ref0 in...",,,,True,True,...,True,,True,True,True,,,,,True
9,"all i: Influencer, u: User | i in u . follows",inv5,Keep going! Consider adding a disjunction oper...,"(all ref0:(one Influencer),ref1:(one User)|((r...","(all ref0:(one Influencer),ref1:(one User)|(re...",,,,,,...,,,,,,True,,True,True,True


Write the data to a file

In [None]:
# sort_values returns a new frame, so the sorted result is the one written out.
# NOTE: `df` is reassigned by several cells of this notebook; this cell expects
# the frame that has a 'predicate' column.
df.sort_values(by='predicate', ascending=True).to_csv("hints.csv", index=False, sep=";")


# Statistical evaluations

## Split Dataset into Test/Train and retrain system

In [36]:
# Takes several minutes (shortest record: 6 minutes for splitting and 7 minutes for training)
send_http_request(
    url="http://localhost:8080/study/split-train-spec-assist",
    body=name_to_model_ids,
    method="POST",
)

Request exited with status code 500: Internal Server Error


## Perform Evaluation Tests

In [37]:
# Takes a few minutes (shortest record: 3 minutes)
send_http_request(
    url="http://localhost:8080/study/test-spec-assist",
    method="POST",
)

Request exited with status code 500: Internal Server Error


## Perform TAR Evaluations
If you wish to test TAR, uncomment and run the following code. This process, however, can take days to complete, as TAR is slow to process a significant number of the entries.

In [None]:
# WARNING: THIS WILL TAKE HOURS OR PERHAPS DAYS TO COMPLETE

# send_http_request(url="http://localhost:8080/study/test-TAR", method="POST")

TAR is not data-driven, so its accuracy does not depend on our training dataset. As a result, in our evaluations we tested TAR on every model and then limited the view of the results to each test set we used. This allowed us to better manage the time required by the evaluation. The results can be found in the file TAR_test_data.json. If you wish to use them, you can run the following block to import them into the hint system; be warned, however, that the recorded execution times are tied to the machine that we used.

In [2]:
import json

import pymongo

# Load the TAR results stored in TAR_test_data.json.
with open("TAR_test_data.json", mode="r", encoding="utf-8") as file:
    tar_records = json.load(file)

# The context manager closes the connection even if the insertion fails.
with pymongo.MongoClient() as client:
    collection = client["meteor"]["Test"]
    # insert_many rejects an empty document list, so skip the call in that case.
    if tar_records:
        collection.insert_many(tar_records)

## Gather and Aggregate Test Data from Database

In [14]:
from extract_test_data import extract_test_data_pipeline

# Correct the names specified on imported datasets (shortest record: 1 min)
send_http_request(url="http://localhost:8080/study/fix-naming",  method="POST")

# Aggregate the stored test results. The context manager closes the connection
# even if the aggregation fails.
with pymongo.MongoClient() as client:
    collection = client["meteor"]["Test"]
    data = list(collection.aggregate(extract_test_data_pipeline()))

# One row per `_id`, ordered by it and exposed under the column "name".
df = (
    pd.DataFrame(data)
    .sort_values(by='_id', ascending=True)
    .rename(columns={"_id": "name"})
)

df

Unnamed: 0,name,NONE,SPEC,MUT,SPEC & MUT,TAR,TAR & SPEC,TAR & MUT,TAR & SPEC & MUT,policy_count,parsing_count,policy_time,parsing_time,count,TAR_TIME,TAR_DEV,MUT_TIME,MUT_DEV
7,CV,132,28,2,6,41,0,3,5,260,595,1.819439,23.093191,217,48.860626,16.069239,0.716002,0.39585
4,ClassroomFOL,286,41,77,156,21,15,39,64,2462,2685,15.289405,272.981975,699,25.282328,23.137584,0.424946,0.191245
12,ClassroomRL,276,66,62,80,54,13,37,50,2229,2503,19.859469,270.166752,638,37.700568,24.08205,0.329034,0.148926
1,Courses,986,205,332,404,195,34,169,214,19100,20357,135.467241,857.465778,2539,36.691237,25.324486,0.714441,0.377195
9,Graphs,67,14,27,61,44,1,61,70,1150,1276,8.249016,277.601594,345,19.695862,21.093843,0.24812,0.129079
6,LTS,203,36,60,48,15,0,11,11,846,963,6.258112,249.633888,384,8.666307,19.4494,0.257396,0.123154
3,ProductionLine_v1,1,19,0,3,38,0,20,29,366,413,2.439167,72.698792,110,0.066012,0.125767,0.339227,0.139986
0,ProductionLine_v2_v3,478,71,102,116,76,11,35,122,7542,8118,61.361245,439.392716,1011,38.467583,24.383484,0.738202,0.41187
5,SocialNetwork,884,241,352,655,95,16,83,308,9630,10409,50.370827,519.530469,2634,44.618394,23.357551,0.642241,0.317328
10,Train,422,97,164,278,83,4,59,203,4046,4466,25.8527,369.304952,1310,37.931179,26.340377,0.757973,0.413017


Write the data to a file

In [9]:
# NOTE: `df` is reassigned by several cells of this notebook; run this right
# after the cell that aggregates the test data so the intended frame is written.
df.to_csv(
    "statistics.csv",
    sep=";",
    index=False,
)