In [36]:
import os
import csv
import random
import glob
import rdflib
import re
from SPARQLWrapper import SPARQLWrapper, JSON, BASIC
import pickle
import copy

In [37]:
def loadAuthFile():
    """Read the connection settings from ``auth_weighted_markov_chain.txt``.

    Returns:
        list[str]: The lines of the file, in order, with every newline
        character removed. ``get_all_activities`` uses index 0 as the
        SPARQL endpoint and indexes 1 and 2 as the HTTP credentials.
    """
    with open('auth_weighted_markov_chain.txt') as auth_file:
        # Iterating over the file yields the same lines as readlines().
        return [line.replace('\n', '') for line in auth_file]
# Read the SPARQL endpoint and credentials from the auth file.
auth = loadAuthFile()

In [38]:
def get_all_activities(auth):
    """Query the SPARQL endpoint for every activity class that has instances.

    The query selects the distinct classes that some resource is an instance
    of and that sit two ``rdfs:subClassOf`` steps below ``ho:Activity``.
    Inference is switched off via the ``infer=false`` request parameter.

    Args:
        auth: Sequence whose item 0 is the endpoint passed to
            ``SPARQLWrapper`` and whose items 1 and 2 are the HTTP basic-auth
            credentials.

    Returns:
        list[str] | None: The ``?activity`` values of all result bindings, or
        ``None`` when the result is empty or the request/parsing failed (the
        exception arguments are printed in that case).
    """
    queryString = """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX : <http://example.org/virtualhome2kg/ontology/>
        PREFIX ho: <http://www.owl-ontologies.com/VirtualHome.owl#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        select distinct ?activity where {
            ?s a ?activity .
            ?activity rdfs:subClassOf/rdfs:subClassOf ho:Activity .
        }
    """
    endpoint = SPARQLWrapper(auth[0])
    endpoint.setHTTPAuth(BASIC)
    endpoint.setCredentials(auth[1], auth[2])
    endpoint.addParameter('infer', 'false')
    endpoint.setQuery(queryString)
    endpoint.setReturnFormat(JSON)

    try:
        rows = endpoint.query().convert()['results']['bindings']
        if len(rows) == 0:
            return None
        return [row["activity"]["value"] for row in rows]
    except Exception as e:
        # Best effort: report the problem and signal failure with None.
        print(e.args)
        return None

In [39]:
def create_ngram(sequence_list, avg_answer_list, n):
    """Build transition statistics from scored activity sequences.

    For every sequence, the first ``n - 1`` transitions ``seq[i] -> seq[i+1]``
    are recorded. Sequences with fewer than ``n`` items contribute only the
    transitions they actually contain (previously they raised ``IndexError``).

    Args:
        sequence_list: Iterable of sequences (lists of hashable items).
        avg_answer_list: One score per sequence, paired with
            ``sequence_list`` by position via ``zip``.
        n: Number of leading items of each sequence to consider.

    Returns:
        dict: ``{current: {next: {"count": int, "value": number}}}`` where
        ``count`` is how many times the transition was seen and ``value`` is
        the sum of the scores of the sequences it was seen in.
    """
    ngram = {}
    for seq, avg_answer in zip(sequence_list, avg_answer_list):
        # Never index past the end of a sequence shorter than n.
        steps = min(n - 1, len(seq) - 1)
        for i in range(steps):
            followers = ngram.setdefault(seq[i], {})
            stats = followers.get(seq[i + 1])
            if stats is None:
                followers[seq[i + 1]] = {"count": 1, "value": avg_answer}
            else:
                stats["count"] += 1
                stats["value"] += avg_answer
    return ngram

In [40]:
def update_ngram(ngram, sequence_list, avg_answer_list, n):
    """Add the transitions of new scored sequences to existing statistics.

    For every sequence, the first ``n - 1`` transitions ``seq[i] -> seq[i+1]``
    are merged into ``ngram``. Sequences with fewer than ``n`` items
    contribute only the transitions they actually contain (previously they
    raised ``IndexError`` after ``ngram`` had already been partly modified).

    Args:
        ngram: Existing statistics shaped like
            ``{current: {next: {"count": int, "value": number}}}``.
            It is modified in place.
        sequence_list: Iterable of sequences (lists of hashable items).
        avg_answer_list: One score per sequence, paired with
            ``sequence_list`` by position via ``zip``.
        n: Number of leading items of each sequence to consider.

    Returns:
        dict: The same ``ngram`` object that was passed in, after the update.
    """
    for seq, avg_answer in zip(sequence_list, avg_answer_list):
        # Never index past the end of a sequence shorter than n.
        steps = min(n - 1, len(seq) - 1)
        for i in range(steps):
            followers = ngram.setdefault(seq[i], {})
            stats = followers.get(seq[i + 1])
            if stats is None:
                followers[seq[i + 1]] = {"count": 1, "value": avg_answer}
            else:
                stats["count"] += 1
                stats["value"] += avg_answer
    return ngram

In [41]:
# Transition probability
def create_transition_probability(ngram):
    """Normalise the weights of every row so that they sum to one.

    Args:
        ngram: Mapping ``{current: {next: weight}}`` with numeric weights.

    Returns:
        dict: ``{current: {next: weight / row_total}}`` with the same keys.
        A row without any entries maps to an empty dict.
    """
    transition_probability = {}
    for current_activity, next_activities in ngram.items():
        # Accumulate left to right; built-in sum() may round floats differently.
        total = 0
        for weight in next_activities.values():
            total += weight
        transition_probability[current_activity] = {
            na_key: weight / total
            for na_key, weight in next_activities.items()
        }
    return transition_probability

In [42]:
def markov_chain(transition_probability, max_length=6):
    """Sample one activity sequence by walking the transition table.

    The walk begins in the ``"start"`` state. At each step the next state is
    drawn with ``random.choices`` using the current row's values as weights.
    The walk stops after ``max_length`` steps or as soon as ``"end"`` is
    drawn; ``"end"`` itself is not included in the result.

    Args:
        transition_probability: Mapping ``{current: {next: weight}}``. It
            must contain a row for ``"start"`` and for every state that can
            be drawn (other than ``"end"``).
        max_length: Maximum number of activities to generate. Defaults to 6,
            the value that used to be hard-coded.

    Returns:
        list: The sampled activities, at most ``max_length`` long.

    Raises:
        KeyError: If the current state has no row in
            ``transition_probability``.
    """
    current_activity = "start"
    activity_list = []
    for _ in range(max_length):
        next_candidates = transition_probability[current_activity]
        current_activity = random.choices(
            population=list(next_candidates.keys()),
            weights=list(next_candidates.values()),
        )[0]
        if current_activity == "end":
            break
        activity_list.append(current_activity)
    return activity_list

In [43]:
data_path = "episodes/amtloop0/*.csv"
num_workers = 100    # divisor used to turn the summed answers into an average
episode_column = 27  # CSV column holding the episode text
answer_column = 28   # CSV column holding the worker's answer
# Raw string: '\(' in a normal string literal is an invalid escape sequence.
answer_label_pattern = re.compile(r' \(.+\)')


def parse_answer_score(answer):
    """Return the integer score of an answer, dropping a trailing " (...)" part."""
    return int(answer_label_pattern.sub('', answer))


aggregate = {}
# read results of AMT
for file_path in glob.glob(data_path):
    with open(file_path, encoding="utf-8", newline="") as f:
        reader = csv.reader(f, delimiter=",")
        # Skip the header row; an empty file is simply ignored.
        next(reader, None)

        # Sum the scores per episode.
        for line in reader:
            episode = line[episode_column]
            aggregate[episode] = aggregate.get(episode, 0) + parse_answer_score(line[answer_column])

# Average score.
# NOTE(review): assumes every episode was answered by exactly num_workers workers - confirm.
for episode in aggregate:
    aggregate[episode] = aggregate[episode] / num_workers

In [44]:
# Turn each aggregated episode into a 'start' ... 'end' sequence and keep its
# average score at the same position of the parallel list.
sequence_list = []
avg_answer_list = []
for episode, avg_answer in aggregate.items():
    sequence_list.append(['start', *episode.split(" → "), 'end'])
    avg_answer_list.append(avg_answer)

In [45]:
# n=6: create_ngram only counts the first n-1 transitions of each sequence.
ngram = create_ngram(sequence_list=sequence_list, avg_answer_list=avg_answer_list, n=6)
bias = 1
base_score = 3  # weight given to transitions that were never observed

# Mean score of every observed transition, scaled by the bias.
ngram_score = {
    current: {
        following: (stats["value"] / stats["count"]) * bias
        for following, stats in followers.items()
    }
    for current, followers in ngram.items()
}

auth = loadAuthFile()
activities = get_all_activities(auth)
if activities is None:
    # get_all_activities returns None on an empty result or a failed request.
    raise RuntimeError("get_all_activities returned no activities; check the SPARQL endpoint and credentials")
activities = [x.replace("http://www.owl-ontologies.com/VirtualHome.owl#", "").replace('_', ' ').capitalize() for x in activities]

# Activities that did not appear in the AMT experiment are all added with
# the base score.
for values in ngram_score.values():
    for activity in activities:
        if activity not in values:
            values[activity] = base_score

# Activities that never occurred as a current state get a full row of base scores.
for activity in activities:
    if activity not in ngram_score:
        ngram_score[activity] = dict.fromkeys(activities, base_score)

transition_probability = create_transition_probability(ngram_score)

In [46]:
# Save the raw n-gram statistics (counts and summed scores), not the derived scores.
with open('episodes/amtloop1_worker100_episode10_ngram_bias1_base3.pickle', 'wb') as f:
    pickle.dump(ngram, f)

In [47]:
ngram

{'start': {'Watch movie': {'count': 4, 'value': 15.19},
  'Go to sleep': {'count': 13, 'value': 50.09},
  'Take nap': {'count': 11, 'value': 43.73},
  'Make toast': {'count': 6, 'value': 23.15},
  'Sleep': {'count': 11, 'value': 43.43},
  'Keep an eye on stove as something is cooking': {'count': 4, 'value': 15.84},
  'Use computer': {'count': 2, 'value': 7.17},
  'Have snack': {'count': 4, 'value': 15.36},
  'Watch youtube': {'count': 3, 'value': 12.120000000000001},
  'Send  email': {'count': 1, 'value': 4.16},
  'Breakfast': {'count': 7, 'value': 27.259999999999998},
  'Cook some food': {'count': 3, 'value': 11.67},
  'Write an email': {'count': 7, 'value': 27.94},
  'Make coffee': {'count': 8, 'value': 31.490000000000002},
  'Cut bread': {'count': 4, 'value': 16.14},
  'Type up document': {'count': 1, 'value': 3.93},
  'Surf net': {'count': 1, 'value': 4.08},
  'Chop vegetables': {'count': 1, 'value': 4.22},
  'Wash teeth': {'count': 3, 'value': 11.78},
  'Wash dishes with dishwashe

# start updating transition probability

In [13]:
# Do not run this part for amtloop0.
pre_ngram = {}
# For amtloop1 load ngram; from amtloop2 onward load updated_ngram.
# NOTE(review): this file name has no "_base3" suffix, unlike the file the
# ngram dump cell writes - confirm which file is intended.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('episodes/amtloop1_worker100_episode10_ngram_bias1.pickle', 'rb') as f:
    pre_ngram = pickle.load(f)

In [14]:
pre_ngram

{'start': {'Watch movie': {'count': 4, 'value': 15.19},
  'Go to sleep': {'count': 13, 'value': 50.09},
  'Take nap': {'count': 11, 'value': 43.73},
  'Make toast': {'count': 6, 'value': 23.15},
  'Sleep': {'count': 11, 'value': 43.43},
  'Keep an eye on stove as something is cooking': {'count': 4, 'value': 15.84},
  'Use computer': {'count': 2, 'value': 7.17},
  'Have snack': {'count': 4, 'value': 15.36},
  'Watch youtube': {'count': 3, 'value': 12.120000000000001},
  'Send  email': {'count': 1, 'value': 4.16},
  'Breakfast': {'count': 7, 'value': 27.259999999999998},
  'Cook some food': {'count': 3, 'value': 11.67},
  'Write an email': {'count': 7, 'value': 27.94},
  'Make coffee': {'count': 8, 'value': 31.490000000000002},
  'Cut bread': {'count': 4, 'value': 16.14},
  'Type up document': {'count': 1, 'value': 3.93},
  'Surf net': {'count': 1, 'value': 4.08},
  'Chop vegetables': {'count': 1, 'value': 4.22},
  'Wash teeth': {'count': 3, 'value': 11.78},
  'Wash dishes with dishwashe

In [15]:
# update ngram
# update_ngram modifies its first argument in place, so work on a deep copy:
# otherwise re-running this cell would add the same sequences to pre_ngram again.
updated_ngram = update_ngram(ngram=copy.deepcopy(pre_ngram), sequence_list=sequence_list, avg_answer_list=avg_answer_list, n=6)

In [16]:
updated_ngram

{'start': {'Watch movie': {'count': 9, 'value': 36.22},
  'Go to sleep': {'count': 15, 'value': 58.22},
  'Take nap': {'count': 13, 'value': 51.98},
  'Make toast': {'count': 8, 'value': 31.54},
  'Sleep': {'count': 14, 'value': 55.8},
  'Keep an eye on stove as something is cooking': {'count': 7, 'value': 28.17},
  'Use computer': {'count': 4, 'value': 15.52},
  'Have snack': {'count': 6, 'value': 23.75},
  'Watch youtube': {'count': 6, 'value': 24.35},
  'Send  email': {'count': 2, 'value': 8.29},
  'Breakfast': {'count': 11, 'value': 44.0},
  'Cook some food': {'count': 8, 'value': 32.33},
  'Write an email': {'count': 11, 'value': 44.7},
  'Make coffee': {'count': 8, 'value': 31.490000000000002},
  'Cut bread': {'count': 7, 'value': 28.560000000000002},
  'Type up document': {'count': 6, 'value': 24.79},
  'Surf net': {'count': 3, 'value': 12.55},
  'Chop vegetables': {'count': 7, 'value': 29.23},
  'Wash teeth': {'count': 9, 'value': 36.55},
  'Wash dishes with dishwasher': {'coun

In [17]:
# Save the merged n-gram statistics (counts and summed scores).
# NOTE(review): presumably this is the file a later loop loads as its pre_ngram - confirm.
with open('episodes/amtloop2_worker100_episode10_updated_ngram_bias10.pickle', 'wb') as f:
    pickle.dump(updated_ngram, f)

In [18]:
bias = 10
base_score = 1  # weight given to transitions that were never observed

# Mean score of every observed transition, scaled by the bias.
updated_ngram_score = {
    current: {
        following: (stats["value"] / stats["count"]) * bias
        for following, stats in followers.items()
    }
    for current, followers in updated_ngram.items()
}

auth = loadAuthFile()
activities = get_all_activities(auth)
if activities is None:
    # get_all_activities returns None on an empty result or a failed request.
    raise RuntimeError("get_all_activities returned no activities; check the SPARQL endpoint and credentials")
activities = [x.replace("http://www.owl-ontologies.com/VirtualHome.owl#", "").replace('_', ' ').capitalize() for x in activities]

# Activities that did not appear in the AMT experiment are all added with
# the base score.
for values in updated_ngram_score.values():
    for activity in activities:
        if activity not in values:
            values[activity] = base_score

# Activities that never occurred as a current state get a full row of base scores.
for activity in activities:
    if activity not in updated_ngram_score:
        updated_ngram_score[activity] = dict.fromkeys(activities, base_score)

transition_probability = create_transition_probability(updated_ngram_score)

# end updating transition probability

In [48]:
num_lists = 10          # number of CSV files to write
episodes_per_list = 10  # episodes per file
episode_length = 6      # only chains with exactly this many activities are kept

for i in range(num_lists):
    episode_list = []
    # Rejection sampling: draw chains until enough full-length ones are collected.
    while len(episode_list) < episodes_per_list:
        mc = markov_chain(transition_probability)
        if len(mc) == episode_length:
            episode_list.append(mc)
    output_path = "episodes/amtloop1_worker100_episode10_list" + str(i+1) + "_scene1_bias1_base3.csv"
    # Write UTF-8 explicitly: the separator '→' is not encodable in every
    # locale default, and the result files are read back as UTF-8.
    with open(output_path, 'w', encoding="utf-8") as f:
        f.write("text\n")
        for episode in episode_list:
            f.write("%s\n" % ' → '.join(episode))

In [49]:
updated_ngram_score

{'start': {'Watch movie': 40.24444444444445,
  'Go to sleep': 38.81333333333333,
  'Take nap': 39.98461538461538,
  'Make toast': 39.425,
  'Sleep': 39.857142857142854,
  'Keep an eye on stove as something is cooking': 40.24285714285715,
  'Use computer': 38.8,
  'Have snack': 39.583333333333336,
  'Watch youtube': 40.583333333333336,
  'Send  email': 41.449999999999996,
  'Breakfast': 40.0,
  'Cook some food': 40.412499999999994,
  'Write an email': 40.63636363636363,
  'Make coffee': 39.362500000000004,
  'Cut bread': 40.8,
  'Type up document': 41.31666666666667,
  'Surf net': 41.833333333333336,
  'Chop vegetables': 41.75714285714286,
  'Wash teeth': 40.61111111111111,
  'Wash dishes with dishwasher': 41.190000000000005,
  'Brush teeth': 39.8,
  'Surf internet': 41.3,
  'Put on glasses': 41.54,
  'Sent email': 40.949999999999996,
  'Wash sink': 40.91666666666667,
  'Load dishwasher': 41.7,
  'Change light': 41.0,
  'Oil dining room': 39.900000000000006,
  'Clean mirror': 41.8,
  'W

In [21]:
transition_probability

{'start': {'Watch movie': 0.02387387879850896,
  'Go to sleep': 0.023024912594961762,
  'Take nap': 0.02371974255515113,
  'Make toast': 0.02338776655074804,
  'Sleep': 0.023644123082375953,
  'Keep an eye on stove as something is cooking': 0.023872937176721534,
  'Use computer': 0.023017002971947344,
  'Have snack': 0.023481693324044223,
  'Watch youtube': 0.02407491505012534,
  'Send  email': 0.024589040546062303,
  'Breakfast': 0.023728869043244685,
  'Cook some food': 0.023973573005253143,
  'Write an email': 0.024106373778023577,
  'Make coffee': 0.023350690192867975,
  'Cut bread': 0.024203446424109577,
  'Type up document': 0.02450994431591816,
  'Surf net': 0.024816442207726736,
  'Chop vegetables': 0.02477124436193008,
  'Wash teeth': 0.024091393431405365,
  'Wash dishes with dishwasher': 0.02443480289728122,
  'Brush teeth': 0.02361022469802846,
  'Surf internet': 0.024500057287150136,
  'Put on glasses': 0.024642430501409608,
  'Sent email': 0.024292429683021744,
  'Wash sin