Tic Tac Toe
---
Two players against each other

<img style="float:left" src="board.png" alt="drawing" width="200"/>

# BKT

In [None]:
import argparse
from collections import defaultdict, namedtuple
from io import open
import math
import os
from random import shuffle, uniform
from datetime import datetime
from future.utils import iterkeys, iteritems

from future.builtins import range
from future.utils import iteritems
import numpy as np
import torch

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')
# !tar -xvf '/content/drive/MyDrive/CMU/11785/PROJECT/data_en_es.tar.gz'
# !mkdir en_es
# !mv en_es.s* en_es/ 

In [None]:
### seed everything for reproducibility
def seed_everything(seed=11785):
    """
    Seed every random number generator this notebook draws from, for reproducibility.

    Parameters:
        seed: integer seed applied to all generators. Defaults to 11785, the value that
            used to be hard-coded, so existing `seed_everything()` calls behave as before.
    """
    # Local import: the notebook only imports `shuffle` / `uniform` from this module, and both
    # of them draw from the module-level generator that is seeded here.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all visible GPUs rather than only the current one.
    torch.cuda.manual_seed_all(seed)
try: import wandb
except:
  !pip install wandb -q

In [None]:
import wandb

# SECURITY: an API key used to be hard-coded on this line in plain text. Treat that key as
# leaked and revoke it. The key is now read from the WANDB_API_KEY environment variable;
# when the variable is unset, key=None leaves credential discovery to wandb itself.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Sigma is the L2 prior variance, regularizing the baseline model. Smaller sigma means more regularization.
# NOTE(review): no code in the visible part of this notebook reads this constant.
_DEFAULT_SIGMA = 20.0

# Eta is the learning rate/step size for SGD. Larger means larger step size.
# NOTE(review): no code in the visible part of this notebook reads this constant.
_DEFAULT_ETA = 0.1

In [None]:
def load_data(filename):
    """
    This method loads and returns the data in filename. If the data is labelled training data, it returns labels too.

    The file is read line by line. A line starting with '#' carries exercise-level metadata
    (either the prompt or a list of key:value pairs), a blank line closes the current exercise,
    and any other line describes one token instance of the current exercise.

    Parameters:
        filename: the location of the training or test data you want to load. The file is treated as
            labelled training data when its path contains the substring 'train'.

    Returns:
        data: a list of InstanceData objects from that data type and track.
        labels (optional): if you specified training data, a dict of instance_id:label pairs.

    Raises:
        AssertionError: if an instance line does not have the expected number of columns (7 for
            training data, 6 otherwise), its instance id is not 12 characters long, or a 'time'
            value contains a '.'.
    """

    # 'data' stores a list of 'InstanceData's as values.
    data = []

    # If this is training data, then 'labels' is a dict that contains instance_ids as keys and labels as values.
    training = 'train' in filename
    if training:
        labels = dict()

    num_exercises = 0
    print('Loading instances...')

    # Properties accumulated for the exercise being parsed. The same dict is reused for every token
    # of an exercise; InstanceData reads the values out in its constructor.
    instance_properties = dict()

    with open(filename, 'rt') as f:
        for line in f:
            line = line.strip()

            # If there's nothing in the line, then we're done with the exercise. Print if needed, otherwise continue
            if len(line) == 0:
                num_exercises += 1
                if num_exercises % 100000 == 0:
                    print('Loaded ' + str(len(data)) + ' instances across ' + str(num_exercises) + ' exercises...')
                instance_properties = dict()

            # If the line starts with #, then we're beginning a new exercise
            elif line[0] == '#':
                # Only a line whose first field is 'prompt:' is a prompt line; a bare substring test
                # would also match a metadata value that happens to contain 'prompt'.
                if line[1:].lstrip().startswith('prompt:'):
                    # Split on the first ':' only, so a prompt that itself contains ':' is kept whole.
                    instance_properties['prompt'] = line.split(':', 1)[1]
                else:
                    list_of_exercise_parameters = line[2:].split()
                    for exercise_parameter in list_of_exercise_parameters:
                        # maxsplit=1 keeps a value that contains ':' in one piece.
                        [key, value] = exercise_parameter.split(':', 1)
                        if key == 'countries':
                            value = value.split('|')
                        elif key == 'days':
                            value = float(value)
                        elif key == 'time':
                            if value == 'null':
                                value = None
                            else:
                                assert '.' not in value
                                value = int(value)
                        instance_properties[key] = value

            # Otherwise we're parsing a new Instance for the current exercise
            else:
                line = line.split()
                if training:
                    assert len(line) == 7
                else:
                    assert len(line) == 6
                assert len(line[0]) == 12

                instance_properties['instance_id'] = line[0]

                instance_properties['token'] = line[1]
                instance_properties['part_of_speech'] = line[2]

                # A fresh dict per token, so instances never share morphological features.
                instance_properties['morphological_features'] = dict()
                for morphological_pair in line[3].split('|'):
                    [key, value] = morphological_pair.split('=', 1)
                    if key == 'Person':
                        value = int(value)
                    instance_properties['morphological_features'][key] = value

                instance_properties['dependency_label'] = line[4]
                instance_properties['dependency_edge_head'] = int(line[5])
                if training:
                    # The last column of a training line is the label.
                    label = float(line[6])
                    labels[instance_properties['instance_id']] = label
                data.append(InstanceData(instance_properties=instance_properties))

        print('Done loading ' + str(len(data)) + ' instances across ' + str(num_exercises) +
              ' exercises.\n')

    if training:
        return data, labels
    else:
        return data

In [None]:
# Show a few fields of one parsed token.
# NOTE(review): `instance` is not defined anywhere above this cell; it looks like the loop variable
# left behind by a later cell, so this cell fails on a fresh kernel — confirm the intended order.
instance.prompt, instance.token, instance.part_of_speech , instance.morphological_features 

('¿Un mes o un año?', 'year', 'NOUN', {'Number': 'Sing', 'fPOS': 'NOUN++NN'})

In [None]:
class InstanceData(object):
    """
    A bare-bones class to store the included properties of each instance. This is meant to act as easy access to the
    data, and provides a launching point for deriving your own features from the data.
    """
    def __init__(self, instance_properties):
        """
        Copies the values of one parsed token out of 'instance_properties'.

        Parameters:
            instance_properties: a dict holding the keys 'instance_id', 'token', 'part_of_speech',
                'morphological_features', 'dependency_label', 'dependency_edge_head', 'user', 'countries',
                'days', 'client', 'session', 'format' and 'time'. 'prompt' is optional.

        Raises:
            KeyError: if any of the required keys is missing.
        """

        # Parameters specific to this instance
        self.instance_id = instance_properties['instance_id']
        self.token = instance_properties['token']
        self.part_of_speech = instance_properties['part_of_speech']
        self.morphological_features = instance_properties['morphological_features']
        self.dependency_label = instance_properties['dependency_label']
        self.dependency_edge_head = instance_properties['dependency_edge_head']

        # Derived parameters specific to this instance: characters 8-9 and 10-11 of the instance id
        self.exercise_index = int(self.instance_id[8:10])
        self.token_index = int(self.instance_id[10:12])

        # Derived parameters specific to this exercise
        self.exercise_id = self.instance_id[:10]

        # Parameters shared across the whole session
        self.user = instance_properties['user']
        self.countries = instance_properties['countries']
        self.days = instance_properties['days']
        self.client = instance_properties['client']
        self.session = instance_properties['session']
        self.format = instance_properties['format']
        self.time = instance_properties['time']
        # The prompt is optional; it is None when the exercise carried none.
        self.prompt = instance_properties.get('prompt', None)

        # Derived parameters shared across the whole session
        self.session_id = self.instance_id[:8]

    def to_features(self):
        """
        Prepares those features that we wish to use in the LogisticRegression example in this file. We introduce a bias,
        and take a few included features to use. Note that this dict restructures the corresponding features of the
        input dictionary, 'instance_properties'.

        Only the names of the morphological features are encoded, not their values, and the token is lower-cased.

        Returns:
            to_return: a representation of the features we'll use for logistic regression in a dict. A key/feature is a
                key/value pair of the original 'instance_properties' dict, and we encode this feature as 1.0 for 'hot'.
        """
        to_return = dict()

        to_return['bias'] = 1.0
        to_return['user:' + self.user] = 1.0
        to_return['format:' + self.format] = 1.0
        to_return['token:' + self.token.lower()] = 1.0

        to_return['part_of_speech:' + self.part_of_speech] = 1.0
        for morphological_feature in self.morphological_features:
            to_return['morphological_feature:' + morphological_feature] = 1.0
        to_return['dependency_label:' + self.dependency_label] = 1.0

        return to_return

In [None]:
def load_labels(filename):
    """
    Reads a label file, holding either gold labels or your own predictions.

    Every non-blank line is split on whitespace; the first field is the instance id and the
    second is parsed as a float label. Any further fields are ignored.

    Parameters:
        filename: the filename pointing to your labels

    Returns:
        labels: a dict of instance_ids as keys and float labels as values
    """
    labels = dict()

    with open(filename, 'rt') as handle:
        for raw_line in handle:
            fields = raw_line.strip().split()
            # A blank (or whitespace-only) line yields no fields and is skipped.
            if not fields:
                continue
            labels[fields[0]] = float(fields[1])
    return labels


def compute_acc(actual, predicted):
    """
    Returns the fraction of predictions that round to the same value as the actual label.

    Both inputs are lists (not dicts) and are compared position by position, so they must be
    in the same order.

    Parameters (here and below):
        actual: a list of the actual labels
        predicted: a list of your predicted labels
    """
    total = len(actual)
    hits = 0.
    for idx in range(total):
        # A prediction counts as a hit when it rounds to the same value as the label.
        hits += 1. if round(actual[idx], 0) == round(predicted[idx], 0) else 0.
    return hits / total


def compute_avg_log_loss(actual, predicted):
    """
    Returns the mean negative log-likelihood of the actual labels under your predictions.

    A label above 0.5 is scored with the predicted probability itself, any other label with
    one minus the predicted probability.
    """
    count = len(actual)
    total = 0.

    for idx in range(count):
        if actual[idx] > .5:
            likelihood = predicted[idx]
        else:
            likelihood = 1. - predicted[idx]
        total -= math.log(likelihood)
    return total / count


def compute_auroc(actual, predicted):
    """
    Computes the area under the receiver-operator characteristic curve.
    This code is a rewriting of code by Ben Hamner, available here:
    https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/auc.py

    The predictions are ranked in ascending order (tied predictions share the average of their
    ranks), and the AUROC is derived from the sum of the ranks held by the positive examples.

    Parameters:
        actual: a list of the actual labels; only entries equal to 1 are counted as positives
        predicted: a list of your predicted labels, in the same order as 'actual'

    Returns:
        auroc: the area under the ROC curve

    Raises:
        ZeroDivisionError: if 'actual' contains no positives or no negatives.
        IndexError: if the inputs are empty.
    """
    num = len(actual)
    # Pair every prediction with its label and order the pairs by descending prediction.
    temp = sorted([[predicted[i], actual[i]] for i in range(num)], reverse=True)

    sorted_predicted = [row[0] for row in temp]
    sorted_actual = [row[1] for row in temp]

    # (prediction, position in sorted_predicted) pairs in ascending order of prediction.
    sorted_posterior = sorted(zip(sorted_predicted, range(len(sorted_predicted))))
    # r[k] will hold the 1-based ascending rank of sorted_predicted[k].
    r = [0 for k in sorted_predicted]
    cur_val = sorted_posterior[0][0]
    # Index in sorted_posterior at which the current run of equal predictions starts.
    last_rank = 0
    for i in range(len(sorted_posterior)):
        if cur_val != sorted_posterior[i][0]:
            # The run of equal predictions [last_rank, i) just ended: give each member the
            # average of the 1-based ranks last_rank+1 .. i.
            cur_val = sorted_posterior[i][0]
            for j in range(last_rank, i):
                r[sorted_posterior[j][1]] = float(last_rank+1+i)/2.0
            last_rank = i
        if i==len(sorted_posterior)-1:
            # Close the final run [last_rank, i], whose 1-based ranks are last_rank+1 .. i+1.
            for j in range(last_rank, i+1):
                r[sorted_posterior[j][1]] = float(last_rank+i+2)/2.0

    num_positive = len([0 for x in sorted_actual if x == 1])
    num_negative = num - num_positive
    sum_positive = sum([r[i] for i in range(len(r)) if sorted_actual[i] == 1])
    # Rank-sum form of the AUROC: subtract the smallest possible rank sum of the positives,
    # then normalise by the number of positive/negative pairs.
    auroc = ((sum_positive - num_positive * (num_positive + 1) / 2.0) / (num_negative * num_positive))

    return auroc


def compute_f1(actual, predicted, cutoff = 0.5):
    """
    Computes the F1 score of your predictions.

    A label or prediction counts as positive when it is greater than or equal to 'cutoff'.

    Parameters:
        actual: a list of the actual labels
        predicted: a list of your predicted labels, in the same order as 'actual'
        cutoff: the threshold applied to both labels and predictions (0.5 by default)

    Returns:
        F1: the harmonic mean of precision and recall, or 0.0 when either of them (or their sum)
            has a zero denominator.
    """
    num = len(actual)

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    for i in range(num):
        actual_positive = actual[i] >= cutoff
        predicted_positive = predicted[i] >= cutoff
        if actual_positive and predicted_positive:
            true_positives += 1
        elif predicted_positive:
            false_positives += 1
        elif actual_positive:
            false_negatives += 1
        # True negatives are not needed for F1, so they are not counted.

    try:
        # No debug printing here: a metric should be silent and leave reporting to the caller.
        precision = true_positives / (true_positives + false_positives)
        recall = true_positives / (true_positives + false_negatives)
        F1 = 2 * precision * recall / (precision + recall)
    except ZeroDivisionError:
        F1 = 0.0

    return F1


def evaluate_metrics(actual, predicted):
    """
    Runs the four evaluation metrics on your predicted labels.

    Returns:
        a 4-tuple, in this order: accuracy, average log loss, AUROC, F1.
    """
    return (
        compute_acc(actual, predicted),
        compute_avg_log_loss(actual, predicted),
        compute_auroc(actual, predicted),
        compute_f1(actual, predicted),
    )


def test_metrics():
    """
    Sanity-checks the metric implementations against known values for a small example.

    evaluate_metrics returns a tuple (accuracy, average log loss, AUROC, F1), so the values are
    unpacked by position rather than looked up by key.

    Raises:
        AssertionError: if any metric, rounded to 3 decimals, differs from its expected value.
    """
    actual = [1, 0, 0, 1, 1, 0, 0, 1, 0, 1]
    predicted = [0.8, 0.2, 0.6, 0.3, 0.1, 0.2, 0.3, 0.9, 0.2, 0.7]
    accuracy, avglogloss, auroc, F1 = [round(value, 3) for value in evaluate_metrics(actual, predicted)]
    assert accuracy == 0.700
    assert avglogloss == 0.613
    assert auroc == 0.740
    assert F1 == 0.667
    print('Verified that our environment is calculating metrics correctly.')

In [None]:
# Load the labelled training split. The path contains 'train', so load_data returns (data, labels).
# NOTE(review): hard-coded absolute path — presumably specific to a Colab runtime; confirm.
training_data, training_labels = load_data("/content/en_es/en_es.slam.20190204.train")

Loading instances...


KeyboardInterrupt: ignored

In [None]:
# Scratch cell: open the training file by hand and consume only its first line.
# NOTE: `f` is not closed here; the following cell keeps iterating over this same handle.
filename = "/content/en_es/en_es.slam.20190204.train"
# If this is training data, then 'labels' is a dict that contains instance_ids as keys and labels as values.
training = False
if filename.find('train') != -1:
    training = True

if training:
    labels = dict()
f = open(filename, 'rt')
for line in f:
    line = line.strip()
    # Stop straight away: `line` keeps the (stripped) first line of the file.
    break

In [None]:
# Print a window of raw lines from the already-open handle `f`. The counter restarts at 0 here,
# so positions 1000..1100 are relative to wherever `f` currently stands, not to the file start.
for i, line in enumerate(f):
    if i >= 1000:
        line = line.strip()
        print(line)
        if i == 1100:
            break


# prompt:Yo escribo libros.
# user:UoWHujMy  countries:ES  days:1.722  client:android  session:lesson  format:reverse_tap  time:8
uDy5yvhV0401  I             PRON    Case=Nom|Number=Sing|Person=1|PronType=Prs|fPOS=PRON++PRP               nsubj        2  0
uDy5yvhV0402  write         VERB    Mood=Ind|Tense=Pres|VerbForm=Fin|fPOS=VERB++VBP                         ROOT         0  0
uDy5yvhV0403  books         NOUN    Number=Plur|fPOS=NOUN++NNS                                              dobj         2  0

# prompt:Ellos caminan.
# user:UoWHujMy  countries:ES  days:1.722  client:android  session:lesson  format:reverse_tap  time:6
uDy5yvhV0501  They          PRON    Case=Nom|Number=Plur|Person=3|PronType=Prs|fPOS=PRON++PRP               nsubj        2  0
uDy5yvhV0502  walk          VERB    Mood=Ind|Tense=Pres|VerbForm=Fin|fPOS=VERB++VBP                         ROOT         0  0

# prompt:Las niñas escriben.
# user:UoWHujMy  countries:ES  days:1.722  client:android  session:lesson  format:r

In [None]:
# Display the property dict most recently assigned to the global `instance_properties`.
# NOTE(review): this relies on a cell that assigns that name at top level — confirm execution order.
instance_properties

{'prompt': 'Yo soy un niño.',
 'user': 'XEinXf5+',
 'countries': ['CO'],
 'days': 0.003,
 'client': 'web',
 'session': 'lesson',
 'format': 'reverse_translate',
 'time': 9,
 'instance_id': 'DRihrVmh0103',
 'token': 'a',
 'part_of_speech': 'DET',
 'morphological_features': {'Definite': 'Ind',
  'PronType': 'Art',
  'fPOS': 'DET++DT'},
 'dependency_label': 'det',
 'dependency_edge_head': 4}

In [None]:
# Debug copy of load_data, inlined at top level (`if True:` keeps the function's indentation).
# It parses only the first 5 lines of `filename` and leaves `data`, `labels` (training files only)
# and `instance_properties` behind as globals instead of returning them.
# NOTE(review): `filename` must already be defined by an earlier cell.
# def load_data(filename):
if True:
    """
    This method loads and returns the data in filename. If the data is labelled training data, it returns labels too.

    Parameters:
        filename: the location of the training or test data you want to load.

    Returns:
        data: a list of InstanceData objects from that data type and track.
        labels (optional): if you specified training data, a dict of instance_id:label pairs.
    """

    # 'data' stores a list of 'InstanceData's as values.
    data = []

    # If this is training data, then 'labels' is a dict that contains instance_ids as keys and labels as values.
    training = False
    if filename.find('train') != -1:
        training = True

    if training:
        labels = dict()

    num_exercises = 0
    print('Loading instances...')
    instance_properties = dict()

    with open(filename, 'rt') as f:
        for i,line in enumerate(f):
            # Debug limit: stop once the first 5 lines (indices 0-4) have been processed.
            if i ==5:
              break
            line = line.strip()

            # If there's nothing in the line, then we're done with the exercise. Print if needed, otherwise continue
            if len(line) == 0:
                num_exercises += 1
                if num_exercises % 100000 == 0:
                    print('Loaded ' + str(len(data)) + ' instances across ' + str(num_exercises) + ' exercises...')
                instance_properties = dict()

            # If the line starts with #, then we're beginning a new exercise
            elif line[0] == '#':
                if 'prompt' in line:
                    # Keeps only the text between the first and the second ':' of the line.
                    instance_properties['prompt'] = line.split(':')[1]
                else:
                    list_of_exercise_parameters = line[2:].split()
                    for exercise_parameter in list_of_exercise_parameters:
                        [key, value] = exercise_parameter.split(':')
                        if key == 'countries':
                            value = value.split('|')
                        elif key == 'days':
                            value = float(value)
                        elif key == 'time':
                            if value == 'null':
                                value = None
                            else:
                                assert '.' not in value
                                value = int(value)
                        instance_properties[key] = value

            # Otherwise we're parsing a new Instance for the current exercise
            else:
                line = line.split()
                if training:
                    assert len(line) == 7
                else:
                    assert len(line) == 6
                assert len(line[0]) == 12

                instance_properties['instance_id'] = line[0]

                instance_properties['token'] = line[1]
                instance_properties['part_of_speech'] = line[2]

                instance_properties['morphological_features'] = dict()
                for l in line[3].split('|'):
                    [key, value] = l.split('=')
                    if key == 'Person':
                        value = int(value)
                    instance_properties['morphological_features'][key] = value

                instance_properties['dependency_label'] = line[4]
                instance_properties['dependency_edge_head'] = int(line[5])
                if training:
                    # The last column of a training line is the label.
                    label = float(line[6])
                    labels[instance_properties['instance_id']] = label
                data.append(InstanceData(instance_properties=instance_properties))

        print('Done loading ' + str(len(data)) + ' instances across ' + str(num_exercises) +
              ' exercises.\n')

    # The return statements are disabled because this code runs at top level, not in a function.
    # if training:
    #     return data, labels
    # else:
    #     return data

Loading instances...
Done loading 3 instances across 0 exercises.



In [None]:
# Keep the first parsed instance for inspection; `data` is the global list filled by an earlier cell.
inss = data[0]

In [None]:
# Display the instance. InstanceData defines no __repr__, so only the default object repr is shown.
inss

<__main__.InstanceData at 0x7ff457cb7820>

In [None]:
# Load the dev split. Its path does not contain 'train', so load_data returns only the instance
# list (no labels) and expects 6 columns per instance line.
test_data = load_data("/content/en_es/en_es.slam.20190204.dev")

Loading instances...
Loaded 334439 instances across 100000 exercises...
Done loading 387374 instances across 115770 exercises.



In [None]:
# Show the one-hot feature dict produced for the first training instance.
training_data[0].to_features()

{'bias': 1.0,
 'user:XEinXf5+': 1.0,
 'format:reverse_translate': 1.0,
 'token:i': 1.0,
 'part_of_speech:PRON': 1.0,
 'morphological_feature:Case': 1.0,
 'morphological_feature:Number': 1.0,
 'morphological_feature:Person': 1.0,
 'morphological_feature:PronType': 1.0,
 'morphological_feature:fPOS': 1.0,
 'dependency_label:nsubj': 1.0}

In [None]:
# Convert the parsed training instances into integer-encoded exercises for the BKT agent.
#
# Result: exercices[user][exercise_id] is the list of tokens of that exercise, each token being
#   [word_index, pos_index, format_index, dependency_label_index, [morphological_feature_indices]]
# Every vocabulary dict maps a value to the order in which it was first seen, so the next free
# index of a vocabulary is always its current length.

exercices = {}
word_dict = {}
pos_dict = {}
format_dict = {}
dependency_label_dict = {}
morphological_feature_dict = {}

for instance in training_data:
  user = instance.user
  # Dropping the 2-character token index from the instance id leaves the exercise id.
  instance_id = instance.instance_id[:-2]
  exercices.setdefault(user, {}).setdefault(instance_id, [])

  token = instance.token.lower()
  part_of_speech = instance.part_of_speech.lower()
  format = instance.format
  dependency_label = instance.dependency_label
  morphological_features = instance.morphological_features

  # setdefault returns the existing index, or registers the next free one for an unseen value.
  token_info = [
      word_dict.setdefault(token, len(word_dict)),
      pos_dict.setdefault(part_of_speech, len(pos_dict)),
      format_dict.setdefault(format, len(format_dict)),
      dependency_label_dict.setdefault(dependency_label, len(dependency_label_dict)),
  ]

  # Morphological features are encoded as "key:value" strings, one index per distinct pair.
  morphology = [
      morphological_feature_dict.setdefault(str(feature_key) + ":" + str(feature_val),
                                            len(morphological_feature_dict))
      for feature_key, feature_val in morphological_features.items()
  ]

  token_info.append(morphology)
  exercices[user][instance_id].append(token_info)

# Next free index of each vocabulary.
unique_word_index = len(word_dict)
unique_pos_index = len(pos_dict)
unique_format_index = len(format_dict)
unique_dependency_label_index = len(dependency_label_dict)
unique_morphological_feature_index = len(morphological_feature_dict)



In [None]:
# Reverse lookup: vocabulary index -> word.
inverse_word_dict = {index: word for word, index in word_dict.items()}

In [None]:
# Display the whole encoded training structure.
# NOTE(review): this prints the entire dict; the saved output below appears to be keyed by exercise
# id rather than by user, so it may come from an earlier version of the encoding cell — confirm.
exercices

{'lmFO3N5m01': [[2, 2, 0, 2, [9, 10, 11]], [3, 3, 0, 3, [1, 12]]],
 'vLeyJXe/01': [[0, 0, 0, 0, [0, 1, 2, 3, 4]],
  [1, 1, 0, 1, [5, 1, 2, 6, 7, 8]],
  [4, 4, 0, 4, [13]],
  [5, 5, 0, 3, [1, 14]]],
 'c41/o18u01': [[6, 0, 0, 5, [1, 2, 15, 3, 16]],
  [7, 3, 0, 0, [1, 12]],
  [8, 1, 0, 1, [5, 1, 17, 6, 7, 18]],
  [846, 5, 0, 3, [1, 14]]],
 'c41/o18u02': [[10, 0, 0, 0, [1, 14]],
  [8, 1, 0, 1, [5, 1, 17, 6, 7, 18]],
  [2, 2, 0, 2, [9, 10, 11]],
  [11, 3, 0, 3, [1, 12]]],
 'c41/o18u03': [[52, 0, 0, 3, [20, 35]]],
 'gyucMtCl01': [[32, 9, 0, 3, [28]],
  [30, 8, 0, 11, [27]],
  [28, 7, 0, 10, [22, 23]],
  [29, 3, 0, 12, [1, 12]]],
 'gyucMtCl02': [[28, 7, 0, 10, [22, 23]],
  [29, 3, 0, 3, [1, 12]],
  [30, 8, 0, 11, [27]],
  [33, 3, 0, 12, [1, 12]]],
 'gyucMtCl03': [[28, 7, 0, 10, [22, 23]], [31, 3, 0, 3, [1, 12]]],
 '/aMEs/Ja01': [[10, 0, 0, 0, [0, 38, 1, 17, 3, 4]],
  [8, 1, 0, 1, [5, 1, 17, 6, 7, 18]],
  [78, 6, 0, 7, [24]],
  [28, 7, 0, 3, [22, 23]]],
 'VeDUmsN901': [[40, 4, 0, 4, [13]], [5,

In [None]:
# Per user, drop the exercise ids and keep only the list of exercises (insertion order preserved).
exercices_merged = {
    user: list(by_exercise.values()) for user, by_exercise in exercices.items()
}

<__main__.InstanceData at 0x7ff33d1ab730>

In [None]:
# Encode the held-out instances with the vocabularies built from the training data.
# A token never seen in training is encoded as -1. Every other category is looked up directly,
# so an unseen part of speech, format, dependency label or morphological feature raises KeyError.
test_exercices = {}
for instance in test_data:
  user = instance.user
  # Dropping the 2-character token index from the instance id leaves the exercise id.
  instance_id = instance.instance_id[:-2]
  exercise_tokens = test_exercices.setdefault(user, {}).setdefault(instance_id, [])

  token = instance.token.lower()
  part_of_speech = instance.part_of_speech.lower()
  format = instance.format
  dependency_label = instance.dependency_label
  morphological_features = instance.morphological_features

  token_info = [
      word_dict.get(token, -1),
      pos_dict[part_of_speech],
      format_dict[format],
      dependency_label_dict[dependency_label],
      [
          morphological_feature_dict[str(feature_key) + ":" + str(feature_val)]
          for feature_key, feature_val in morphological_features.items()
      ],
  ]
  exercise_tokens.append(token_info)


# Per user, drop the exercise ids and keep only the list of exercises.
test_exercices_merged = {
    user: list(by_exercise.values()) for user, by_exercise in test_exercices.items()
}

# Test BKT

In [None]:
# NOTE: this cell repeats the test-data encoding performed earlier in the notebook and rebuilds
# `test_exercices` / `test_exercices_merged` from scratch.
# A token never seen in training is encoded as -1. Every other category is looked up directly,
# so an unseen part of speech, format, dependency label or morphological feature raises KeyError.
test_exercices = {}
for instance in test_data:
  user = instance.user
  # Dropping the 2-character token index from the instance id leaves the exercise id.
  instance_id = instance.instance_id[:-2]
  exercise_tokens = test_exercices.setdefault(user, {}).setdefault(instance_id, [])

  token = instance.token.lower()
  part_of_speech = instance.part_of_speech.lower()
  format = instance.format
  dependency_label = instance.dependency_label
  morphological_features = instance.morphological_features

  token_info = [
      word_dict.get(token, -1),
      pos_dict[part_of_speech],
      format_dict[format],
      dependency_label_dict[dependency_label],
      [
          morphological_feature_dict[str(feature_key) + ":" + str(feature_val)]
          for feature_key, feature_val in morphological_features.items()
      ],
  ]
  exercise_tokens.append(token_info)


# Per user, drop the exercise ids and keep only the list of exercises.
test_exercices_merged = {
    user: list(by_exercise.values()) for user, by_exercise in test_exercices.items()
}

In [None]:
class BKTLearner(object):
    """
    A simulated learner based on Bayesian Knowledge Tracing (BKT).

    The learner keeps one mastery probability per skill, in five separate vectors: tokens, parts of
    speech, exercise formats, dependency labels and morphological features. A token of an exercise is
    described by a 'token_info' list:

        [token_index, pos_index, format_index, dependency_index, [morphological_indices]]

    NOTE(review): the indices are used directly on NumPy arrays, so an index of -1 addresses the
    last entry of the vector instead of signalling an unknown skill.
    """

    # Mastery probability every skill starts from, and returns to on reset().
    _INITIAL_MASTERY = 0.2

    def __init__(self, token_state_size, pos_state_size, format_state_size, dependency_state_size, morphological_state_size, slip_prob, transition_prob, guess_prob):
        """
        Parameters:
            token_state_size, pos_state_size, format_state_size, dependency_state_size,
            morphological_state_size: number of skills tracked in each of the five vectors.
            slip_prob: probability of answering wrongly although the skill is learned.
            transition_prob: probability of learning a skill after one practice opportunity.
            guess_prob: probability of answering correctly although the skill is not learned.
        """
        self.token_state_size = token_state_size
        self.pos_state_size = pos_state_size
        self.format_state_size = format_state_size
        self.dependency_state_size = dependency_state_size
        self.morphological_state_size = morphological_state_size

        self.slip_prob = slip_prob
        self.transition_prob = transition_prob
        self.guess_prob = guess_prob

        self.reset()

    def _freshState(self, size):
        """Returns a float32 vector of 'size' skills, all at the initial mastery probability."""
        return np.full(size, self._INITIAL_MASTERY, dtype = np.float32)

    def reset(self):
        """
        Puts every skill back to the initial mastery probability.

        This used to read an attribute that is never set ('state_size') and therefore always
        raised AttributeError.
        """
        self.token_state = self._freshState(self.token_state_size)
        self.pos_state = self._freshState(self.pos_state_size)
        self.format_state = self._freshState(self.format_state_size)
        self.dependency_state = self._freshState(self.dependency_state_size)
        self.morphological_state = self._freshState(self.morphological_state_size)

    def getState(self):
        """Returns all five mastery vectors concatenated into one 1-D array (token, pos, format, dependency, morphological)."""
        return np.concatenate(
            [
              self.token_state,
              self.pos_state,
              self.format_state,
              self.dependency_state,
              self.morphological_state
            ]
        )

    def getStateHash(self):
        """
        Returns a string representation of the full state, usable as a dictionary key.

        The summarization threshold is raised above the array size because the default printing
        abbreviates large arrays with '...', which would give different states the same string.
        For arrays below the default threshold the result is the same as str(self.getState()).
        """
        state = self.getState()
        return np.array2string(state, threshold = state.size + 1)

    def getNetLearnedProb(self,token_info):
        """
        Returns the probability that a token is known: the product of the mastery probabilities of
        its token, its part of speech and each of its morphological features. The format and
        dependency states are tracked but do not enter this product.
        """
        token_index = token_info[0]
        pos_index = token_info[1]
        morphological_indices = token_info[4]
        net_learned = self.token_state[token_index] * self.pos_state[pos_index]
        for index in morphological_indices:
            net_learned *= self.morphological_state[index]

        return net_learned

    def _probCorrect(self, token_info):
        """Returns P(correct answer) for one token: known-and-no-slip plus unknown-and-guessed."""
        net_learned = self.getNetLearnedProb(token_info)
        return net_learned * (1 - self.slip_prob) + (1 - net_learned) * self.guess_prob

    @staticmethod
    def _collect(per_exercise):
        """
        Packs per-exercise results into one array.

        Exercises of equal length give a regular array. Exercises of different lengths give a 1-D
        object array holding one array per exercise, because recent NumPy versions refuse to build
        an array from ragged input implicitly.
        """
        if len(set(len(row) for row in per_exercise)) <= 1:
            return np.array(per_exercise)
        packed = np.empty(len(per_exercise), dtype = object)
        for position, row in enumerate(per_exercise):
            packed[position] = row
        return packed

    def predictAnswerProbabilities(self, input):
        """Returns an array with the probability of a correct answer for every token of one exercise."""
        return np.array([self._probCorrect(token_info) for token_info in input])

    def predictAnswer(self, input):
        """Samples a 0/1 answer for every token of one exercise, 1 being drawn with P(correct)."""
        answer = []
        for token_info in input:
            p_correct = self._probCorrect(token_info)
            value = np.random.choice(np.array([0,1]), p = np.array([1 - p_correct, p_correct]))
            answer.append(value)
        return np.array(answer)

    def getPosterior(self, prob, output_correctness):
        """
        Returns P(skill learned | observed answer) by Bayes' rule.

        Parameters:
            prob: prior probability that the skill is learned.
            output_correctness: 1 for a correct answer; any other value is treated as incorrect.
        """
        if output_correctness == 1:
            posterior = prob*(1 - self.slip_prob) / (prob*(1 - self.slip_prob) + (1 - prob)*self.guess_prob)
        else:
            posterior = prob*(self.slip_prob) / (prob*(self.slip_prob) + (1 - prob)*(1 - self.guess_prob))
        return posterior

    def _bktStep(self, state, index, correct):
        """Applies one BKT update in place: condition on the answer, then apply the learning transition."""
        posterior = self.getPosterior(state[index], correct)
        state[index] = posterior + (1 - posterior) * self.transition_prob

    def updateKnowledgeState(self, output_correctness, input):
        """
        Updates every skill touched by an exercise.

        Parameters:
            output_correctness: one 0/1 answer per token, aligned with 'input'.
            input: the exercise, a list of token_info lists.
        """
        for position, token_info in enumerate(input):
            correct = output_correctness[position]
            self._bktStep(self.token_state, token_info[0], correct)
            self._bktStep(self.pos_state, token_info[1], correct)
            self._bktStep(self.format_state, token_info[2], correct)
            self._bktStep(self.dependency_state, token_info[3], correct)
            for index in token_info[4]:
                self._bktStep(self.morphological_state, index, correct)

    def trainOneSet(self, excercises):
        """Practices every exercise once: samples the learner's own answers and updates the state with them."""
        for exercise in excercises:
            answer_correctness = self.predictAnswer(exercise)
            self.updateKnowledgeState(answer_correctness, exercise)

    def testOneSetProbabilities(self, excercises):
        """Returns the per-token probabilities of a correct answer for every exercise, without updating the state."""
        return self._collect([self.predictAnswerProbabilities(exercise) for exercise in excercises])

    def testOneSet(self, excercises):
        """Returns sampled 0/1 answers for every exercise, without updating the state."""
        return self._collect([self.predictAnswer(exercise) for exercise in excercises])

    def computeAccuracyForTest(self, test_response):
        """
        Returns the percentage of tokens answered with 1 across all exercises of 'test_response',
        or 0 when there are no tokens at all.
        """
        correct = 0
        total = 0
        for exercise in test_response:
            for token in exercise:
                correct += token
                total += 1
        if(total == 0):
            return 0
        return float(correct)/total * 100

    @staticmethod
    def _threshold(probabilities):
        """Turns per-exercise probabilities into 0/1 answers with a cutoff of 0.5."""
        return [np.where(exercise_probs >= 0.5, 1, 0) for exercise_probs in probabilities]

    def train(self, exercices_all, train_duration, test_duration):
        """
        Walks through the exercises in alternating windows: 'train_duration' exercises to practice
        on, followed by 'test_duration' exercises to evaluate on. For every window pair the test
        window is scored before and after practice, and both scores and their difference (the
        reward) are printed and logged to wandb.

        Raises:
            ValueError: if the two durations do not add up to a positive number, since the walk
                would then never advance.
        """
        if train_duration + test_duration <= 0:
            raise ValueError("train_duration + test_duration must be positive")

        position = 0
        batch = 0
        cummulative_reward = 0
        while position < len(exercices_all):
            # Slicing clamps at the end of the list, so the last windows may be shorter or empty.
            train_batch = exercices_all[position:position + train_duration]
            position += train_duration
            test_batch = exercices_all[position:position + test_duration]
            position += test_duration

            answer_correctness_before = self._threshold(self.testOneSetProbabilities(test_batch))
            self.trainOneSet(train_batch)
            answer_correctness = self._threshold(self.testOneSetProbabilities(test_batch))

            accuracy_before = self.computeAccuracyForTest(answer_correctness_before)
            accuracy = self.computeAccuracyForTest(answer_correctness)
            print("Batch + " + str(batch) + " " + " correct before: " + str(accuracy_before) + " correct after: " + str(accuracy))
            cummulative_reward += accuracy - accuracy_before
            wandb.log({'Batch Accuracy After': accuracy, 'Batch Accuracy Before': accuracy_before, "reward": accuracy - accuracy_before, "cummulative reward":cummulative_reward})
            batch += 1

In [None]:
# Create the W&B run only once per kernel session: when this cell is re-executed and
# `run` already exists it is just printed; `run` is undefined (NameError) on first execution.
try:
    print(run)
except NameError:
    run = wandb.init(
        name = "Reward Changes BKT - RL-TableBased", ## Wandb creates random run names if you skip this field
        reinit = True, ### Allows reinitializing runs when you re-run this cell
        # run_id = ### Insert specific run id here if you want to resume a previous run
        # resume = "must" ### You need this to resume previous runs, but comment out reinit = True when using this
        project = "BaseLine Ablations" ### Project should be created in your wandb account
        #config = config ### Wandb Config for your run
    )

<wandb.sdk.wandb_run.Run object at 0x7ff303afa7f0>


In [None]:
# Flatten the per-key lists stored in `exercices_merged` into one list, in dict iteration order.
# presumably each key is a user id and each element an exercise - verify against where exercices_merged is built
all_ex = []
for token_list in exercices_merged.values():
  all_ex.extend(token_list) 


In [None]:
# Shuffle the flattened exercises in place, then build the learner from the vocabulary sizes.
# NOTE(review): the three trailing literals (0.1, 0.1, 0.001) are unexplained here - confirm their
# meaning against BKTLearner.__init__ and consider naming them in a config cell.
np.random.shuffle(all_ex)
learner = BKTLearner(len(word_dict),len(pos_dict), len(format_dict), len(dependency_label_dict),len(morphological_feature_dict), 0.1, 0.1, 0.001)
# learner.train(all_ex, 50, 20)

In [None]:
# Inspect the entry stored under one specific key.
# NOTE(review): the saved output of this cell (187) is not a list of exercises - presumably stale.
exercices_merged['YLBfBt3k']

187

In [None]:
# Keep a view of all keys of exercices_merged for later inspection.
keyss = exercices_merged.keys()

In [None]:
# Inspect the entry stored under one key. In the saved output each exercise is a list of tokens,
# and each token is a list of four integers followed by a list of integers.
exercices_merged['XEinXf5+']

[[[0, 0, 0, 0, [0, 1, 2, 3, 4]],
  [1, 1, 0, 1, [5, 1, 2, 6, 7, 8]],
  [2, 2, 0, 2, [9, 10, 11]],
  [3, 3, 0, 3, [1, 12]]],
 [[0, 0, 0, 0, [0, 1, 2, 3, 4]],
  [1, 1, 0, 1, [5, 1, 2, 6, 7, 8]],
  [4, 4, 0, 4, [13]],
  [5, 5, 0, 3, [1, 14]]],
 [[6, 0, 0, 5, [1, 2, 15, 3, 16]],
  [7, 3, 0, 0, [1, 12]],
  [8, 1, 0, 1, [5, 1, 17, 6, 7, 18]],
  [9, 5, 0, 3, [1, 14]]],
 [[10, 0, 0, 0, [1, 14]],
  [8, 1, 0, 1, [5, 1, 17, 6, 7, 18]],
  [2, 2, 0, 2, [9, 10, 11]],
  [11, 3, 0, 3, [1, 12]]],
 [[8, 1, 0, 1, [5, 1, 17, 6, 7, 18]],
  [12, 0, 0, 0, [0, 19, 1, 17, 3, 4]],
  [2, 2, 0, 2, [9, 10, 11]],
  [3, 3, 0, 3, [1, 12]]],
 [[0, 0, 0, 0, [0, 1, 2, 3, 4]],
  [13, 1, 0, 3, [5, 6, 7, 8]],
  [2, 2, 0, 2, [9, 10, 11]],
  [14, 3, 0, 6, [1, 12]]],
 [[15, 6, 0, 3, [20, 21]]],
 [[0, 0, 0, 0, [0, 1, 2, 3, 4]],
  [16, 1, 0, 3, [5, 6, 7, 8]],
  [2, 2, 0, 2, [9, 10, 11]],
  [17, 3, 0, 6, [1, 12]]],
 [[0, 0, 0, 0, [0, 1, 2, 3, 4]],
  [1, 1, 0, 1, [5, 1, 2, 6, 7, 8]],
  [18, 7, 0, 3, [22, 23]]],
 [[19, 6, 1, 3, [2

In [None]:
# Capture the learner's current state hash for inspection.
a = learner.getStateHash()

In [None]:
# Hash the raw bytes of the learner state and show it as a string.
# NOTE(review): Python salts hash() of bytes per process unless PYTHONHASHSEED is fixed, so this value
# is presumably not stable across kernel restarts - use hashlib if a persistent key is needed.
str(hash(learner.getState().tobytes()))

'-6821411978467925784'

In [None]:
# !pip install -U sentence-transformers


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers<5.0.0,>=4.6.0
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m63.2 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece
  Downloading sentencepiece-0.1.98-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m62.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub>=0.4.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1

In [None]:
# Smoke test of sentence-transformers: encode one sentence with a pretrained MiniLM model
# and print each resulting embedding vector.
# NOTE(review): this import sits mid-notebook; consider moving it to the top import cell.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
sentences = ['The quick brown fox jumps over the lazy dog' ]
embeddings = model.encode(sentences)
for embedding in embeddings:
  print(embedding)

[ 6.18142188e-01 -2.47766361e-01 -2.35646576e-01 -1.10645533e-01
 -1.04882360e-01 -2.41441205e-01 -5.60291521e-02 -3.00354689e-01
 -2.29932610e-02  1.77721068e-01  2.20474243e-01  3.82652670e-01
  4.48558331e-01 -1.07466340e-01 -2.95623660e-01 -5.68135232e-02
 -3.91452998e-01  1.36096463e-01  1.37871087e-01  9.69017390e-03
 -2.35751629e-01 -5.08761346e-01 -2.31672935e-02  9.46924686e-02
 -7.72723556e-01 -1.64655447e-01 -7.71317840e-01 -4.58485693e-01
  1.26624003e-01 -3.72232795e-01  6.99772775e-01 -4.07931119e-01
 -4.35257554e-01 -7.20831156e-01 -7.07936227e-01  3.66327852e-01
 -4.00922120e-01 -1.83998048e-01 -1.25787079e-01  2.96748709e-02
 -1.53919160e-01  4.97115813e-02  1.04704015e-01  2.14557156e-01
 -6.93094432e-02  3.47619921e-01 -1.13453710e+00 -3.64123434e-01
  1.79999933e-01 -3.19597870e-01  1.38954028e-01  2.88712054e-01
 -2.80651122e-01  1.57439426e-01  7.94158340e-01  2.17851866e-02
  6.92504197e-02  1.28878668e-01 -2.73195922e-01  4.02935416e-01
  4.90810676e-03  2.49019

In [None]:
# Number of embeddings returned by the encode call.
# NOTE(review): the saved output (2) does not match the single sentence encoded in the cell above -
# presumably a stale output from an earlier run.
len(embeddings)

2

In [None]:
# Board dimensions for the tic-tac-toe demo; State and Player both use them to build and hash the board.
BOARD_ROWS = 3
BOARD_COLS = 3

### Board State
---
Track the board and judge the game state (win, tie, or still in progress).

There are two players, p1 and p2. p1 uses symbol 1 and p2 uses symbol -1; an empty cell is 0.

In [None]:
class State:
    """
    Tic-tac-toe environment shared by two players.

    The board is a BOARD_ROWS x BOARD_COLS float array: p1's marks are 1, p2's marks
    are -1 and empty cells are 0. p1 always moves first.

    Parameters:
        p1: the first player; must provide chooseAction, addState, feedReward and reset.
        p2: the second player; same interface. In `play2` it is called with the
            available positions only.
    """

    # Character printed by showBoard for each cell value.
    _CELL_SYMBOLS = {1: 'x', -1: 'o', 0: ' '}

    def __init__(self, p1, p2):
        self.board = np.zeros((BOARD_ROWS, BOARD_COLS))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.boardHash = None
        # p1 plays first
        self.playerSymbol = 1

    def getHash(self):
        """Return (and cache in `boardHash`) the string form of the flattened board."""
        self.boardHash = str(self.board.reshape(BOARD_COLS*BOARD_ROWS))
        return self.boardHash

    @staticmethod
    def _lineOwner(line):
        """Return 1 or -1 when every cell of `line` holds that mark, otherwise None."""
        total = line.sum()
        if total == len(line):
            return 1
        if total == -len(line):
            return -1
        return None

    def winner(self):
        """
        Judge the current board and update `isEnd` accordingly.

        Returns:
            1 if p1 owns a complete row, column or diagonal, -1 if p2 does,
            0 for a tie (no empty cell left), None while the game is still open.
        """
        # Rows first, then columns: a line is won when all its cells carry the same mark,
        # i.e. its sum equals plus or minus its length (3 on the default board).
        for i in range(BOARD_ROWS):
            owner = self._lineOwner(self.board[i, :])
            if owner is not None:
                self.isEnd = True
                return owner
        for j in range(BOARD_COLS):
            owner = self._lineOwner(self.board[:, j])
            if owner is not None:
                self.isEnd = True
                return owner

        # Diagonals starting at the top-left and top-right corners.
        diagonal_owners = (
            self._lineOwner(self.board.diagonal()),
            self._lineOwner(np.fliplr(self.board).diagonal()),
        )
        for mark in (1, -1):
            if mark in diagonal_owners:
                self.isEnd = True
                return mark

        # Tie: no line is complete and no position is left.
        if len(self.availablePositions()) == 0:
            self.isEnd = True
            return 0

        # Game still in progress.
        self.isEnd = False
        return None

    def availablePositions(self):
        """Return the empty cells as a list of (row, col) tuples in row-major order."""
        return [
            (i, j)  # tuples, so they can index the board directly
            for i in range(BOARD_ROWS)
            for j in range(BOARD_COLS)
            if self.board[i, j] == 0
        ]

    def updateState(self, position):
        """Place the current player's mark at `position` and hand the turn to the other player."""
        self.board[position] = self.playerSymbol
        self.playerSymbol = -1 if self.playerSymbol == 1 else 1

    def giveReward(self):
        """
        Feed the end-of-game reward to both players. Call only when the game has ended.

        The winner receives 1 and the loser 0. Any other result gives p1 0.1 and p2 0.5,
        because the first player has the advantage.
        """
        result = self.winner()
        if result == 1:
            self.p1.feedReward(1)
            self.p2.feedReward(0)
        elif result == -1:
            self.p1.feedReward(0)
            self.p2.feedReward(1)
        else:
            self.p1.feedReward(0.1)
            self.p2.feedReward(0.5)

    def reset(self):
        """Clear the board and restore the initial turn order."""
        self.board = np.zeros((BOARD_ROWS, BOARD_COLS))
        self.boardHash = None
        self.isEnd = False
        self.playerSymbol = 1

    def _trainingTurn(self, player):
        """
        Let `player` make one move during self-play.

        Returns:
            True when the move ended the game (rewards are then distributed and both
            players and the board are reset), False otherwise.
        """
        positions = self.availablePositions()
        action = player.chooseAction(positions, self.board, self.playerSymbol)
        self.updateState(action)
        player.addState(self.getHash())

        if self.winner() is None:
            return False
        self.giveReward()
        self.p1.reset()
        self.p2.reset()
        self.reset()
        return True

    def play(self, rounds=100):
        """
        Train both players by self-play.

        Parameters:
            rounds: number of games to play; progress is printed every 1000 games.
        """
        for i in range(rounds):
            if i%1000 == 0:
                print("Rounds {}".format(i))
            while not self.isEnd:
                # `or` short-circuits, so p2 does not move when p1's move ended the game.
                if self._trainingTurn(self.p1) or self._trainingTurn(self.p2):
                    break

    def _interactiveTurn(self, action):
        """
        Apply `action`, show the board and announce the result if the game ended.

        Returns:
            True when the game ended (the board is then reset), False otherwise.
        """
        self.updateState(action)
        self.showBoard()
        win = self.winner()
        if win is None:
            return False
        if win == 1:
            print(self.p1.name, "wins!")
        elif win == -1:
            print(self.p2.name, "wins!")
        else:
            print("tie!")
        self.reset()
        return True

    def play2(self):
        """Play a single game in which p2 is asked for its move with the positions only (human player)."""
        while not self.isEnd:
            # Player 1
            positions = self.availablePositions()
            p1_action = self.p1.chooseAction(positions, self.board, self.playerSymbol)
            if self._interactiveTurn(p1_action):
                break

            # Player 2
            positions = self.availablePositions()
            p2_action = self.p2.chooseAction(positions)
            if self._interactiveTurn(p2_action):
                break

    def showBoard(self):
        """Print the board with p1 shown as 'x', p2 as 'o' and empty cells as blanks."""
        separator = '-' * (4 * BOARD_COLS + 1)  # '-------------' on the default 3-column board
        for i in range(0, BOARD_ROWS):
            print(separator)
            out = '| '
            for j in range(0, BOARD_COLS):
                # '?' marks a cell value outside {1, -1, 0} instead of failing on an unbound name.
                token = self._CELL_SYMBOLS.get(int(self.board[i, j]), '?')
                out += token + ' | '
            print(out)
        print(separator)

In [None]:
class Player:
    """
    Table-based learning agent for the tic-tac-toe demo.

    The agent keeps a dictionary mapping board hashes to estimated values and picks moves
    epsilon-greedily with respect to the value of the board each move would produce.

    Parameters:
        name: label used in messages and in the policy file name.
        exp_rate: probability of taking a random move instead of the greedy one.
        lr: learning rate of the value update (default 0.2).
        decay_gamma: discount applied when a reward is propagated one state back (default 0.9).
    """

    def __init__(self, name, exp_rate=0.3, lr=0.2, decay_gamma=0.9):
        self.name = name
        self.states = []  # hashes of the boards this player produced in the current game
        self.lr = lr
        self.exp_rate = exp_rate
        self.decay_gamma = decay_gamma
        self.states_value = {}  # state -> value

    def getHash(self, board):
        """Return the string form of the flattened board (same format as State.getHash)."""
        boardHash = str(board.reshape(BOARD_COLS*BOARD_ROWS))
        return boardHash

    def chooseAction(self, positions, current_board, symbol):
        """
        Pick a move among `positions`.

        Parameters:
            positions: list of available (row, col) tuples.
            current_board: the current board array; it is copied, never modified.
            symbol: the mark this player would place (1 or -1).

        Returns:
            The chosen (row, col) tuple. With probability `exp_rate` it is random;
            otherwise it is the move leading to the highest-valued board, where unseen
            boards count as 0 and the last of several equally good moves wins.

        Raises:
            ValueError: if `positions` is empty.
        """
        if len(positions) == 0:
            raise ValueError("no available positions to choose from")

        if np.random.uniform(0, 1) <= self.exp_rate:
            # explore: take a random action
            idx = np.random.choice(len(positions))
            return positions[idx]

        # exploit: evaluate the board that each candidate move would produce
        value_max = float("-inf")
        action = None
        for p in positions:
            next_board = current_board.copy()
            next_board[p] = symbol
            value = self.states_value.get(self.getHash(next_board))
            if value is None:
                value = 0
            if value >= value_max:
                value_max = value
                action = p
        return action

    def addState(self, state):
        """Append a board hash to the history of the current game."""
        self.states.append(state)

    def feedReward(self, reward):
        """
        Propagate the end-of-game reward backwards through this game's states.

        Walking from the last state to the first, each state's value is moved a fraction
        `lr` of the way towards `decay_gamma * reward`:

            V(s) <- V(s) + lr * (decay_gamma * reward - V(s))

        Subtracting V(s) turns the bracket into the error between target and current
        estimate. After a state is updated, `reward` is replaced by its new value, so the
        target of an earlier state is the discounted value of its successor. `decay_gamma`
        itself stays fixed because the discount compounds through this chain of values.
        """
        for st in reversed(self.states):
            if self.states_value.get(st) is None:
                self.states_value[st] = 0
            self.states_value[st] += self.lr*(self.decay_gamma*reward - self.states_value[st])
            reward = self.states_value[st]

    def reset(self):
        """Forget the states of the finished game (learned values are kept)."""
        self.states = []

    def savePolicy(self):
        """Pickle the value table to the file 'policy_<name>' in the current directory."""
        import pickle  # local import: the notebook's visible import cell does not provide it

        with open('policy_' + str(self.name), 'wb') as fw:
            pickle.dump(self.states_value, fw)

    def loadPolicy(self, file):
        """
        Replace the value table with the one pickled in `file`.

        NOTE: unpickling can execute arbitrary code; only load policy files you created yourself.
        """
        import pickle  # local import: the notebook's visible import cell does not provide it

        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)

In [None]:
class HumanPlayer:
    """
    Interactive player that reads its moves from standard input.

    It offers the same methods as Player so that State can drive it; the learning
    hooks (addState, feedReward, reset) do nothing.
    """

    def __init__(self, name):
        self.name = name

    def chooseAction(self, positions, current_board=None, symbol=None):
        """
        Ask for a row and a column until the answer is one of the available positions.

        Parameters:
            positions: list of available (row, col) tuples.
            current_board: unused; accepted so the call signature matches Player.chooseAction.
            symbol: unused; accepted so the call signature matches Player.chooseAction.

        Returns:
            The chosen (row, col) tuple.
        """
        while True:
            try:
                row = int(input("Input your action row:"))
                col = int(input("Input your action col:"))
            except ValueError:
                # A non-numeric answer should re-prompt rather than abort the game.
                print("Please enter whole numbers for row and col.")
                continue
            action = (row, col)
            if action in positions:
                return action
            print("Position {} is not available.".format(action))

    def addState(self, state):
        """No-op: a human player keeps no state history."""
        pass

    def feedReward(self, reward):
        """No-op: a human player does not learn from rewards."""
        pass

    def reset(self):
        """No-op: there is nothing to reset."""
        pass

### Training

In [None]:
# Train two learning agents against each other by self-play.
p1 = Player("p1")
p2 = Player("p2")

st = State(p1, p2)
print("training...")
# 50000 self-play games; State.play prints a progress line every 1000 rounds.
st.play(50000)

training...
Rounds 0
Rounds 1000
Rounds 2000
Rounds 3000
Rounds 4000
Rounds 5000
Rounds 6000
Rounds 7000
Rounds 8000
Rounds 9000
Rounds 10000
Rounds 11000
Rounds 12000
Rounds 13000
Rounds 14000
Rounds 15000
Rounds 16000
Rounds 17000
Rounds 18000
Rounds 19000
Rounds 20000
Rounds 21000
Rounds 22000
Rounds 23000
Rounds 24000
Rounds 25000
Rounds 26000
Rounds 27000
Rounds 28000
Rounds 29000
Rounds 30000
Rounds 31000
Rounds 32000
Rounds 33000
Rounds 34000
Rounds 35000
Rounds 36000
Rounds 37000
Rounds 38000
Rounds 39000
Rounds 40000
Rounds 41000
Rounds 42000
Rounds 43000
Rounds 44000
Rounds 45000
Rounds 46000
Rounds 47000
Rounds 48000
Rounds 49000


In [None]:
# Persist both learned value tables; savePolicy writes to 'policy_<name>', i.e. policy_p1 and policy_p2.
p1.savePolicy()
p2.savePolicy()

In [None]:
# Reload p1's value table from the file written by p1.savePolicy().
p1.loadPolicy("policy_p1")

### Human vs Computer

In [None]:
# Computer moves first and plays greedily (exp_rate=0) from the trained p1 policy.
p1 = Player("computer", exp_rate=0)
p1.loadPolicy("policy_p1")

p2 = HumanPlayer("human")

st = State(p1, p2)
# play2 runs one interactive game, prompting the human for a row and column each turn.
st.play2()

-------------
|   |   |   | 
-------------
|   | x |   | 
-------------
|   |   |   | 
-------------
Input your action row:0
Input your action col:0
-------------
| o |   |   | 
-------------
|   | x |   | 
-------------
|   |   |   | 
-------------
-------------
| o |   |   | 
-------------
|   | x |   | 
-------------
|   |   | x | 
-------------
Input your action row:0
Input your action col:2
-------------
| o |   | o | 
-------------
|   | x |   | 
-------------
|   |   | x | 
-------------
-------------
| o | x | o | 
-------------
|   | x |   | 
-------------
|   |   | x | 
-------------
Input your action row:2
Input your action col:1
-------------
| o | x | o | 
-------------
|   | x |   | 
-------------
|   | o | x | 
-------------
-------------
| o | x | o | 
-------------
|   | x | x | 
-------------
|   | o | x | 
-------------
Input your action row:1
Input your action col:0
-------------
| o | x | o | 
-------------
| o | x | x | 
-------------
|   | o | x | 
-------------


In [None]:
# Same match-up as the previous cell, but the computer takes a random move with probability 0.2.
p1 = Player("computer", exp_rate=0.2)
p1.loadPolicy("policy_p1")

p2 = HumanPlayer("human")

st = State(p1, p2)
# play2 runs one interactive game, prompting the human for a row and column each turn.
st.play2()

-------------
|   |   |   | 
-------------
|   | x |   | 
-------------
|   |   |   | 
-------------
Input your action row:0
Input your action col:0
-------------
| o |   |   | 
-------------
|   | x |   | 
-------------
|   |   |   | 
-------------
-------------
| o |   |   | 
-------------
|   | x |   | 
-------------
|   |   | x | 
-------------
Input your action row:0
Input your action col:2
-------------
| o |   | o | 
-------------
|   | x |   | 
-------------
|   |   | x | 
-------------
-------------
| o | x | o | 
-------------
|   | x |   | 
-------------
|   |   | x | 
-------------
Input your action row:2
Input your action col:1
-------------
| o | x | o | 
-------------
|   | x |   | 
-------------
|   | o | x | 
-------------
-------------
| o | x | o | 
-------------
|   | x | x | 
-------------
|   | o | x | 
-------------
Input your action row:1
Input your action col:0
-------------
| o | x | o | 
-------------
| o | x | x | 
-------------
|   | o | x | 
-------------
