In [None]:
import argparse
from collections import defaultdict, namedtuple
from io import open
import math
import os
from random import shuffle, uniform
from datetime import datetime
from future.utils import iterkeys, iteritems

from future.builtins import range
from future.utils import iteritems
import numpy as np

In [None]:
from google.colab import drive
drive.mount('/content/drive')
!tar -xvf '/content/drive/MyDrive/CMU/11785/PROJECT/data_en_es.tar.gz'
!mkdir en_es
!mv en_es.s* en_es/ 

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
### seed everything for reproducibility
def seed_everything(seed=11785):
    """
    Seed every random number generator this notebook draws from.

    Parameters:
        seed: integer seed applied to Python's `random`, NumPy and PyTorch.
    """
    # Imported locally: the notebook only imports torch (and the `random` module itself) in a later cell,
    # so relying on module-level names here would raise NameError if this is called early.
    import random
    import torch

    random.seed(seed)  # `shuffle` and `uniform` used by LogisticRegression come from this generator
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

In [None]:
try: import wandb
except:
  !pip install wandb -q

In [None]:
import wandb
# Never hard-code an API key in the notebook. The key is read from the WANDB_API_KEY environment
# variable; when it is unset, key=None lets wandb.login() fall back to its own credential lookup.
# NOTE(review): the key that used to be committed on this line is exposed and should be revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Sigma sets the strength of the L2 prior that regularizes the baseline model. LogisticRegression.training_update
# shrinks each non-bias weight by rate * weight / sigma ** 2, i.e. sigma ** 2 plays the role of the prior variance.
# Smaller sigma means more regularization.
_DEFAULT_SIGMA = 20.0

# Eta is the learning rate/step size for SGD. Larger means larger step size.
_DEFAULT_ETA = 0.1

In [None]:
def load_data(filename):
    """
    This method loads and returns the data in filename. If the data is labelled training data, it returns labels too.

    The file is read as blocks separated by blank lines. Lines starting with '#' carry exercise-level metadata
    (either a 'prompt' line or space-separated key:value pairs); every other non-blank line describes one token
    instance of the current exercise.

    Parameters:
        filename: the location of the training or test data you want to load. The data is treated as labelled
            training data whenever the string 'train' occurs anywhere in this path.

    Returns:
        data: a list of InstanceData objects from that data type and track.
        labels (optional): if you specified training data, a dict of instance_id:label pairs.

    Raises:
        AssertionError: if an instance line does not have the expected number of columns (7 when training,
            6 otherwise), if an instance id is not 12 characters long, or if a 'time' value contains a '.'.
    """

    # 'data' stores a list of 'InstanceData's as values.
    data = []

    # If this is training data, then 'labels' is a dict that contains instance_ids as keys and labels as values.
    training = 'train' in filename
    labels = dict() if training else None
    expected_columns = 7 if training else 6

    num_exercises = 0
    print('Loading instances...')
    instance_properties = dict()

    with open(filename, 'rt') as f:
        for line in f:
            line = line.strip()

            # If there's nothing in the line, then we're done with the exercise. Print if needed, otherwise continue
            if len(line) == 0:
                num_exercises += 1
                if num_exercises % 100000 == 0:
                    print('Loaded ' + str(len(data)) + ' instances across ' + str(num_exercises) + ' exercises...')
                instance_properties = dict()

            # If the line starts with #, then we're beginning a new exercise
            elif line[0] == '#':
                if 'prompt' in line:
                    # Split on the first ':' only, so a prompt that itself contains a colon is kept whole.
                    instance_properties['prompt'] = line.split(':', 1)[1]
                else:
                    for exercise_parameter in line[2:].split():
                        [key, value] = exercise_parameter.split(':')
                        if key == 'countries':
                            value = value.split('|')
                        elif key == 'days':
                            value = float(value)
                        elif key == 'time':
                            if value == 'null':
                                value = None
                            else:
                                assert '.' not in value
                                value = int(value)
                        instance_properties[key] = value

            # Otherwise we're parsing a new Instance for the current exercise
            else:
                line = line.split()
                assert len(line) == expected_columns
                assert len(line[0]) == 12

                instance_properties['instance_id'] = line[0]

                instance_properties['token'] = line[1]
                instance_properties['part_of_speech'] = line[2]

                # A fresh dict per instance: InstanceData keeps a reference to it.
                instance_properties['morphological_features'] = dict()
                for l in line[3].split('|'):
                    [key, value] = l.split('=')
                    if key == 'Person':
                        value = int(value)
                    instance_properties['morphological_features'][key] = value

                instance_properties['dependency_label'] = line[4]
                instance_properties['dependency_edge_head'] = int(line[5])
                if training:
                    labels[instance_properties['instance_id']] = float(line[6])
                data.append(InstanceData(instance_properties=instance_properties))

        print('Done loading ' + str(len(data)) + ' instances across ' + str(num_exercises) +
              ' exercises.\n')

    if training:
        return data, labels
    else:
        return data

In [None]:
class InstanceData(object):
    """
    A bare-bones class to store the included properties of each instance. This is meant to act as easy access to the
    data, and provides a launching point for deriving your own features from the data.

    Parameters:
        instance_properties: a dict holding the token-level keys ('instance_id', 'token', 'part_of_speech',
            'morphological_features', 'dependency_label', 'dependency_edge_head') and the exercise-level keys
            ('user', 'countries', 'days', 'client', 'session', 'format', 'time'). 'prompt' is optional.

    Raises:
        KeyError: if any required key is missing from instance_properties.
    """
    def __init__(self, instance_properties):

        # Parameters specific to this instance
        self.instance_id = instance_properties['instance_id']
        self.token = instance_properties['token']
        self.part_of_speech = instance_properties['part_of_speech']
        self.morphological_features = instance_properties['morphological_features']
        self.dependency_label = instance_properties['dependency_label']
        self.dependency_edge_head = instance_properties['dependency_edge_head']

        # Derived parameters specific to this instance: characters 8-9 and 10-11 of the id are parsed as integers
        self.exercise_index = int(self.instance_id[8:10])
        self.token_index = int(self.instance_id[10:12])

        # Derived parameters specific to this exercise
        self.exercise_id = self.instance_id[:10]

        # Parameters shared across the whole session
        self.user = instance_properties['user']
        self.countries = instance_properties['countries']
        self.days = instance_properties['days']
        self.client = instance_properties['client']
        self.session = instance_properties['session']
        self.format = instance_properties['format']
        self.time = instance_properties['time']
        self.prompt = instance_properties.get('prompt', None)

        # Derived parameters shared across the whole session
        self.session_id = self.instance_id[:8]

    def to_features(self):
        """
        Prepares those features that we wish to use in the LogisticRegression example in this file. We introduce a bias,
        and take a few included features to use. Note that this dict restructures the corresponding features of the
        input dictionary, 'instance_properties'.

        Only the names of the morphological features are encoded, not their values.

        Returns:
            to_return: a representation of the features we'll use for logistic regression in a dict. A key/feature is a
                key/value pair of the original 'instance_properties' dict, and we encode this feature as 1.0 for 'hot'.
        """
        to_return = dict()

        to_return['bias'] = 1.0
        to_return['user:' + self.user] = 1.0
        to_return['format:' + self.format] = 1.0
        to_return['token:' + self.token.lower()] = 1.0

        to_return['part_of_speech:' + self.part_of_speech] = 1.0
        for morphological_feature in self.morphological_features:
            to_return['morphological_feature:' + morphological_feature] = 1.0
        to_return['dependency_label:' + self.dependency_label] = 1.0

        return to_return

In [None]:
class LogisticRegressionInstance(namedtuple('Instance', ['features', 'label', 'name'])):
    """
    A named tuple for packaging together the instance features, label, and name.

    Parameters:
        features: a dict of feature name -> value.
        label: a number (stored as float), or None for unlabelled instances.
        name: an identifier for the instance; stored as given.

    Raises:
        TypeError: if label is neither None nor an int/float, or if features is not a dict.
    """
    def __new__(cls, features, label, name):
        # Test against None rather than truthiness: a label of 0 is falsy but must still be validated and
        # converted to float like any other label.
        if label is not None:
            if not isinstance(label, (int, float)):
                raise TypeError('LogisticRegressionInstance label must be a number.')
            label = float(label)
        if not isinstance(features, dict):
            raise TypeError('LogisticRegressionInstance features must be a dict.')
        return super(LogisticRegressionInstance, cls).__new__(cls, features, label, name)


class LogisticRegression(object):
    """
    An L2-regularized logistic regression object trained using stochastic gradient descent.

    Weights are stored in a defaultdict, so the first lookup of a feature that has never been seen creates a
    random weight in [-1, 1] for it. This also happens for lookups made while predicting.
    """

    def __init__(self, sigma=_DEFAULT_SIGMA, eta=_DEFAULT_ETA):
        super(LogisticRegression, self).__init__()
        self.sigma = sigma  # L2 prior scale; the regularization update divides by sigma ** 2
        self.eta = eta  # initial learning rate
        self.weights = defaultdict(lambda: uniform(-1.0, 1.0)) # weights initialize to random numbers
        self.fcounts = None # this forces smaller steps for things we've seen often before

    def predict_instance(self, instance):
        """
        This computes the logistic function of the dot product of the instance features and the weights.
        We truncate predictions at ~10^(-7) and ~1 - 10^(-7).
        """
        activation = sum(float(self.weights[k]) * instance.features[k] for k in instance.features)
        a = min(17., max(-17., activation))
        return 1. / (1. + math.exp(-a))

    def error(self, instance):
        """Returns the instance label minus the current prediction for that instance."""
        return instance.label - self.predict_instance(instance)

    def reset(self):
        """Clears the per-feature update counts that scale the learning rate."""
        self.fcounts = defaultdict(int)

    def training_update(self, instance):
        """
        Performs one SGD step on a single labelled instance.

        The step size for each feature is eta / sqrt(1 + number of previous updates of that feature).
        """
        if self.fcounts is None:
            self.reset()
        err = self.error(instance)
        for k in instance.features:
            rate = self.eta / math.sqrt(1 + self.fcounts[k])
            # L2 regularization update
            if k != 'bias':
                self.weights[k] -= rate * self.weights[k] / self.sigma ** 2
            # error update
            self.weights[k] += rate * err * instance.features[k]
            # increment feature count for learning rate
            self.fcounts[k] += 1

    def train(self, train_set, dev_set, iterations=10, progress_every=100):
        """
        Trains for a number of passes over train_set and evaluates on dev_set after every pass.

        After each pass the dev predictions are written to /content/out.pred<pass index>, read back, scored
        against /content/en_es/en_es.slam.20190204.dev.key, printed and logged to wandb.

        Parameters:
            train_set: a list of labelled LogisticRegressionInstance objects. It is shuffled IN PLACE every pass.
            dev_set: an iterable of LogisticRegressionInstance objects to predict after every pass.
            iterations: the number of passes over train_set.
            progress_every: print a progress line every this many instances; 0 or None disables it.
        """
        labels = None
        for it in range(iterations):
            print('Training iteration ' + str(it+1) + '/' + str(iterations) + '...')
            shuffle(train_set)
            # enumerate keeps the counter advancing on every instance. The previous hand-rolled counter was
            # only incremented inside the "print" branch, so it stuck at 1 after the first instance.
            for i, instance in enumerate(train_set):
                self.training_update(instance)
                if progress_every and i % progress_every == 0:
                    print(str(i) + " out of " + str(len(train_set)))
            predictions = self.predict_test_set(dev_set)
            # The dev key does not change between passes, so it is only read once.
            if labels is None:
                labels = load_labels("/content/en_es/en_es.slam.20190204.dev.key")

            directory = os.path.dirname("/content/out.pred" + str(it))
            if not os.path.exists(directory):
                os.makedirs(directory)

            with open("/content/out.pred" + str(it), 'wt') as f:
                for instance_id, prediction in iteritems(predictions):
                    f.write(instance_id + ' ' + str(prediction) + '\n')

            predictions = load_labels("/content/out.pred" + str(it))

            actual = []
            predicted = []

            for instance_id in iterkeys(labels):
                try:
                    actual.append(labels[instance_id])
                    predicted.append(predictions[instance_id])
                except KeyError:
                    print('No prediction for instance ID ' + instance_id + '!')

            acc, avg_log_loss, auroc, F1 = evaluate_metrics(actual, predicted)
            print("acc : " + str(acc) + " avg log loss: " + str(avg_log_loss) + " auroc: " + str(auroc) + " F1: " + str(F1))
            print('Saving to WandB')
            wandb.log({'Log Loss': avg_log_loss, 'aucroc': auroc, 'F1': F1,'accuracy': acc})


        print('\n')

    def predict_test_set(self, test_set):
        """Returns a dict mapping each instance name to its predicted probability."""
        return {instance.name: self.predict_instance(instance) for instance in test_set}


In [None]:
def load_labels(filename):
    """
    Reads a whitespace-separated "instance_id value" file, as used for both gold labels and predictions.

    Blank lines are skipped. If an instance_id appears more than once, the last value wins.

    Parameters:
        filename: the filename pointing to your labels

    Returns:
        labels: a dict of instance_ids as keys and the parsed float values as values
    """
    parsed = {}

    with open(filename, 'rt') as handle:
        for raw_line in handle:
            fields = raw_line.split()
            # A blank (or whitespace-only) line produces no fields at all.
            if not fields:
                continue
            parsed[fields[0]] = float(fields[1])
    return parsed


def compute_acc(actual, predicted):
    """
    Computes the accuracy of your predictions, using 0.5 as a cutoff: a value >= 0.5 counts as the positive class.

    Note that these inputs are lists, not dicts; they assume that actual and predicted are in the same order.

    Parameters (here and below):
        actual: a list of the actual labels
        predicted: a list of your predicted labels

    Raises:
        ZeroDivisionError: if actual is empty.
    """
    num = len(actual)
    acc = 0.
    for i in range(num):
        # Explicit thresholds agree with compute_f1. Python 3's round() rounds half to even, so round(0.5) is 0
        # and a prediction of exactly 0.5 would be counted as the negative class.
        if (actual[i] >= 0.5) == (predicted[i] >= 0.5):
            acc += 1.
    acc /= num
    return acc


def compute_avg_log_loss(actual, predicted, eps=1e-15):
    """
    Computes the average log loss of your predictions.

    Parameters:
        actual: a list of the actual labels; values > 0.5 are treated as the positive class
        predicted: a list of predicted probabilities of the positive class, in the same order
        eps: lower bound applied to the probability assigned to the true class before taking the log, so that
            a fully confident wrong prediction yields a large finite loss instead of a math domain error

    Raises:
        ZeroDivisionError: if actual is empty.
    """
    num = len(actual)
    loss = 0.

    for i in range(num):
        # p is the probability the prediction assigned to the class that actually occurred.
        p = predicted[i] if actual[i] > .5 else 1. - predicted[i]
        loss -= math.log(max(p, eps))
    loss /= num
    return loss


def compute_auroc(actual, predicted):
    """
    Computes the area under the receiver-operator characteristic curve.
    This code is a rewriting of code by Ben Hamner, available here:
    https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/auc.py

    The area is obtained from ranks: predictions are ranked in ascending order (tied predictions share the
    average of their ranks), and the rank sum of the positive instances is turned into the AUROC.

    Parameters:
        actual: a list of the actual labels; only values equal to 1 count as positives
        predicted: a list of your predicted labels, in the same order

    Raises:
        IndexError: if the inputs are empty.
        ZeroDivisionError: if there are no positives or no negatives in actual.
    """
    num = len(actual)
    # Pair every prediction with its label and order the pairs by descending prediction.
    temp = sorted([[predicted[i], actual[i]] for i in range(num)], reverse=True)

    sorted_predicted = [row[0] for row in temp]
    sorted_actual = [row[1] for row in temp]

    # (prediction, position in sorted_predicted) pairs in ascending prediction order.
    sorted_posterior = sorted(zip(sorted_predicted, range(len(sorted_predicted))))
    # r[k] will hold the 1-based ascending rank of sorted_predicted[k].
    r = [0 for k in sorted_predicted]
    cur_val = sorted_posterior[0][0]
    # Start (0-based) of the current run of equal predictions.
    last_rank = 0
    for i in range(len(sorted_posterior)):
        if cur_val != sorted_posterior[i][0]:
            cur_val = sorted_posterior[i][0]
            # The run [last_rank, i) just ended: give each member the mean of the 1-based ranks last_rank+1 .. i.
            for j in range(last_rank, i):
                r[sorted_posterior[j][1]] = float(last_rank+1+i)/2.0
            last_rank = i
        if i==len(sorted_posterior)-1:
            # Close the final run, which includes element i itself (1-based ranks last_rank+1 .. i+1).
            for j in range(last_rank, i+1):
                r[sorted_posterior[j][1]] = float(last_rank+i+2)/2.0

    num_positive = len([0 for x in sorted_actual if x == 1])
    num_negative = num - num_positive
    sum_positive = sum([r[i] for i in range(len(r)) if sorted_actual[i] == 1])
    # Rank sum of the positives minus its minimum possible value, normalised by the number of
    # (positive, negative) pairs.
    auroc = ((sum_positive - num_positive * (num_positive + 1) / 2.0) / (num_negative * num_positive))

    return auroc


def compute_f1(actual, predicted, cutoff = 0.5):
    """
    Computes the F1 score of your predictions. Note that we use 0.5 as the cutoff here.
    """
    num = len(actual)

    true_positives = 0
    false_positives = 0
    false_negatives = 0
    true_negatives = 0

    for i in range(num):
        if actual[i] >= cutoff and predicted[i] >= cutoff:
            true_positives += 1
        elif actual[i] < cutoff and predicted[i] >= cutoff:
            false_positives += 1
        elif actual[i] >= cutoff and predicted[i] < cutoff:
            false_negatives += 1
        else:
            true_negatives += 1

    try:
        precision = true_positives / (true_positives + false_positives)
        print(precision)
        recall = true_positives / (true_positives + false_negatives)
        print(recall)
        F1 = 2 * precision * recall / (precision + recall)
    except ZeroDivisionError:
        F1 = 0.0

    return F1


def evaluate_metrics(actual, predicted):
    """
    This computes and returns the notable evaluation metrics for your predicted labels.

    Parameters:
        actual: a list of the actual labels
        predicted: a list of your predicted labels, in the same order

    Returns:
        A 4-tuple (accuracy, average log loss, AUROC, F1) -- a tuple, not a dictionary, so callers unpack it
        positionally.
    """
    acc = compute_acc(actual, predicted)
    avg_log_loss = compute_avg_log_loss(actual, predicted)
    auroc = compute_auroc(actual, predicted)
    F1 = compute_f1(actual, predicted)

    return acc, avg_log_loss, auroc, F1


def test_metrics():
    """
    Checks the metric functions against known values for a small hand-made example.

    Raises:
        AssertionError: if any metric, rounded to 3 decimals, differs from its expected value.
    """
    actual = [1, 0, 0, 1, 1, 0, 0, 1, 0, 1]
    predicted = [0.8, 0.2, 0.6, 0.3, 0.1, 0.2, 0.3, 0.9, 0.2, 0.7]
    # evaluate_metrics returns a tuple; indexing it by metric name fails before any check runs.
    acc, avg_log_loss, auroc, F1 = evaluate_metrics(actual, predicted)
    assert round(acc, 3) == 0.700
    assert round(avg_log_loss, 3) == 0.613
    assert round(auroc, 3) == 0.740
    assert round(F1, 3) == 0.667
    print('Verified that our environment is calculating metrics correctly.')

In [None]:
# The path contains 'train', so load_data returns both the instances and the instance_id -> label dict.
training_data, training_labels = load_data("/content/en_es/en_es.slam.20190204.train")

Loading instances...
Loaded 317049 instances across 100000 exercises...
Loaded 635368 instances across 200000 exercises...
Loaded 951536 instances across 300000 exercises...
Loaded 1271940 instances across 400000 exercises...
Loaded 1591344 instances across 500000 exercises...
Loaded 1911212 instances across 600000 exercises...
Loaded 2227444 instances across 700000 exercises...
Loaded 2546704 instances across 800000 exercises...
Done loading 2622957 instances across 824012 exercises.



In [None]:
# The path has no 'train' in it, so load_data returns only the instances. Note this is the dev split, kept under the name test_data.
test_data = load_data("/content/en_es/en_es.slam.20190204.dev")

Loading instances...
Loaded 334439 instances across 100000 exercises...
Done loading 387374 instances across 115770 exercises.



In [None]:
# Preview the feature dict generated for the first training instance.
training_data[0].to_features()

{'bias': 1.0,
 'user:XEinXf5+': 1.0,
 'format:reverse_translate': 1.0,
 'token:i': 1.0,
 'part_of_speech:PRON': 1.0,
 'morphological_feature:Case': 1.0,
 'morphological_feature:Number': 1.0,
 'morphological_feature:Person': 1.0,
 'morphological_feature:PronType': 1.0,
 'morphological_feature:fPOS': 1.0,
 'dependency_label:nsubj': 1.0}

In [None]:
#Convert into data that can be used to train the BKT agent

# exercices: user -> exercise id -> list of token_info entries (one per token instance, in file order)
exercices = {}
# One vocabulary per categorical attribute, each mapping a value to a dense integer id (0, 1, 2, ...).
word_dict = {}
pos_dict = {}
format_dict = {}
dependency_label_dict = {}
morphological_feature_dict = {}


def _index_for(vocabulary, key):
    """Return the integer id of `key` in `vocabulary`, assigning the next unused id the first time it is seen."""
    return vocabulary.setdefault(key, len(vocabulary))


for instance in training_data:
    # Dropping the last two characters of the instance id leaves an id shared by every token of the exercise.
    exercise_id = instance.instance_id[:-2]
    exercise_tokens = exercices.setdefault(instance.user, {}).setdefault(exercise_id, [])

    # Morphological features are indexed as "key:value" strings, so the value is part of the identity.
    morphology = [
        _index_for(morphological_feature_dict, str(feature_key) + ":" + str(feature_val))
        for feature_key, feature_val in instance.morphological_features.items()
    ]

    # token_info layout: [word id, part-of-speech id, format id, dependency-label id, [morphological ids]]
    exercise_tokens.append([
        _index_for(word_dict, instance.token.lower()),
        _index_for(pos_dict, instance.part_of_speech.lower()),
        _index_for(format_dict, instance.format),
        _index_for(dependency_label_dict, instance.dependency_label),
        morphology,
    ])

# Next free id of each vocabulary, kept under the original names for any later cell that reads them.
unique_word_index = len(word_dict)
unique_pos_index = len(pos_dict)
unique_format_index = len(format_dict)
unique_dependency_label_index = len(dependency_label_dict)
unique_morphological_feature_index = len(morphological_feature_dict)



In [None]:
# For every user, drop the exercise ids and keep only the list of exercises (in insertion order).
exercices_merged = {}
for user, per_exercise in exercices.items():
  exercices_merged[user] = list(per_exercise.values())

In [None]:
# Id used for any value that never occurred in the training data. Every attribute uses it, so an unseen
# part of speech, format, dependency label or morphological feature no longer raises KeyError while an
# unseen token is silently accepted.
# NOTE(review): BKTLearner indexes its NumPy state arrays with these ids, and -1 selects the LAST entry of an
# array; handle UNKNOWN_INDEX explicitly before feeding these exercises to the learner.
UNKNOWN_INDEX = -1

# test_exercices: user -> exercise id -> list of token_info entries, encoded with the training vocabularies.
test_exercices = {}
for instance in test_data:
  # Dropping the last two characters of the instance id leaves an id shared by every token of the exercise.
  exercise_id = instance.instance_id[:-2]
  exercise_tokens = test_exercices.setdefault(instance.user, {}).setdefault(exercise_id, [])

  morphology = [
    morphological_feature_dict.get(str(feature_key) + ":" + str(feature_val), UNKNOWN_INDEX)
    for feature_key, feature_val in instance.morphological_features.items()
  ]

  # token_info layout: [word id, part-of-speech id, format id, dependency-label id, [morphological ids]]
  exercise_tokens.append([
    word_dict.get(instance.token.lower(), UNKNOWN_INDEX),
    pos_dict.get(instance.part_of_speech.lower(), UNKNOWN_INDEX),
    format_dict.get(instance.format, UNKNOWN_INDEX),
    dependency_label_dict.get(instance.dependency_label, UNKNOWN_INDEX),
    morphology,
  ])


# For every user, drop the exercise ids and keep only the list of exercises.
test_exercices_merged = {}
for user in test_exercices:
  test_exercices_merged[user] = list(test_exercices[user].values())

In [None]:
# training_instances = [LogisticRegressionInstance(features=instance_data.to_features(),
#                                                   label=training_labels[instance_data.instance_id],
#                                                   name=instance_data.instance_id
#                                                   ) for instance_data in training_data]

In [None]:
# test_instances = [LogisticRegressionInstance(features=instance_data.to_features(),
#                                                  label=None,
#                                                  name=instance_data.instance_id
#                                                  ) for instance_data in test_data]

In [None]:
# logistic_regression_model = LogisticRegression()

In [None]:
# logistic_regression_model.train(training_instances,test_instances, iterations=20)

In [None]:
# predictions = logistic_regression_model.predict_test_set(test_instances)

In [None]:
# with open("/content/out.pred", 'wt') as f:
#     for instance_id, prediction in iteritems(predictions):
#         f.write(instance_id + ' ' + str(prediction) + '\n')

In [None]:
# print('\nLoading labels for exercises...')
# labels = load_labels("/content/en_es/en_es.slam.20190204.dev.key")
# print(labels)
# print('Loading predictions for exercises...')
# predictions = load_labels("/content/out.pred")

# actual = []
# predicted = []

# for instance_id in iterkeys(labels):
#     try:
#         actual.append(labels[instance_id])
#         predicted.append(predictions[instance_id])
#     except KeyError:
#         print('No prediction for instance ID ' + instance_id + '!')

# metrics = evaluate_metrics(actual, predicted)
# line = '\t'.join([('%s=%.3f' % (metric, value)) for (metric, value) in iteritems(metrics)])
# print('Metrics:\t' + line)

## Test BKT 

In [None]:
import numpy as np

In [None]:
class BKTLearner(object):
    """
    A simulated learner that keeps one "probability learned" per skill and updates it with
    Bayesian-Knowledge-Tracing style rules.

    There are five families of skills (token, part of speech, format, dependency label, morphological feature),
    each stored as a float32 array indexed by the integer ids found in a token_info entry:
        token_info = [token id, pos id, format id, dependency id, [morphological ids]]
    An exercise is a list of token_info entries.

    Parameters:
        token_state_size, pos_state_size, format_state_size, dependency_state_size, morphological_state_size:
            number of skills in each family.
        slip_prob: probability of answering incorrectly although the skill is learned.
        transition_prob: probability of moving from "not learned" to "learned" after an observation.
        guess_prob: probability of answering correctly although the skill is not learned.
    """

    # Probability of "already learned" that every skill starts from.
    INITIAL_LEARNED_PROB = 0.2

    def __init__(self, token_state_size, pos_state_size, format_state_size, dependency_state_size, morphological_state_size, slip_prob, transition_prob, guess_prob):
        self.token_state_size = token_state_size
        self.pos_state_size = pos_state_size
        self.format_state_size = format_state_size
        self.dependency_state_size = dependency_state_size
        self.morphological_state_size = morphological_state_size

        self.slip_prob = slip_prob
        self.transition_prob = transition_prob
        self.guess_prob = guess_prob

        self.reset()

    def _initial_state(self, size):
        """Build a float32 state array of the given size filled with the initial learned probability."""
        return np.full(size, self.INITIAL_LEARNED_PROB, dtype = np.float32)

    def reset(self):
        """Put every skill of every family back to its initial learned probability."""
        self.token_state = self._initial_state(self.token_state_size)
        self.pos_state = self._initial_state(self.pos_state_size)
        self.format_state = self._initial_state(self.format_state_size)
        self.dependency_state = self._initial_state(self.dependency_state_size)
        self.morphological_state = self._initial_state(self.morphological_state_size)

    def getNetLearnedProb(self,token_info):
        """
        Combine the learned probabilities relevant to one token into a single value by multiplying them.

        Only the token, part-of-speech and morphological states take part; the format and dependency states are
        deliberately left out of the product.
        """
        net_learned = self.token_state[token_info[0]] * self.pos_state[token_info[1]] #* self.format_state[format_index] * self.dependency_state[dep_index]
        for index in token_info[4]:
            net_learned *= self.morphological_state[index]

        return net_learned

    def _probCorrect(self, token_info):
        """Probability of a correct answer for one token: learned and no slip, or not learned and a lucky guess."""
        net_learned = self.getNetLearnedProb(token_info)
        return net_learned * (1 - self.slip_prob) + (1 - net_learned) * self.guess_prob

    @staticmethod
    def _asRaggedArray(per_exercise):
        """
        Pack a list of per-exercise arrays into a 1-D object array.

        Exercises have different numbers of tokens, and calling np.array on such a ragged list is deprecated
        (and an error on newer NumPy versions), so the container is filled element by element.
        """
        packed = np.empty(len(per_exercise), dtype = object)
        for position, values in enumerate(per_exercise):
            packed[position] = values
        return packed

    def predictAnswerProbabilities(self, input):
        """Return an array with the probability of a correct answer for every token of one exercise."""
        return np.array([self._probCorrect(token_info) for token_info in input])

    def predictAnswer(self, input):
        """Return an array of sampled answers (1 = correct, 0 = incorrect), one per token of one exercise."""
        answer = []
        for token_info in input:
            # A plain float keeps the two probabilities summing to exactly 1 for np.random.choice.
            p_correct = float(self._probCorrect(token_info))
            value = np.random.choice(np.array([0,1]), p = np.array([1 - p_correct, p_correct]))
            answer.append(value)
        return np.array(answer)

    def getPosterior(self, prob, output_correctness):
        """
        Bayes update of a learned probability after observing one answer.

        Parameters:
            prob: learned probability before the observation.
            output_correctness: 1 for a correct answer; any other value is treated as incorrect.
        """
        if output_correctness == 1:
            posterior = prob*(1 - self.slip_prob) / (prob*(1 - self.slip_prob) + (1 - prob)*self.guess_prob)
        else:
            posterior = prob*(self.slip_prob) / (prob*(self.slip_prob) + (1 - prob)*(1 - self.guess_prob))
        return posterior

    def _updateSkill(self, state, index, correctness):
        """Apply the posterior update followed by the learning transition to state[index], in place."""
        posterior = self.getPosterior(state[index], correctness)
        state[index] = posterior + (1 - posterior) * self.transition_prob

    def updateKnowledgeState(self, output_correctness, input):
        """
        Update all five skill families from the observed answers of one exercise.

        Parameters:
            output_correctness: one answer (1 = correct) per token of the exercise, in the same order.
            input: the exercise, i.e. a list of token_info entries.
        """
        for position, token_info in enumerate(input):
            correctness = output_correctness[position]
            self._updateSkill(self.token_state, token_info[0], correctness)
            self._updateSkill(self.pos_state, token_info[1], correctness)
            self._updateSkill(self.format_state, token_info[2], correctness)
            self._updateSkill(self.dependency_state, token_info[3], correctness)
            for index in token_info[4]:
                self._updateSkill(self.morphological_state, index, correctness)

    def trainOneSet(self, excercises):
        """
        Let the learner "practice" every exercise: it samples its own answers and updates its state on them.

        No external labels are used here; the observations are the learner's own sampled answers.
        """
        for exercise in excercises:
            answer_correctness = self.predictAnswer(exercise)
            self.updateKnowledgeState(answer_correctness, exercise)

    def testOneSetProbabilities(self, excercises):
        """Return, for every exercise, the array of per-token correct-answer probabilities (state is not changed)."""
        return self._asRaggedArray([self.predictAnswerProbabilities(exercise) for exercise in excercises])

    def testOneSet(self, excercises):
        """Return, for every exercise, an array of sampled per-token answers (state is not changed)."""
        return self._asRaggedArray([self.predictAnswer(exercise) for exercise in excercises])

    def computeAccuracyForTest(self, test_response):
        """
        Percentage of tokens whose value is 1 across all exercises in test_response.

        Returns 0 when there are no tokens at all.
        """
        correct = 0
        total = 0
        for exercise in test_response:
            correct += int(np.sum(exercise))
            total += len(exercise)
        if(total == 0):
            return 0
        return float(correct)/total * 100

    def train(self, exercices_all, train_duration, test_duration):
        """
        Alternate between practice and evaluation over consecutive slices of exercices_all.

        Each round takes the next train_duration exercises to practice on and the following test_duration
        exercises to evaluate on. The evaluation slice is scored before and after practising: the score is the
        percentage of its tokens whose predicted probability of a correct answer is >= 0.5 (no ground-truth
        labels are involved). The difference is logged to wandb as the reward.

        Raises:
            ValueError: if a duration is negative or both are zero, since the loop could not advance.
        """
        if train_duration < 0 or test_duration < 0 or train_duration + test_duration == 0:
            raise ValueError('train_duration and test_duration must be non-negative and not both zero.')

        position = 0
        batch = 0
        cummulative_reward = 0
        while position < len(exercices_all):
            # Slices past the end are simply shorter (possibly empty), which covers the final round.
            train_batch = exercices_all[position:position + train_duration]
            position += train_duration
            test_batch = exercices_all[position:position + test_duration]
            position += test_duration

            answer_correctness_before = [np.where(probabilities >= 0.5 , 1, 0 ) for probabilities in self.testOneSetProbabilities(test_batch)]
            self.trainOneSet(train_batch)
            answer_correctness = [np.where(probabilities >= 0.5 , 1, 0 ) for probabilities in self.testOneSetProbabilities(test_batch)]

            accuracy_before = self.computeAccuracyForTest(answer_correctness_before)
            accuracy = self.computeAccuracyForTest(answer_correctness)
            print("Batch + " + str(batch) + " " + " correct before: " + str(accuracy_before) + " correct after: " + str(accuracy))
            cummulative_reward += accuracy - accuracy_before
            wandb.log({'Batch Accuracy After': accuracy, 'Batch Accuracy Before': accuracy_before, "reward": accuracy - accuracy_before, "cummulative reward":cummulative_reward})
            batch += 1

In [None]:
# Scratch check of the >= 0.5 thresholding used in BKTLearner.train: an all-zero array maps to all zeros.
answer_correctness_before = np.zeros(15)
np.where(answer_correctness_before >= 0.5 , 1, 0 )

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
# Start the wandb run that BKTLearner.train logs its per-batch values to.
run = wandb.init(
    name = "Reward Changes BKT - All Skills", ## Wandb creates random run names if you skip this field
    reinit = True, ### Allows reinitializing runs when you re-run this cell
    # run_id = ### Insert a specific run id here if you want to resume a previous run
    # resume = "must" ### You need this to resume previous runs, but comment out reinit = True when using this
    project = "BaseLine Ablations" ### Project should be created in your wandb account
    #config = config ### Wandb config for your run
)

In [None]:
# Pool the exercises of all users into one flat list, user by user.
all_ex = [exercise for token_list in exercices_merged.values() for exercise in token_list]


In [None]:
# Vocabulary sizes, listed in the same order as the state-size arguments passed to BKTLearner below.
len(word_dict), len(pos_dict), len(format_dict), len(dependency_label_dict), len(morphological_feature_dict)

(1967, 16, 3, 41, 80)

In [None]:
# Seed NumPy first: both the shuffle and the learner's sampled answers draw from np.random, so an unseeded
# run cannot be reproduced. The value matches the seed used by seed_everything.
np.random.seed(11785)
# Shuffles all_ex IN PLACE; rebuild all_ex before re-running this cell to get the same order again.
np.random.shuffle(all_ex)
learner = BKTLearner(
    token_state_size=len(word_dict),
    pos_state_size=len(pos_dict),
    format_state_size=len(format_dict),
    dependency_state_size=len(dependency_label_dict),
    morphological_state_size=len(morphological_feature_dict),
    slip_prob=0.1,
    transition_prob=0.1,
    guess_prob=0.001,
)
# Practice on 50 exercises, then evaluate on the next 20, repeatedly until the data runs out.
learner.train(all_ex, train_duration=50, test_duration=20)

  return np.array(answer_correctness)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Batch + 6772  correct before: 100.0 correct after: 100.0
Batch + 6773  correct before: 100.0 correct after: 100.0
Batch + 6774  correct before: 100.0 correct after: 100.0
Batch + 6775  correct before: 100.0 correct after: 100.0
Batch + 6776  correct before: 100.0 correct after: 100.0
Batch + 6777  correct before: 100.0 correct after: 100.0
Batch + 6778  correct before: 100.0 correct after: 100.0
Batch + 6779  correct before: 100.0 correct after: 100.0
Batch + 6780  correct before: 100.0 correct after: 100.0
Batch + 6781  correct before: 100.0 correct after: 100.0
Batch + 6782  correct before: 100.0 correct after: 100.0
Batch + 6783  correct before: 100.0 correct after: 100.0
Batch + 6784  correct before: 100.0 correct after: 100.0
Batch + 6785  correct before: 100.0 correct after: 100.0
Batch + 6786  correct before: 100.0 correct after: 100.0
Batch + 6787  correct before: 100.0 correct after: 100.0
Batch + 6788  correct b

In [None]:
# Actor-critic

# Actor input dimension = dimension of the state

# actions:
# set of exercises + train_duration

# reward:
# reward - time_penalty_weight * cumulative train_duration

#

In [None]:
from  torchsummary import summary

In [None]:
import random
from typing import List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from IPython.display import clear_output
from torch.distributions import Normal

In [None]:
from torch import nn
import torch
import torch.nn.functional as F
def initialize_uniformly(layer: nn.Linear, init_w: float = 3e-3):
    """Overwrite a linear layer's weight and bias in place with U(-init_w, init_w) samples."""
    lower, upper = -init_w, init_w
    # Weight first, then bias, so random numbers are drawn in the same order as before.
    for tensor in (layer.weight.data, layer.bias.data):
        tensor.uniform_(lower, upper)

# I
class Actor(nn.Module):
    """Gaussian policy network: maps a state to a Normal distribution over actions."""

    def __init__(self, in_dim: int, out_dim: int):
        """Build the network.

        Args:
            in_dim: size of the state vector (intended to be the BKT knowledge state).
            out_dim: size of the action vector (intended to cover the list of exercises to choose
                from). Open design question from the original notebook: should the choice be
                encoded as a single integer or as a one-hot vector?
        """
        super(Actor, self).__init__()

        # One shared hidden layer feeds two heads: the mean and the (log) standard deviation.
        self.hidden1 = nn.Linear(in_dim, 128)
        self.mu_layer = nn.Linear(128, out_dim)
        self.log_std_layer = nn.Linear(128, out_dim)

        # Both heads start with small uniform weights; the hidden layer keeps PyTorch's default.
        initialize_uniformly(self.mu_layer)
        initialize_uniformly(self.log_std_layer)

    def forward(self, state: torch.Tensor) -> Tuple[torch.Tensor, Normal]:
        """Sample an action for ``state``.

        Args:
            state: input tensor whose last dimension is ``in_dim``.

        Returns:
            A pair ``(action, dist)``: ``action`` is a sample drawn from ``dist``, and ``dist`` is
            the ``Normal(mu, std)`` distribution produced for ``state``.
        """
        x = F.relu(self.hidden1(state))

        # tanh(.) * 2 bounds the mean to [-2, 2].
        mu = torch.tanh(self.mu_layer(x)) * 2
        # softplus is non-negative, so std = exp(softplus(.)) is always >= 1.
        log_std = F.softplus(self.log_std_layer(x))
        std = torch.exp(log_std)

        dist = Normal(mu, std)
        # sample() is not reparameterised; gradients reach the policy through dist.log_prob.
        action = dist.sample()

        return action, dist
    
class Critic(nn.Module):
    """State-value network: one 128-unit hidden layer followed by a single scalar output."""

    def __init__(self, in_dim: int):
        """Create the layers for state vectors of size ``in_dim``."""
        super().__init__()

        self.hidden1 = nn.Linear(in_dim, 128)
        self.out = nn.Linear(128, 1)

        # Only the output layer is re-initialised with small uniform values.
        initialize_uniformly(self.out)

    def forward(self, state: torch.Tensor) -> torch.Tensor:
        """Return the value estimate for ``state`` (last dimension of size 1)."""
        hidden = F.relu(self.hidden1(state))
        return self.out(hidden)

In [None]:
# Print a layer-by-layer summary of the actor for placeholder state/action sizes.
actor = Actor(in_dim=1000, out_dim=256)
summary(actor, input_size=(1, 1000))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 128]         128,128
            Linear-2               [-1, 1, 256]          33,024
            Linear-3               [-1, 1, 256]          33,024
Total params: 194,176
Trainable params: 194,176
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.74
Estimated Total Size (MB): 0.75
----------------------------------------------------------------


In [None]:
class A2CAgent:
    """Advantage actor-critic (A2C) agent that updates its networks after every transition.

    Attributes:
        env: environment the agent interacts with (intended to be the BKT learner model). The
            methods below use ``observation_space``, ``action_space``, ``reset()``,
            ``step(action)``, ``close()`` and, in ``test()``, ``render(mode="rgb_array")``.
        gamma (float): discount factor
        entropy_weight (float): coefficient of the ``-log_prob`` term added to the policy loss
        device (torch.device): cpu / gpu
        actor (nn.Module): actor model used to select actions
        critic (nn.Module): critic model used to predict state values
        actor_optimizer (optim.Optimizer): optimizer of the actor
        critic_optimizer (optim.Optimizer): optimizer of the critic
        transition (list): temporary storage for the most recent transition
        total_step (int): number of environment steps taken so far in ``train()``
        is_test (bool): flag to show the current mode (train / test)
    """

    def __init__(self, env, gamma: float, entropy_weight: float):
        """Create the actor/critic networks and their optimizers for ``env``."""
        self.env = env
        self.gamma = gamma
        self.entropy_weight = entropy_weight

        # device: cpu / gpu
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"
        )
        print(self.device)

        # networks: sized from the first dimension of the env's observation/action spaces
        obs_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]

        self.actor = Actor(obs_dim, action_dim).to(self.device)
        self.critic = Critic(obs_dim).to(self.device)

        # optimizers (the critic uses a 10x larger learning rate than the actor)
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=1e-4)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=1e-3)

        # transition (state, log_prob, next_state, reward, done)
        self.transition: list = list()

        # total steps count
        self.total_step = 0

        # mode: train / test
        self.is_test = False

    def select_action(self, state: np.ndarray) -> np.ndarray:
        """Select an action for ``state``.

        In test mode the distribution mean is used; otherwise the sampled action is used and
        ``[state_tensor, log_prob]`` is stored as the start of the current transition.

        Returns:
            The selected action clamped to [-2, 2], as a NumPy array on the CPU.
        """
        state = torch.FloatTensor(state).to(self.device)
        action, dist = self.actor(state)
        selected_action = dist.mean if self.is_test else action

        if not self.is_test:
            # Sum the per-dimension log-probabilities into one log-probability for the action.
            log_prob = dist.log_prob(selected_action).sum(dim=-1)
            self.transition = [state, log_prob]

        return selected_action.clamp(-2.0, 2.0).cpu().detach().numpy()

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, np.float64, bool]:
        """Apply ``action`` to the env and return ``(next_state, reward, done)``.

        In training mode the result is appended to the transition started by ``select_action``.
        """
        # The env is expected to return a 4-tuple; the last element is ignored.
        next_state, reward, done, _ = self.env.step(action)

        if not self.is_test:
            self.transition.extend([next_state, reward, done])

        return next_state, reward, done

    def update_model(self) -> Tuple[float, float]:
        """Run one critic update and one actor update from the stored transition.

        Returns:
            ``(policy_loss, value_loss)`` as Python floats.
        """
        state, log_prob, next_state, reward, done = self.transition

        # Q_t   = r + gamma * V(s_{t+1})  if state != Terminal
        #       = r                       otherwise
        mask = 1 - done
        next_state = torch.FloatTensor(next_state).to(self.device)
        pred_value = self.critic(state)
        targ_value = reward + self.gamma * self.critic(next_state) * mask
        value_loss = F.smooth_l1_loss(pred_value, targ_value.detach())

        # update value
        self.critic_optimizer.zero_grad()
        value_loss.backward()
        self.critic_optimizer.step()

        # advantage = Q_t - V(s_t)
        advantage = (targ_value - pred_value).detach()  # not backpropagated
        policy_loss = -advantage * log_prob
        policy_loss += self.entropy_weight * -log_prob  # entropy maximization

        # update policy
        self.actor_optimizer.zero_grad()
        policy_loss.backward()
        self.actor_optimizer.step()

        return policy_loss.item(), value_loss.item()

    def train(self, num_frames: int, plotting_interval: int = 200):
        """Train the agent for ``num_frames`` environment steps.

        Args:
            num_frames: total number of environment steps to run.
            plotting_interval: plot the training curves every this many steps.
        """
        self.is_test = False

        actor_losses, critic_losses, scores = [], [], []
        state = self.env.reset()
        score = 0

        for self.total_step in range(1, num_frames + 1):
            action = self.select_action(state)
            next_state, reward, done = self.step(action)

            actor_loss, critic_loss = self.update_model()
            actor_losses.append(actor_loss)
            critic_losses.append(critic_loss)

            state = next_state
            score += reward

            # if episode ends
            if done:
                # Reset the agent's own env, not a notebook-level global named `env`.
                state = self.env.reset()
                scores.append(score)
                score = 0

            # plot
            if self.total_step % plotting_interval == 0:
                self._plot(self.total_step, scores, actor_losses, critic_losses)
        self.env.close()

    def test(self):
        """Run one episode using the distribution mean as the action.

        Returns:
            The list of frames returned by ``env.render(mode="rgb_array")``, one per step.
        """
        self.is_test = True

        state = self.env.reset()
        done = False
        score = 0

        frames = []
        while not done:
            frames.append(self.env.render(mode="rgb_array"))
            action = self.select_action(state)
            next_state, reward, done = self.step(action)

            state = next_state
            score += reward

        print("score: ", score)
        self.env.close()

        return frames

    def _plot(
        self,
        frame_idx: int,
        scores: List[float],
        actor_losses: List[float],
        critic_losses: List[float],
    ):
        """Plot episode scores, actor losses and critic losses side by side."""
        def subplot(loc: int, title: str, values: List[float]):
            plt.subplot(loc)
            plt.title(title)
            plt.plot(values)

        # Before the first episode finishes there are no scores; report nan directly instead of
        # letting np.mean warn about an empty slice.
        recent_scores = scores[-10:]
        mean_score = np.mean(recent_scores) if recent_scores else float("nan")

        subplot_params = [
            (131, f"frame {frame_idx}. score: {mean_score}", scores),
            (132, "actor_loss", actor_losses),
            (133, "critic_loss", critic_losses),
        ]

        clear_output(True)
        plt.figure(figsize=(30, 5))
        for loc, title, values in subplot_params:
            subplot(loc, title, values)
        plt.show()

In [None]:
# Print a layer-by-layer summary of the critic for a placeholder state size.
critic = Critic(in_dim=1000)
summary(critic, input_size=(1, 1000))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 128]         128,128
            Linear-2                 [-1, 1, 1]             129
Total params: 128,257
Trainable params: 128,257
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.49
Estimated Total Size (MB): 0.49
----------------------------------------------------------------


In [None]:
# Score the predictions in `results` against the gold labels of the dev split.
labels = load_labels("/content/en_es/en_es.slam.20190204.dev.key")

# Gold labels in the iteration order of `labels`. The previous try/except KeyError around this
# lookup could never trigger, because the loop indexed `labels` with its own keys.
actual = [labels[instance_id] for instance_id in iterkeys(labels)]

# One entry of `results` per user, visited in the order of `test_exercices_merged`, then joined
# into a single array (computed once instead of once per metric).
final_result = [results[user] for user in test_exercices_merged]
predicted = np.concatenate(final_result)

# NOTE(review): the metrics below presumably pair actual[i] with predicted[i] by position, which
# assumes the key-file order matches the per-user concatenation order — TODO confirm.
assert len(actual) == len(predicted), (
    "Label/prediction count mismatch: "
    + str(len(actual)) + " labels vs " + str(len(predicted)) + " predictions"
)

auroc = compute_auroc(actual, predicted)
f1 = compute_f1(actual, predicted, 0.5)
acc = compute_acc(actual, predicted)
print("Acc " + str(acc) + " auroc " + str(auroc) + " f1 : " + str(f1))

NameError: ignored