In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import itertools, os, torch

from sim import Scheduler, Random, Leitner
from data_process import process_original, reduce_df, eval_thresh, reduce_lexemes
from get_trajectory import get_traj

%load_ext autoreload
%autoreload 2

## Load data (clean if necessary)

In [2]:
# Build the cleaned CSV only when it is missing; later runs reuse the file.
cleaned_path = "data/cleaned.csv"
if not os.path.exists(cleaned_path):
    process_original()

df = pd.read_csv(cleaned_path)

## Data Exploration

In [3]:
# Integer codes used in the 'learning_language' column.
lang_map = {'de': 0, 'en': 1, 'es': 2, 'fr': 3, 'it': 4, 'pt': 5}
l_map = pd.read_csv("data/lexeme_map.csv")

# Number of rows (non-null user_id values) per learning language.
df.groupby('learning_language')['user_id'].count()

learning_language
0    1452597
1    5014791
2    3407689
3    1873734
4     793935
5     311480
Name: user_id, dtype: int64

We have about 5 million English items, 3.4 million Spanish, 1.9 million French, and 1.5 million German. Italian and Portuguese each have hundreds of thousands. It would be useful to restrict our studies to just the English users so that we reduce the dimensionality of our action and state spaces.

There are 43.8 thousand learners (trajectories) available to provide to our RL agents.



In [4]:
# Keep only the rows whose learning language is English, then drop the now-constant
# column. The code is looked up in lang_map instead of hard-coding 1.
# NOTE: this cell overwrites `df` and removes 'learning_language', so it cannot be
# re-run without reloading the data first.
english_mask = df['learning_language'] == lang_map['en']
df = df.loc[english_mask].drop(columns=['learning_language'])

# reduce_df is called for its effect on `df`; its return value is not used.
reduce_df(df)

# Assign the whole column rather than using df.loc[:, col] = ...: on pandas >= 2.0 the
# .loc form writes into the existing float64 column, so the float32 cast would be lost.
df['difficulty'] = df['difficulty'].astype(np.float32)

In [5]:
# Rows (non-null timestamps) per lexeme, indexed by lexeme_id.
english_counts = df.groupby('lexeme_id')['timestamp'].count()
n_lex = english_counts.shape[0]
print(f"There are {n_lex} lexemes")

There are 2983 lexemes


In [6]:
# Threshold handed to eval_thresh here and to reduce_lexemes later.
# Presumably a minimum number of observations per lexeme — confirm in data_process.
n_items = 500

eval_thresh(df, english_counts, n_items)

For threshold 500 there are 24.81% lexemes above and 75.19% below

There would be 94.46% of data included and 5.54% of data excluded


In [7]:
# Empty index<->lexeme lookups; nothing in the visible cells fills or reads them.
idx_to_lex = {}
lex_to_idx = {}

# Overwrites `df` with the frame returned by reduce_lexemes for the n_items threshold.
df, included = reduce_lexemes(df, n_items)

# Per-trajectory states and actions, plus two extra return values bound to
# i_t_l and l_t_i (presumably index->lexeme and lexeme->index maps — TODO confirm).
states, actions, i_t_l, l_t_i = get_traj(df, included)

In [69]:
# Turn the first 50 trajectories into path dicts; `paths` is later stored in
# params['paths'] for the behavioral-cloning trainer.
paths = []
for traj_key in itertools.islice(states.keys(), 50):
    # Next observation = observations shifted up by one step. np.roll returns a new
    # array, so states[traj_key] is untouched; the wrapped-around last row is zeroed.
    next_obs = np.roll(states[traj_key], -1, axis=0)
    next_obs[-1, :] = 0
    n_steps = next_obs.shape[0]

    # Only the final step of each trajectory is flagged as terminal.
    done = np.zeros(n_steps)
    done[-1] = 1

    paths.append(dict(
        observation=states[traj_key],
        action=actions[traj_key],
        next_observation=next_obs,
        terminal=done,
        reward=np.zeros(n_steps),  # rewards are all zero here
    ))
    

In [16]:
# Bundle the trajectory mappings in one dict (references only, nothing is copied);
# it is later stored in params['trajectories'].
traj_dict = dict(states=states, actions=actions)

MemoryError: 

In [29]:
import pickle

# Persist the state trajectories to states.pkl.
# The recorded run of this cell ended in MemoryError. The highest pickle protocol is
# designed for very large objects, which reduces the extra memory needed while
# serializing; the data may still be too large for this machine.
with open('states.pkl', 'wb') as states_file:
    pickle.dump(states, states_file, protocol=pickle.HIGHEST_PROTOCOL)

MemoryError: 

## Run behavioral cloning

In [40]:
from cs285.scripts.run_bc import BC_Trainer
import time

In [73]:
#@title runtime arguments

class Args:
    """Run settings for behavioral cloning, readable both as attributes and as items.

    Defaults are declared as class attributes (the ``#@param`` / ``#@markdown``
    comments are Colab form annotations). ``args['key']`` and ``args['key'] = value``
    are forwarded to ``getattr`` / ``setattr``, so values written through item
    assignment become instance attributes and do not change the class defaults.
    """

    def __getitem__(self, key):
        """Return the setting named ``key``; raises AttributeError if it does not exist."""
        return getattr(self, key)

    def __setitem__(self, key, val):
        """Store ``val`` under the name ``key`` on this instance."""
        setattr(self, key, val)

    def __contains__(self, key):
        """Return True when a setting named ``key`` exists.

        Without this method ``key in args`` falls back to indexing with 0, 1, ...
        through ``__getitem__`` and raises TypeError.
        """
        return isinstance(key, str) and hasattr(self, key)

    def get(self, key, default=None):
        """Return the setting named ``key``, or ``default`` when it is not defined."""
        return getattr(self, key, default)

    #@markdown expert data
    expert_policy_file = '' #@param
    # TODO: maybe later load expert data from a file, e.g. '../data/trajectories.pkl'
    env_name = 'HLR' #@param ['HLR']
    exp_name = 'start' #@param
    do_dagger = False #@param {type: "boolean"}
    ep_len = 1000 #@param {type: "integer"}
    save_params = False #@param {type: "boolean"}

    num_agent_train_steps_per_iter = 1000 #@param {type: "integer"}
    n_iter = 1 #@param {type: "integer"}

    #@markdown batches & buffers
    batch_size = 30 #@param {type: "integer"}
    eval_batch_size = 1000 #@param {type: "integer"}
    train_batch_size = 30 #@param {type: "integer"}
    max_replay_buffer_size = 1000000 #@param {type: "integer"}

    #@markdown network
    n_layers = 2 #@param {type: "integer"}
    size = 64 #@param {type: "integer"}
    learning_rate = 5e-3 #@param {type: "number"}

    #@markdown logging
    video_log_freq = -1 #@param {type: "integer"}
    scalar_log_freq = 1 #@param {type: "integer"}

    #@markdown gpu & run-time settings
    no_gpu = False #@param {type: "boolean"}
    which_gpu = 0 #@param {type: "integer"}
    seed = 1 #@param {type: "integer"}

# `params` and `args` are two names for the same Args instance (no copy is made),
# so items set on `params` are visible through `args` and vice versa.
params = args = Args()

In [74]:
# Attach the trajectory data to the run settings; item assignment on `params`
# stores each value as an attribute of the Args instance.
params['trajectories'] = traj_dict  # dict with the "states" and "actions" mappings
params['paths'] = paths  # list of per-trajectory path dicts

In [76]:
# Logdir: <cwd>/data/bc_<exp_name>_hlr_<dd-mm-YYYY_HH-MM-SS>
data_path = os.path.join(os.getcwd(), 'data')
run_name = f"bc_{args.exp_name}_hlr_{time.strftime('%d-%m-%Y_%H-%M-%S')}"
logdir = os.path.join(data_path, run_name)
params['logdir'] = logdir

# exist_ok=True replaces the separate exists() checks (no check-then-create race);
# makedirs also creates data_path as a parent directory when it is missing.
os.makedirs(logdir, exist_ok=True)

In [78]:
# Build the behavioral-cloning trainer from the run settings and start training.
# NOTE(review): the recorded run of this cell failed with
# "size mismatch, m1: [30 x 2220], m2: [1000 x 64]". The batch rows have 2220 values
# while the first weight matrix expects 1000 inputs. Where BC_Trainer gets the 1000
# from is not visible in this notebook — TODO confirm how it derives the input size.
trainer = BC_Trainer(params)
trainer.run_training_loop()

########################
logging outputs to  C:\Users\Sarah\cs285_project\data\bc_start_hlr_25-10-2020_16-45-21
########################
GPU not detected. Defaulting to CPU.


********** Iteration 0 ************

Training agent using sampled data from replay buffer...
agent_class <cs285.agents.bc_agent.BCAgent object at 0x0000022698D7D1D0>


RuntimeError: size mismatch, m1: [30 x 2220], m2: [1000 x 64] at ..\aten\src\TH/generic/THTensorMath.cpp:41