In [2]:
from os.path import exists

if 'google.colab' in str(get_ipython()):
  from google.colab import drive

  if not exists('/content/drive/MyDrive'):
    drive.mount('/content/drive/')

  if not exists('/content/drive/MyDrive/cs285_project'):
    !git pull https://github.com/anthonybajoua/cs285_project.git


  %cd /content/drive/MyDrive/cs285_project
else:
  pass

/content/drive/MyDrive/cs285_project


In [7]:
# List the git remotes configured for the current checkout.
# NOTE(review): the recorded output shows fetch/push URLs, which is the format
# printed by `git remote -v` rather than plain `git remote` — confirm.
!git remote

origin	https://github.com/anthonybajoua/cs285_project.git (fetch)
origin	https://github.com/anthonybajoua/cs285_project.git (push)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import itertools, os, torch
from torch import nn

from sim import Scheduler, Random, Leitner
from data_process import process_original, reduce_df, eval_thresh, reduce_lexemes, normalize
from get_trajectory import trajectory_generator, makeSingle

import torch.nn as nn
import torch
import random
import cs285.infrastructure.pytorch_util as ptu
from tqdm.notebook import trange, tqdm

# Reload edited modules automatically before each cell executes (mode 2 = all modules).
%load_ext autoreload
%autoreload 2

# Initialise the cs285 torch helper (presumably selects GPU when available — confirm).
ptu.init_gpu()

## Load data (clean if necessary)

In [None]:
# Run the cleaning step only when the cleaned CSV is not on disk yet, then load it.
needs_cleaning = not os.path.exists("data/cleaned.csv")
if needs_cleaning:
    process_original()

df = pd.read_csv("data/cleaned.csv")

In [None]:
# Integer codes for the `learning_language` column.
# NOTE(review): assumes data/cleaned.csv encodes languages with exactly these codes — confirm.
lang_map = {'de' : 0, 'en': 1, 'es': 2, 'fr': 3, 'it': 4, 'pt': 5}
l_map = pd.read_csv("data/lexeme_map.csv")

# Rows per learning language. Displayed explicitly: a bare expression in the
# middle of a cell is evaluated and then thrown away. Selecting the column
# before counting avoids counting every column of a multi-million-row frame.
lang_counts = df.groupby('learning_language')['user_id'].count()
display(lang_counts)

# Keep only the English rows (looked up through lang_map instead of a magic 1);
# the language column is constant afterwards, so drop it.
df = df.loc[df['learning_language'] == lang_map['en']]
df = df.drop(['learning_language'], axis=1)
# Both helpers are called without using a return value, so they presumably
# modify `df` in place — confirm against data_process.
reduce_df(df)
normalize(df,'difficulty')


# Number of rows observed per lexeme, and how many distinct lexemes remain.
english_counts = df.groupby('lexeme_id')['timestamp'].count()
n_lex = len(english_counts)
print(f"There are {n_lex} lexemes")

We have about 5 million English items, 3 million Spanish, 1.9 million French, and 1.4 million German. Italian and Portuguese each have hundreds of thousands. It would be useful to restrict our study to just the English users so that we reduce the dimensionality of our action and state spaces.

There are 43.8 thousand learners (trajectories) that we can provide to our RL agents.



In [None]:
# n_items is forwarded to eval_thresh and reduce_lexemes
# (presumably the number of lexemes to keep — confirm against data_process).
n_items = 5000

eval_thresh(df, english_counts, n_items)

# Replace df with the reduced frame and remember which lexemes were kept.
df, included = reduce_lexemes(df, n_items)

# Trajectory generator over the reduced data.
tg = trajectory_generator(df, included)

## Run Behavioral Cloning (singular input case)

In [None]:
def createMLP(inSize, outSize, hidSize, nHidden, activation=nn.ReLU()):
    '''
    Build a fully connected network as an nn.Sequential.

    Layout:
        Linear(inSize, hidSize) -> activation
        nHidden x [Linear(hidSize, hidSize) -> activation]
        Linear(hidSize, outSize)            (no activation on the output)

    inSize - input size
    outSize - output size
    hidSize - hidden layer size
    nHidden - number of hidden-to-hidden layers added after the input layer
    activation - nonlinearity placed after every layer except the last; either
                 an nn.Module instance (e.g. nn.ReLU()) or an nn.Module class
                 (e.g. nn.ReLU)

    Returns the assembled nn.Sequential (left on the default device).
    '''
    import copy

    def newActivation():
        # A class is instantiated; an instance is deep-copied so that an
        # activation with learnable parameters is not shared between layers.
        if isinstance(activation, type):
            return activation()
        return copy.deepcopy(activation)

    # The input layer gets its own nonlinearity: two Linear layers stacked
    # back to back are equivalent to a single Linear layer.
    layers = [nn.Linear(inSize, hidSize), newActivation()]
    for _ in range(nHidden):
        layers.append(nn.Linear(hidSize, hidSize))
        layers.append(newActivation())
    layers.append(nn.Linear(hidSize, outSize))
    return nn.Sequential(*layers)



In [None]:
def trainNetwork(net, data, included, epochs,
                 nTraj=500, lr=1e-3, lossFn=nn.MSELoss(), norm=True,
                 verbose=False):
  '''
  Train a BC neural net on data with included lexemes for epochs.

  net      - nn.Module to train; moved to the ptu device and updated in place
  data     - passed to trajectory_generator as its first argument
  included - passed to trajectory_generator as its second argument
  epochs   - number of passes; a fresh trajectory_generator is built per pass
  nTraj    - forwarded to trajectory_generator as nTraj
  lr       - Adam learning rate
  lossFn   - callable loss(prediction, target)
  norm     - if True, standardise every state column within each batch
  verbose  - if True, print per-batch diagnostics (action/prediction
             statistics and both losses)

  Returns (tL, vL): the training loss and the validation loss recorded once
  per batch, each measured before that batch's optimiser step.

  NOTE(review): the "validation" batch is the first batch of each epoch, which
  is also trained on, so vL is not a true held-out estimate.
  '''
  tL, vL = [], []

  # Use the same device ptu.from_numpy places tensors on instead of a
  # hard-coded 'cuda', so the function also runs on CPU-only machines.
  # NOTE(review): assumes pytorch_util exposes the device chosen by
  # ptu.init_gpu() as `ptu.device` — confirm.
  net = net.to(ptu.device)
  opt = torch.optim.Adam(net.parameters(), lr=lr)

  for _ in range(epochs):

    tg = trajectory_generator(data, included, nTraj=nTraj)

    valStates, valActions = None, None

    for states, actions, _, _, _ in tg:

      states, actions = makeSingle(states, actions)

      # Stack the per-key arrays into one (rows, features) matrix each.
      stateMat = np.vstack(list(states.values()))
      actionMat = np.vstack(list(actions.values()))

      states = ptu.from_numpy(stateMat)

      if norm:
        # Clamp the std so a constant column does not divide by zero.
        std = states.std(dim=0).clamp_min(1e-8)
        states = (states - states.mean(dim=0)) / std

      actions = ptu.from_numpy(actionMat)

      # The first batch of the epoch doubles as the validation batch.
      if valStates is None:
        valStates, valActions = states, actions

      preds = net(states)
      loss = lossFn(preds, actions)

      # Validation needs no gradients; arguments follow the same
      # (prediction, target) order as the training loss.
      with torch.no_grad():
        valLoss = lossFn(net(valStates), valActions)

      if verbose:
        print("\n\n")
        print(torch.mean(actions).item(), torch.max(actions).item())
        print(loss.item(), valLoss.item())
        print(torch.mean(preds).item(), torch.max(preds).item(), torch.min(preds).item())

        print(torch.sum(preds > .5).item(), torch.sum(actions > .5).item())

      opt.zero_grad()
      loss.backward()
      opt.step()

      tL.append(loss.item())
      vL.append(valLoss.item())
  return tL, vL

# Single-input case: a small MLP with 4 inputs and 1 output, trained for 2 epochs.
net = createMLP(inSize=4, outSize=1, hidSize=32, nHidden=2)
torch.cuda.empty_cache()
tL, eL = trainNetwork(net, df, included, epochs=2, lr=1e-3, lossFn=nn.MSELoss())


## Run Behavioral Cloning

In [None]:
# No earlier cell defines `states`/`actions` at notebook scope, so this section
# fails with a NameError on Restart & Run All.
# NOTE(review): taking the first batch from `tg` (built earlier and otherwise
# unused) mirrors the 5-tuple unpacking in trainNetwork — confirm this is the
# data the section was meant to train on.
states, actions, _, _, _ = next(iter(tg))
traj_idxs = list(states.keys())
def sample_traj(states, actions, idxs, batch_size):
    '''
    Concatenate whole trajectories into one batch of at least batch_size rows.

    states     - mapping: trajectory index -> 2-D array of states (one row per step)
    actions    - mapping: trajectory index -> array of actions, row-aligned with states
    idxs       - list of trajectory indices to draw from; indices are popped
                 from the END of this list, so the caller's list is consumed
    batch_size - minimum number of rows wanted; trajectories are never split,
                 so the result may be larger, or smaller if idxs runs out

    Returns (states_cat, actions_cat), stacked along axis 0 in pop order.
    Raises IndexError if idxs is empty.
    '''
    if len(idxs) == 0:
        raise IndexError("sample_traj() needs at least one trajectory index")

    # Always take one trajectory, then keep drawing until the batch is full.
    picked = [idxs.pop()]
    n_rows = states[picked[0]].shape[0]
    while n_rows < batch_size and len(idxs) > 0:
        picked.append(idxs.pop())
        n_rows += states[picked[-1]].shape[0]

    # Concatenate once at the end rather than re-copying the growing batch
    # on every iteration.
    states_cat = np.concatenate([states[i] for i in picked])
    actions_cat = np.concatenate([actions[i] for i in picked])
    return states_cat, actions_cat





In [None]:
# Probe a single trajectory to size the network's input and output layers.
state1, act1 = sample_traj(states, actions, [traj_idxs[0]], 1)
input_size = state1.shape[1]
output_size = act1.shape[1]
hidden_size = 3000
hidden_num = 2
activation = nn.ReLU
bc_loss = nn.MSELoss()
layers = [nn.Linear(input_size, hidden_size), activation()]
for i in range(hidden_num-1):
    layers.append(nn.Linear(hidden_size, hidden_size))
    layers.append(activation())
layers.append(nn.Linear(hidden_size, output_size))
# Move the model to the device ptu.from_numpy sends its tensors to; otherwise
# the forward pass in the training loop mixes CPU weights with GPU inputs.
# NOTE(review): assumes pytorch_util exposes the device chosen by
# ptu.init_gpu() as `ptu.device` — confirm.
bc_nn = nn.Sequential(*layers).to(ptu.device)
# Built after the move so the optimiser tracks the on-device parameters.
optimizer = torch.optim.Adam(bc_nn.parameters(), lr=1e-3)

In [None]:
train_iters = 2
train_losses = []
val_losses = []
batch_size = 200
val_batch_size = 200
# 90/10 split of the trajectory indices into training and validation pools.
split = int(np.floor(.9*len(traj_idxs)))
train_idxs = traj_idxs[:split]
val_idxs = traj_idxs[split:]
# Ceiling division: floor division dropped the trailing partial slice and
# produced zero batches (no training at all) with fewer than batch_size
# training trajectories.
batches_per_epoch = -(-split // batch_size)

# NOTE(review): batch_size is used both as the number of trajectory indices
# per slice and as the row threshold inside sample_traj, so trajectories left
# in a slice once the row threshold is met are skipped for that epoch — confirm
# this is intended.
for i in tqdm(range(train_iters)):
    idxs = train_idxs[:]
    random.shuffle(idxs)
    val_shuffled = val_idxs[:]
    random.shuffle(val_shuffled)
    for j in tqdm(range(batches_per_epoch)):
        optimizer.zero_grad()
        s, a = sample_traj(states, actions, idxs[j*batch_size:(j+1)*batch_size], batch_size)
        result = bc_nn(ptu.from_numpy(s))
        loss = bc_loss(result, ptu.from_numpy(a))
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
        # sample_traj receives a copy, so the same validation trajectories
        # are scored at every step of an epoch.
        val_s, val_a = sample_traj(states, actions, val_shuffled[:], val_batch_size)
        # Validation needs no autograd graph.
        with torch.no_grad():
            val_pred = ptu.to_numpy(bc_nn(ptu.from_numpy(val_s)))
        val_losses.append(np.mean(np.square(val_pred - val_a)))


In [None]:
# Training vs. validation loss for the first 500 recorded steps, on a log scale.
plt.plot(train_losses[0:500], label='training')
plt.plot(val_losses[0:500], label='validation')
plt.yscale("log")
plt.xlabel("Training step")
plt.ylabel("MSE")
# NOTE(review): the lexeme count in the title is hard-coded — confirm it
# matches the data used for this run.
plt.title("MSE Of Actual vs Predicted Action (1100 lexemes)")
# The labels passed to plot() are only drawn once legend() is called.
plt.legend()
plt.show()

In [None]:
# Shape of one action vector: the first row of the array stored under key 3.
# NOTE(review): key 3 is hard-coded and must be present in `actions` — confirm.
actions[3][0, :].shape

In [None]:
# Smallest prediction on the last training batch. `s` is the NumPy array
# returned by sample_traj, so it must be converted to a tensor first, and
# Tensor.min() reduces over all elements (Python's min() would compare whole
# rows, which is ambiguous for multi-column outputs).
with torch.no_grad():
    last_batch_preds = bc_nn(ptu.from_numpy(s))
last_batch_preds.min().item()