# Experiments with torchfm, which has a range of models based on FM (Factorization Machines). 
* None of these models are sequence-aware.
* Anaconda context must be set to "base". Eventually work with poetry.
* Try working with wandb (Weights & Biases)
* Starting with my_fm_copy.ipynb on July 21, 2021, integrate with elements of the code I wrote for rankfm. Specifically, I will read the data with the newlib.py library. 

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torchfm
from torchfm import layer as fm_layer, model as fm_model
from torchfm.model import fm, lr, nfm, wd
import torch.nn.functional as F
import pandas as pd
import pandas_options
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
tt = torch.tensor
import numpy as np
# import tqdm
import random
import matplotlib.pyplot as plt
import myfunclib as myfm
import d2l_torch as d2l
import torch_datalib as datalib
import wandb

from fastcore.all import L, AttrDict


In [3]:
torch.get_num_interop_threads(),  torch.get_num_threads()

(16, 16)

In [4]:
# device = d2l.try_gpu()
device = 'cuda'
print(device)

cuda


In [5]:
gdct = {
    'device': device,  # 'cpu'
    'embed_dim': 10,
    'nb_epochs': 10,
    'lr': 0.05,
    'wd': 1.e-5,
    'optim': 'adamW',
    'batch_size' : 4096
}

In [6]:
param_dct = AttrDict({
    'device': device,  # 'cpu'
    'embed_dim': 10,
    'nb_epochs': 100,
    'lr': 0.05,
    'wd': 1.e-5,
    'optim': 'adamW',
    'batch_size' : 1024*4
})

In [7]:
%%time 
in_file = "activity_reduced_with_attributes.csv"
dct = datalib.read_data_attributes_single_file(in_file, dct=param_dct, continuous_attrib=True)
dct.keys()

df_item_attrib columns:  ['D', 'avg_yr_l', 'avg_yr_h', 'IATA', 'LAT_DEC', 'LON_DEC', 'HEIGHT']
df_item_attrib shape:  (91, 7)
df_:  Index(['MEMBER_ID', 'D', 'age_departure', 'GENDER', 'avg_yr_l', 'avg_yr_h',
       'LAT_DEC', 'LON_DEC', 'HEIGHT'],
      dtype='object')
SHOULD NOT CREATE user_attrib_idx and item_attrib_idx manually! SHOULD DO THIS BEFORE CALL TO this method
CPU times: user 1.18 s, sys: 166 ms, total: 1.34 s
Wall time: 1.34 s


dict_keys(['device', 'embed_dim', 'nb_epochs', 'lr', 'wd', 'optim', 'batch_size', 'age_cuts', 'df_members', 'df_with_attrib', 'user_attrib_idx', 'item_attrib_idx', 'user_attrib_str', 'item_attrib_str', 'field_types', 'field_dims'])

In [8]:
def convert_cat_variables(dct):
    """Replace the categorical columns of ``dct['df_with_attrib']`` by integer codes.

    For each of the columns MEMBER_ID, D and GENDER, ``datalib.cat2dict`` is
    asked for a pair of lookup tables, which are stored in ``dct`` under
    ``idx2member``/``member2idx``, ``idx2dest``/``dest2idx`` and
    ``idx2gender``/``gender2idx``.
    # NOTE(review): the pair is assumed to be (index -> category,
    # category -> index), as the variable names suggest -- confirm in datalib.

    Args:
        dct: attribute dictionary holding the dataframe ``df_with_attrib``;
            it is updated in place with the six lookup tables.

    Returns:
        A copy of ``dct['df_with_attrib']`` whose MEMBER_ID, D and GENDER
        columns have been mapped through the category -> index tables.
        The dataframe stored in ``dct`` is not modified.
    """
    source = dct['df_with_attrib']
    idx2member, member2idx = datalib.cat2dict(source['MEMBER_ID'])
    idx2dest, dest2idx = datalib.cat2dict(source['D'])
    idx2gender, gender2idx = datalib.cat2dict(source['GENDER'])

    dct['idx2member'] = idx2member
    # Fix: this key previously received idx2member, so the stored
    # member2idx table was the wrong direction.
    dct['member2idx'] = member2idx
    dct['idx2dest'] = idx2dest
    dct['dest2idx'] = dest2idx
    dct['idx2gender'] = idx2gender
    dct['gender2idx'] = gender2idx

    df1 = source.copy()
    df1['MEMBER_ID'] = df1['MEMBER_ID'].map(member2idx)
    df1['D'] = df1['D'].map(dest2idx)
    df1['GENDER'] = df1['GENDER'].map(gender2idx)
    print("nb dest: ", len(dest2idx))
    print("nb members: ", len(idx2member))
    return df1

In [9]:
df1 = convert_cat_variables(dct)
dct['df_with_attrib'] = df1

nb dest:  86
nb members:  46321


In [10]:
%%time 
# split data into train / valid / test data sets
datalib.train_valid_dct(dct, 0.1, 0.2, temporal=True, shuffle=True)
dataset_train = datalib.myDataset(dct, dct.data_train)
dataset_valid = datalib.myDataset(dct, dct.data_valid)
dataset_test  = datalib.myDataset(dct, dct.data_test)
dct.dataset_train = dataset_train
dct.dataset_valid = dataset_valid
dct.keys()

(804187, 9)
Create torch.tensor on device
Create torch.tensor on device
Create torch.tensor on device
CPU times: user 5.34 s, sys: 616 ms, total: 5.95 s
Wall time: 5.28 s


dict_keys(['device', 'embed_dim', 'nb_epochs', 'lr', 'wd', 'optim', 'batch_size', 'age_cuts', 'df_members', 'df_with_attrib', 'user_attrib_idx', 'item_attrib_idx', 'user_attrib_str', 'item_attrib_str', 'field_types', 'field_dims', 'idx2member', 'member2idx', 'idx2dest', 'dest2idx', 'idx2gender', 'gender2idx', 'data_train', 'data_valid', 'data_test', 'dataset_train', 'dataset_valid'])

In [None]:
dataset_train[3]

In [13]:
loader_train = DataLoader(dataset_train, shuffle=True, batch_size=dct.batch_size)
loader_valid = DataLoader(dataset_valid, shuffle=True, batch_size=dct.batch_size)
loader_test  = DataLoader(dataset_test,  shuffle=True, batch_size=dct.batch_size)

In [None]:
%%time 
print(loader_train.batch_size)
print(loader_train.dataset.data.shape)
for i,d in enumerate(loader_train):
    if i == 5: break

## DataLoader is functional

# NOT USED
batch_size = dct.batch_size
files = "attrib_2016.csv.gz"
#data_dict = myfm.getData(files, batch_size=batch_size, nrows='all')
data_dict = myfm.getData(files, batch_size=batch_size, nrows=20000, shuffle=False)
data_dict



data_dict['train_iter'].dataset.df.shape[0]

len(dataset_train)

train_iter = data_dict['train_iter']
field_dims = train_iter.dataset.field_dims
field_dims

field_dims = 20   # MEANING?

# cpu: device : -1
# gpu: device : 0, 1, ...
gdct

# field_dims: number of categories for each attribute. 
# This should be defined in read_single_file

Create a method with dataset and DataLoader

In [None]:
dct.keys()

In [None]:
# Field_dims are based on the full dataset. As such, I might have 45,000 members in the 
# full dataset, but only 35,000 in the validation set. That wastes computational resources
# due to an enlarged embedding layer, but does it create other issues? I think not. 
embed_dim = dct['embed_dim']
device = dct['device']
field_dims = dct.field_dims.to(device)
# Better would be to capture all fields with element > 1. <<<< TODO IN FUTURE
field_dims = torch.cat([field_dims[0:2], field_dims[3:4]], axis=0)
print("field_dims: ", field_dims)
net = fm.FactorizationMachineModel(field_dims, embed_dim)
net.to(device)


In [None]:
dct.embed_dim = 20
dct.nb_epochs = 10
dct.lr = 0.03
dct.wd = 1.e-3
dct.optim = 'adamW'
dct.device = 'cuda'

In [None]:
wconfig = {
  'lr': dct.lr,
  'epochs': dct.nb_epochs,
  'batch_size': dct.batch_size,
  'optim': dct.optim,
  'wd': dct.wd,
  'embed_dim': dct.embed_dim,
  'device': 'cuda'
}

In [None]:
sweep_config = {
    'name' : 'sweep3',
    'method' : 'random',
    'parameters' : {
        'lr' : { 
            'distribution': 'log_uniform_values', 
            'min' : 1.e-4, 
            'max' : 5.e-2,
        },
        'optim' : { 'value' : 'adamW' },
        'wd' : { 
            'distribution' : 'log_uniform_values', 
            'min' : 1.e-5,
            'max' : 1.e-2,
        },
        'batch_size' : { 
            'distribution' : 'q_log_uniform_values', 'q' : 32,
            'min' : 32,
            'max' : 4096,
        },
        'epochs' : {'value' : 30},
        'embed_dim' : {'value' : 30},
        'device' : {'value' : 'cuda'},
    },
}

# pprint.pprint(sweep_config)

metric = {
        'name' : 'loss'
}

sweep_config['metric'] = metric

sweep_id = wandb.sweep(sweep_config, project="Copa Recommender", entity="erlebacher")

In [None]:
wconfig = {
  'lr': dct.lr,
  'epochs': dct.nb_epochs,
  'batch_size': dct.batch_size,
  'optim': dct.optim,
  'wd': dct.wd,
  'embed_dim': dct.embed_dim,
  'device': 'cuda',
}

# Optimal wd: around 3e-3. However, it has only a small effect on the loss function. 
sweep_config4 = {
    'name' : 'sweep4',
    'method' : 'random',
    'parameters' : {
        'lr' : {  'value': 3.e-2, },
        'optim' : { 'value' : 'adamW' },
        'wd' : { 
            'distribution' : 'log_uniform_values', 
            'min' : 1.e-5,
            'max' : 1.e-2,
        },
        'batch_size' : {'value' : 1024},
        'epochs' : {'value' : 30},
        'embed_dim' : {'value' : 30},
        'device' : {'value' : 'cuda'},
    },
}

# pprint.pprint(sweep_config)

metric = {
        'name' : 'loss'
}

sweep_config4['metric'] = metric

sweep_id4 = wandb.sweep(sweep_config4, project="Copa Recommender", entity="erlebacher")

In [None]:
import pprint
pprint.pprint(sweep_config4)

In [14]:
def build_dataset(batch_size):
    """Re-split the global ``dct`` data and return a shuffled training DataLoader.

    Args:
        batch_size: number of samples per batch for the returned loader.

    Returns:
        A ``DataLoader`` over ``datalib.myDataset(dct, dct.data_train)``.

    Note:
        Operates on the notebook-level ``dct``; ``datalib.train_valid_dct`` is
        called on every invocation, so the split is recomputed each time.
    """
    datalib.train_valid_dct(dct, 0.1, 0.2, temporal=True, shuffle=True)
    dataset_train = datalib.myDataset(dct, dct.data_train)
    # Fix: honour the requested batch size (previously dct.batch_size was
    # always used, so a sweep over 'batch_size' had no effect).
    # int(): DataLoader requires an integer; a hyperparameter sampler may
    # deliver a float -- TODO confirm what the sweep actually passes.
    return DataLoader(dataset_train, shuffle=True, batch_size=int(batch_size))

def build_network(config, dct):
    """Create a FactorizationMachineModel and move it to ``dct.device``.

    Args:
        config: mapping that provides ``'embed_dim'``.
        dct: attribute dictionary that provides ``device`` and ``field_dims``.

    Returns:
        The model, placed on ``dct.device``.
    """
    # The device comes from dct; config['device'] is intentionally not read.
    target_device = dct.device
    latent_dim = config['embed_dim']
    all_dims = dct.field_dims.to(target_device)
    # Keep only entries 0:2 (MEMBER_ID, D or negD) and 3:4 (GENDER).
    # TODO IN FUTURE: better would be to capture all fields with element > 1.
    kept_dims = torch.cat([all_dims[0:2], all_dims[3:4]], axis=0)
    model = fm.FactorizationMachineModel(kept_dims, latent_dim)
    return model.to(target_device)

def bpr_loss_func(pos, neg):
    """Return the BPR (pairwise ranking) loss, summed over all elements.

    Computes ``-sum(log(sigmoid(pos - neg)))``.

    Args:
        pos: tensor of scores for the positive samples.
        neg: tensor of scores for the negative samples, broadcastable
            against ``pos``.

    Returns:
        A scalar tensor holding the summed loss.
    """
    # logsigmoid is the numerically stable form of log(sigmoid(x)): the
    # two-step version underflows to log(0) = -inf when pos << neg.
    return -F.logsigmoid(pos - neg).sum()

def build_optimizer(dct, network, lr, wd):
    """Create the optimizer named by ``dct.optim`` for ``network``'s parameters.

    Args:
        dct: object whose ``optim`` attribute is one of ``"sgd"``,
            ``"adam"`` or ``"adamW"``.
        network: module whose ``parameters()`` are optimized.
        lr: learning rate.
        wd: weight decay.

    Returns:
        A ``torch.optim.SGD`` (momentum 0.9), ``torch.optim.Adam`` or
        ``torch.optim.AdamW`` instance.

    Raises:
        ValueError: if ``dct.optim`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    name = dct.optim
    params = network.parameters()
    if name == "sgd":
        return torch.optim.SGD(params, lr=lr, momentum=0.9, weight_decay=wd)
    if name == "adam":
        return torch.optim.Adam(params, lr=lr, weight_decay=wd)
    if name == "adamW":
        return torch.optim.AdamW(params, lr=lr, weight_decay=wd)
    raise ValueError(
        f"Unknown optimizer {name!r}; expected 'sgd', 'adam' or 'adamW'"
    )


def train_epoch(network, loader, optimizer, loss_func, nb_epochs, wandb=None, device=None):
    """Train ``network`` for ``nb_epochs`` epochs, optionally logging to wandb.

    Each epoch is delegated to ``myfm.train_epoch_new``; the value it returns
    is treated as that epoch's loss.

    Args:
        network: model to train.
        loader: training DataLoader.
        optimizer: optimizer updating ``network``.
        loss_func: loss callable handed to ``myfm.train_epoch_new``.
        nb_epochs: number of epochs to run.
        wandb: the ``wandb`` module (or a compatible object) with an active
            run, or ``None`` to disable logging.
        device: device passed to ``myfm.train_epoch_new``; defaults to the
            notebook-level ``dct.device`` when ``None``.

    Returns:
        The loss of the last epoch, or ``None`` if ``nb_epochs`` is 0.
        (Previously nothing was returned although callers bind the result.)
    """
    if device is None:
        device = dct.device

    lowest_loss = float("inf")
    total_loss = None

    for epoch in range(nb_epochs):
        total_loss = myfm.train_epoch_new(network, optimizer, loader, loss_func, device=device, log_interval=50)
        if epoch == 0 and wandb:
            wandb.run.summary["initial_loss"] = total_loss
        # Track the best epoch whether or not wandb is enabled; only the
        # summary update depends on it.
        if total_loss < lowest_loss:
            lowest_loss = total_loss
            if wandb:
                wandb.run.summary["lowest_loss"] = lowest_loss
                wandb.run.summary["epoch_lowest_loss"] = epoch
        if epoch % 10 == 0:
            print(f"Epoch {epoch}, avg total_loss (per training sample): ", total_loss)
        if wandb:
            wandb.log({"loss": total_loss, "epoch":epoch})

    return total_loss

In [15]:
def train(config=None):
    """Run one training job inside a wandb run (usable as a ``wandb.agent`` target).

    Args:
        config: optional dictionary of default hyperparameters handed to
            ``wandb.init``. ``None`` when invoked by ``wandb.agent``.

    Returns:
        The trained network.

    Note:
        The hyperparameters actually used are read back from ``wandb.config``
        after ``wandb.init``. Under ``wandb.agent`` these come from the sweep
        configuration, which is why e.g. ``epochs`` is 30 (the value set in
        the sweeps of this notebook) rather than a value passed in ``config``.
        The optimizer type and the device are still taken from the
        notebook-level ``dct``, not from the sweep.
    """
    print("train, config: ", config)

    # Fix: wandb.config must not be touched before wandb.init(); the defaults
    # are supplied through wandb.init(config=...) instead.
    with wandb.init(config=config):
        config = wandb.config
        print("wandb.config: ", config)

        loader = build_dataset(config['batch_size'])
        network = build_network(config, dct)
        optimizer = build_optimizer(dct, network, config['lr'], config['wd'])
        nb_epochs = config["epochs"]
        print("nb_epochs: ", nb_epochs)
        # Fix: hand wandb to the training loop so that "loss" (the sweep
        # metric) is actually logged; without it nothing was recorded.
        train_epoch(network, loader, optimizer, bpr_loss_func, nb_epochs, wandb)
    return network

In [16]:
def train_no_wandb(dct, wandb=None):
    """Train a model from the settings in ``dct`` without starting a wandb run.

    Args:
        dct: attribute dictionary providing ``batch_size``, ``lr``, ``wd``,
            ``nb_epochs`` and whatever ``build_network`` / ``build_optimizer``
            read from it.
        wandb: forwarded unchanged to ``train_epoch`` (``None`` = no logging).

    Returns:
        The trained network.
    """
    train_loader = build_dataset(dct.batch_size)
    # dct plays both roles here: the "config" argument and the data dictionary.
    model = build_network(dct, dct)
    opt = build_optimizer(dct, model, dct.lr, dct.wd)
    epochs = dct.nb_epochs
    print("nb_epochs: ", epochs)
    train_epoch(model, train_loader, opt, bpr_loss_func, epochs, wandb)
    return model

In [None]:
#wandb.agent(sweep_id, train, count=100)
# random search over wd
wandb.agent(sweep_id4, train, count=20)

In [None]:

# Start a wandb run configured with the hyperparameters collected in `wconfig`.
# Fix: the original passed `config`, a name that is not defined at notebook scope.
run = wandb.init(project="Copa Recommender",
            config=wconfig,
            save_code=True)

# Optional
wandb.watch(net)  # model

In [None]:
# Copy the hyperparameters worth logging out of dct.
wanda_dict = AttrDict()
# "nb_epochs" was listed twice; each key is needed only once.
to_save = ["embed_dim", "nb_epochs", "lr", "wd", "optim", "batch_size", "device"]
for s in to_save:
    wanda_dict[s] = dct[s]

# Test accuracy
Once I get this working on a single case, I can execute this at every epoch and monitor its increase. 

In [17]:
dct.embed_dim = 20
dct.nb_epochs = 10
dct.lr = 0.03
dct.wd = 1.e-3
dct.optim = 'adamW'
dct.device = 'cuda'

wconfig = {
  'lr': dct.lr,
  'epochs': dct.nb_epochs,
  'batch_size': dct.batch_size,
  'optim': dct.optim,
  'wd': dct.wd,
  'embed_dim': dct.embed_dim,
  'device': 'cuda'
}

In [25]:
#wandb.config = wconfig
model = train_no_wandb(dct)

(804187, 9)
Create torch.tensor on device
FeaturesLinear, field_dims:  tensor([46321,    86,     3], device='cuda:0') tensor(46410, device='cuda:0')
nb_epochs:  10
elapased time per epoch:  448.3843078613281
Epoch 0, avg total_loss (per training sample):  0.0012712066232462003
elapased time per epoch:  446.4606018066406
elapased time per epoch:  446.7313232421875
elapased time per epoch:  563.954833984375
elapased time per epoch:  444.5259094238281
elapased time per epoch:  443.090087890625
elapased time per epoch:  568.2094116210938
elapased time per epoch:  447.16705322265625
elapased time per epoch:  448.4803466796875
elapased time per epoch:  568.7149658203125


In [None]:
myfm.test_accuracy(model, loader_train, dct.device)

In [191]:
import itertools

In [218]:
%%time
npts = 100
npts = 300000
M = np.random.choice(list(range(10)), npts)
D = [list(np.random.randint(0,20,10)) for i in M]
np.random.randint(0, 10)
#print("M: ", M)
#print("D: ", D)

MM = [[M[i]] * len(D[i]) for i in range(len(M))]
#print(MM)
def flatten(lst):
    """Return a single list containing the items of every sub-iterable of ``lst``, in order."""
    return list(itertools.chain.from_iterable(lst))
DD = flatten(D)
MM = flatten(MM)
print(len(DD), len(MM))
#print(flatten(D))
#print(flatten(MM))

3000000 3000000
CPU times: user 2.1 s, sys: 73.6 ms, total: 2.17 s
Wall time: 2.1 s


In [183]:
aa = itertools.chain(*[[5,2],[2,3,5]])

In [291]:
dct.df_members['D'].sum(

184610    LIM
376198    HAV
556112    PTY
539512    GUA
60815     PTY
         ... 
473973    SNU
705784    CUN
150110    SAL
683002    PTY
400597    MDE
Name: D, Length: 714086, dtype: object

In [294]:
list(aa)

[]

In [336]:
%%time 
myfm.recommender(model, dct)  # 9 sec

dict_keys(['device', 'embed_dim', 'nb_epochs', 'lr', 'wd', 'optim', 'batch_size', 'age_cuts', 'df_members', 'df_with_attrib', 'user_attrib_idx', 'item_attrib_idx', 'user_attrib_str', 'item_attrib_str', 'field_types', 'field_dims', 'idx2member', 'member2idx', 'idx2dest', 'dest2idx', 'idx2gender', 'gender2idx', 'data_train', 'data_valid', 'data_test', 'dataset_train', 'dataset_valid', 'train_dest_sets', 'valid_dest_sets'])
user_attrib:  (55073, 9)
data_valid unique members:  26235
user_attrib unique members:  16669
data_valid:  (55073, 9)
all_dest len:  86
nb of unique dest in the training set:  (74,)
nb of unique dest in the validation set:  (76,)
pairs.shape:  (1233506, 2)
hit rate (without previous filter) =  0.3581498590197372
res1.columns:  Index(['D', 'pred', 'argsort', 'D1', 'pred1'], dtype='object')
train_dest_sets:  (18578, 2) Index(['D', 'MEMBER_ID'], dtype='object')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trai['neg_set'] = trai['D'].map(lambda x: all_dest_set.difference(x))


df_flat.shape: (1193346, 2)
    MM  DD
0   0   1
1   0   2
2   0   3
3   0   5
4   0   7
CPU times: user 7.06 s, sys: 87.3 ms, total: 7.15 s
Wall time: 7.03 s


#### Incorporate hyperparameter search within a class
---

In [264]:
[3,5]

[3, 5]

## Candidate items to add to the wandb logging dictionary
* Average time per iteration
* Time for set up

## To do
* How to save the Python files this notebook depends on? 
* Work under poetry to make sure I have the proper Python environment

In [None]:
%%time
# Stand-alone training loop with wandb logging.
# NOTE(review): `net`, `optimizer`, `loss_func`, `dataset_train` and an active
# wandb run must already exist in the notebook namespace -- confirm before running.
nb_epochs = dct.nb_epochs
print("nb_epochs: ", dct.nb_epochs)
losses = []
print("dct.keys(): ", dct.keys())
print("user attr str: ", dct.user_attrib_str)
print("item attr str: ", dct.item_attrib_str)
print("device: ", dct.device)

lowest_loss = 1.e10
# Fix: was initialised as `lowest_epoch`, while the loop updates `epoch_lowest_loss`.
epoch_lowest_loss = 0
print_every = 1  # print the loss every `print_every` epochs

loader_train = DataLoader(dataset_train, shuffle=True, batch_size=dct.batch_size)  # Already defined

for epoch in range(nb_epochs):
    total_loss = myfm.train_epoch_new(net, optimizer, loader_train, loss_func, device=dct.device, log_interval=10)
    if epoch == 0:
        wandb.run.summary["initial_loss"] = total_loss
    if total_loss < lowest_loss:
        lowest_loss = total_loss
        epoch_lowest_loss = epoch
        wandb.run.summary["lowest_loss"] = lowest_loss
        wandb.run.summary["epoch_lowest_loss"] = epoch_lowest_loss
    losses.append(total_loss)
    if epoch % print_every == 0:
        print(f"Epoch {epoch}, avg total_loss (per training sample): ", total_loss)

    wandb.log({"loss": total_loss, "epoch":epoch})
        
#  ERROR: Check that all variables are on the same device. HOW TO DO THIS? try 'cuda'
#  8500 training samples
# time GPU, 5.6 sec for 4 epochs, batch 512
# time CPU, 5.8 sec for 4 epochs, batch 512
# time CPU, 7.1 sec for 4 epochs, batch 32
# time CPU, 5.7 sec for 4 epochs, batch 4096
# time CPU, 6.8 sec for 4 epochs, batch 32
# time GPU, 7.0 sec for 4 epochs, batch 32
# time GPU, 5.0 sec for 4 epochs, batch 4096

# ERROR? The loss per training sample should be independent of the batch size

# x: one of its elements is 46475, and yet, the max index should be  46458. Why is this happening? Max index should be 46410 (sum of field_dims)

# I may need to improve my selection of negative samples to speed up convergence. This is much much slower than rankfm. Why? 
# 1) I might have an error
# 2) rankfm is written in C. So I should compare convergence rates between the two when running only MEMBER_ID, DEST, GENDER as one-hot encoded attributes. 
# It is also time to get wandb going so I can save my data. 
# What do I want to save? 
#   total_loss, lr, nb_epochs, device, optim, batch_size, wd, embed_dim

In [None]:
dct.keys()

In [None]:
list(net.parameters())[0].get_device()  # -1 for cpu

In [None]:
a = list(net.parameters())[0].device
a

In [None]:
%%time
test_iter = data_dict['train_iter']
print("length: ", len(test_iter.dataset))
test_iter = DataLoader(data_dict['train_data'], batch_size=4*1024, shuffle=True)
# fields: original dataframe as a torch array
# scores: scores from original dataframe
fields, scores = myfm.test_accuracy(net, test_iter, 'cpu')

In [None]:
for i,data in enumerate(test_iter):
    # data[0].shape = (B,3). Elements are member, item, age
    print(i, data[0].shape, data[1].shape, data[2].shape)
    break

Select a sample of members, and compute scores for all destinations

In [None]:
data_iter = data_dict['train_iter']
data_iter.dataset.dct.keys()
dct = data_iter.dataset.dct
dct.keys()

In [None]:
df = data_iter.dataset.df # dataframe
print(df.shape)
nb_members = 1000  # select random members
max_member = df['MEMBER_ID'].max()
max_dest = df['D'].max()
members = random.sample(range(0,max_member), 100)
destinations = list(range(0,max_dest+1))  # 0, 1, ..., max_dest
print("dest: ", destinations)
print("members[0]: ", members[0])
print("Size: ", df.groupby(['MEMBER_ID','D']).size().sum())
row = df.iloc[members[0],:]
print("row: ", row)

# create a dataframe with members*max_dest rows. 10000*100 = one million
# How to do this? 
#  1. create a specialized Dataset

#print('member_attr: ', dct['member_attr'])
#dct['idx2member'].keys()

In [None]:
data = myfm.AccuracyDataset(data_iter.dataset, destinations)
len(data)

In [None]:
# shuffling is irrelevant
accuracy_loader = DataLoader(data, batch_size=4096, shuffle=False)
#accuracy_loader = DataLoader(data, batch_size=gdct['batch_size'], shuffle=False)

In [None]:
fields, predict = myfm.test_accuracy(net, train_iter, 'cpu')

print("fields: ", fields[0:5])
print("predict: ", predict[0:5])

# Strong decrease in loss. However, is this overfitting? 
# TODO: create a pair-wise approach. So define negative samples. 
# Could weight the negative samples: flights not taken further back in time would have a higher weight 
#  than flights not taken more recently. Is that reasonable? 

In [None]:
fields, predict = myfm.test_accuracy(net, accuracy_loader, 'cpu')

print("fields: ", fields[0:5])
print("predict: ", predict[0:50])
print(fields.shape, predict.shape)

# Strong decrease in loss. However, is this overfitting? 
# TODO: create a pair-wise approach. So define negative samples. 
# Could weight the negative samples: flights not taken further back in time would have a higher weight 
#  than flights not taken more recently. Is that reasonable? 

In [None]:
# fields: member_id, dest, age
# prediction: score
field_np = np.asarray(fields)
predict_np = np.asarray(predict)
#print(field_np.shape, predict_np.reshape(-1,1).shape)
joined = np.concatenate((field_np, predict_np.reshape(-1,1)), axis=1)
#print(joined[0:7,:])
df = pd.DataFrame(joined, columns=['MEMBER_ID','D','age','rank']) #, predict)
#print(df.head())

cols = list(df.columns)[0:-1]
for col in cols:
    df[col] = df[col].astype('int')
df = df.iloc[1:]

# data_iter: used for training
D_set = data_iter.dataset.dct['D_set']
print(len(D_set))
D_set.iloc[35], D_set.loc[35]

# Why does the first row have numbers of approximately 1.e31? This is the input data. It has nothing to do with the evaluator. 
# for i in range(predict.shape[0]):

merged = df.merge(D_set, how='inner', on='MEMBER_ID')
print("merged shape (all scores): ", merged.shape, merged['MEMBER_ID'].nunique())  # 2218 unique members
merged = merged[merged['rank'] > 0.5]
print("merged shape (scores > 0.5): ", merged.shape, merged['MEMBER_ID'].nunique())  # 2170 unique members
print(merged.head(10))

Determine the top-N scores for all members, in order. 

In [None]:
df1 = merged.groupby('MEMBER_ID').agg({'rank':list})
# df1 = merged.groupby('MEMBER_ID')['rank'].transform('count') #agg({'rank':list})
# print(df1)
print(df1.shape)

def sort_func(col):
    """Return the indices that would sort ``col`` in ascending order."""
    values = np.asarray(col)
    return values.argsort()
    
rank = df1['rank'].apply(sort_func)
df2 = df1.copy()
df2['argrank'] = rank
print(df2.head())
print("df2.shape: ", df2.shape)
df2['D'] = [list(range(0,len(D_set)))] * len(df2)
print("len(list(range(0,len(D_set))))= ", len(list(range(0,len(D_set)))) )
print("D_set: ", D_set)
df2
# df3 = pd.concat([_df, df2], axis=1)
# _df.shape, df2.shape

In [None]:
dst = list(data_iter.dataset.dct['idx2dest'].keys())
df
dst   # destinations 0 - 75 (76 values)
# I wish to apply argsort to them
df2['Dlist'] = [dst] * df2.shape[0]
# df2

# apply argrank to D Dlist

In [None]:
# Plot the per-epoch training loss collected in `losses`.
plt.plot(losses)
# The original cell ended with a stray `p` (NameError); show the figure instead.
plt.show()

Compute scores of training data. 
* For each member_id, compute score for each destination. Rank destinations and compare against destinations actually travelled. 
* consider the 2016 data. For each user+user_attributes, cover a range of destinations. Each destination has its own destination attributes. 
Consider $n$ examples of destination attributes, compute a ranking of these $n$ items. There are 80 destinations and their attributes. Finally, 
there are attributes that are neither member destinations nor destination attributes. 