In [1]:
import itertools
import pandas as pd
import numpy as np
import random
import csv
import time

import matplotlib.pyplot as plt

import tensorflow as tf

import keras.backend as K
from keras import Sequential
from keras.layers import Dense, Dropout

## DataGenerator
---

In [9]:
class DataGenerator():
    def __init__(self, datapath):
        ''' 
        Load data from the DB Books
        List the users and items
        List all the users historic
        '''
        self.data = self.load_datas(datapath)
        self.users = self.data['user'].unique()
        self.items = self.data['item'].unique()
        self.histo = self.gen_histo()
        self.train = []
        self.test = []

    def load_datas(self, datapath):
        '''
        Load the data and merge the name of each books
        A row corresponds to a rate given by a user to books

        Parameters
        ----------
        datapath:   string, path to the data books contain user, item, rating, timestamp

        Returns
        -------
        result:     DataFram, contains all the ratings
        '''
        data = pd.read_csv(datapath, names=['item', 'user', 'rating', 'timestamp'])
        data = data[:1000]

        return data
    
    def gen_histo(self):
        '''
        Group all rates given by users and store them from older to most recent

        Returns
        -------
        result:     List(DataFrame), List of the historic for each user
        '''
        historic_user = []
        for i, u in enumerate(self.users):
            temp = self.data[self.data['user'] == u]
            temp = temp.sort_values('timestamp').reset_index()
            temp.drop('index', axis=1, inplace=True)
            historic_user.append(temp)
        return historic_user
    
    def sample_histo(self, user_histo, action_ratio=0.8, 
                     max_samp_by_user=5, max_state=100, max_action=50, nb_states=[], nb_actions=[]):
        '''
        For a given historic, make one or multiple sampling.
        If no optional argument given for nb_states and nb_actions, 
        then the sampling is random and each sample can have differents size for action and state.
        To normalize sampling we need to give list of the numbers of states and actions to be sampled

        Parameters
        ----------
        user_histo:         DataFrame, historic of user
        delimiter:          float, optional delimiter for the csv
        action_ratio:       float, optional ratio form which books in history will be selected
        max_samp_by_user:   int, optional Number max of sample to make by user
        max_state:          int, optional Number max of books to take for the 'state' column
        max_action:         int, optional Number max of books to take for the 'action' column
        nb_state:           array(int), optional Numbers of books to be taken for each sample made on user's historic
        nb_actions:         array(int), optional Numbers of rating to be taken for each sample made on user's historic

        Returns
        -------
        states:             List(String), All the states sampled, format of a sample: item & rating
        actions:            List(String), All the actions sampled, format of a sample: item & rating

        Notes
        -------
        States must be before(timestamp) the actions.
        If given, size of nb_states is the number of sample by user size of nb_states and nb_actions must be equals
        '''

        n = len(user_histo)
        print(n)
        sep = int(action_ratio * n)
        nb_sample = random.randint(1, max_samp_by_user)
        if not nb_states:
            nb_states = [min(random.randint(1, sep), max_state) for i in range(nb_sample)]
        if not nb_actions:
            nb_actions = [min(random.randint(1, n-sep), max_action) for i in range(nb_sample)]
        
        assert len(nb_states) == len(nb_actions)

        states = []
        actions = []

        # SELECT SAMPLES IN HISTO
        for i in range(len(nb_states)):
            sample_states = user_histo.iloc[0:sep].sample(nb_states[i])
            sample_actions = user_histo.iloc[-(n-sep):].sample(nb_actions[i])

            sample_state = []
            sample_action = []
            for j in range(nb_states[i]):
                row = sample_states.iloc[j]
                # FORMAT STATE
                state = str(row.loc['item']) + '&' + str(row.loc['rating'])
                sample_state.append(state)
            
            for j in range(nb_actions[i]):
                row = sample_actions.iloc[j]
                # FORMAT ACTION
                action = str(row.loc['item']) + '&' + str(row.loc['rating'])
                sample_action.append(action)
            
            states.append(sample_state)
            actions.append(sample_action)
        
        return states, actions

    def gen_train_test(self, test_ratio, seed=None):
        '''
        Shuffle the historic of users and seperate it in a train and a test set.
        Store the ids for each set.
        An user can't be in both set.

        Parameters
        -----------
        test_ratio:     float, ratio to control the sizes of the sets
        seed:           float, seed on the shuffle
        '''

        n = len(self.histo)

        if seed is not None:
            random.Random(seed).shuffle(self.histo)
        else:
            random.shuffle(self.histo)

        self.train = self.histo[:int((test_ratio * n))]
        self.test = self.histo[int((test_ratio * n)):]
        self.user_train = [h.iloc[0,0] for h in self.train]
        print(self.user_train)
        self.user_test = [h.iloc[0,0] for h in self.test]

    def write_csv(self, filename, histo_to_write, delimiter=';', action_ratio=0.8, 
                  max_samp_by_user=5, max_state=100, max_action=50, nb_states=[], nb_actions=[]):
        '''
        From a given historic, create a csv file with the format
        Columns:        state, action_reward, n_state
        Rows:           item&rating1 | item&rating2 | ...item&rating3 |... at filename location.

        Paramters
        ----------
        filename:           string, path to the file to be produced
        histo_to_write:     list(DataFrame), list of the historic for each user
        delimiter:          string, optional delimiter for the csv
        action_ratio:       float, optional ratio form which books in history will be selected
        max_samp_by_user:   int, optional Number max of sample to make by user
        max_state :         int, optional Number max of books to take for the 'state' column
        max_action :        int, optional Number max of books to take for the 'action' action
        nb_states :         array(int), optional Numbers of books to be taken for each sample made on user's historic
        nb_actions :        array(int), optional Numbers of rating to be taken for each sample made on user's historic

        Notes
        -----
        if given, size of nb_states is the number of sample by user sizes of nb_states and nb_actions must be equals
        '''
        with open(filename, mode='w') as file:
            f_writer = csv.writer(file, delimiter=delimiter)
            f_writer.writerow(['state', 'action_reward', 'n_state'])
            for user_histo in histo_to_write:
                states, actions = self.sample_histo(user_histo, action_ratio, 
                                                    max_samp_by_user, max_state, max_action, nb_states, nb_actions)
                for i in range(len(states)):
                    # FORMAT STATE
                    state_str = '|'.join(states[i])
                    # FORMAT ACTION
                    action_str = '|'.join(actions[i])
                    # FORMAT N_STATE
                    n_state_str = state_str + '|' + action_str
                    f_writer.writerow([state_str, action_str, n_state_str])


### Data

In [10]:
datapath = 'Books.csv'

In [11]:
# Hyperparameters
history_length = 12 # N in article
ra_length = 4 # K in article
discount_factor = 0.99 # Gamma in Bellman equation
actor_lr = 0.0001
critic_lr = 0.001
tau = 0.001 # τ in Algorithm 3
batch_size = 64
nb_episodes = 100
nb_rounds = 50
filename_summary = 'summary.txt'
alpha = 0.5 # α (alpha) in Equation (1)
gamma = 0.9 # Γ (Gamma) in Equation (4)
buffer_size = 1000000 # Size of replay memory D in article
fixed_length = True # Fixed memory length


dg = DataGenerator(datapath)
dg.gen_train_test(0.8, seed=42)

print(len(dg.train))
print(len(dg.test))
print('train: ', dg.train[:10])
print('test:', dg.test[:10])

#dg.write_csv('books_train.csv', dg.train, nb_states=[history_length], nb_actions=[ra_length])
#dg.write_csv('books_test.csv', dg_test, nb_states=[history_length], nb_actions=[ra_length])

#data = read_file('books_train.csv')

['0001384198', '0001384198', '0001384198', '0002005263', '0001713353', '0001384198', '0001932349', '0001384198', '0002005263', '0001384198', '0001384198', '0002005263', '0001384198', '0001384198', '0001384198', '0001384198', '0001384198', '0001061240', '0001384198', '0002005263', '0001384198', '0001384198', '0001384198', '0001384198', '0001384198', '0001384198', '0002005263', '0002005263', '0001384198', '0001384198', '0002005263', '0001384198', '0001384198', '0002005263', '0002005263', '0001384198', '0001384198', '0001384198', '0001061240', '0001384198', '0001061240', '0001384198', '0001384198', '0001384198', '0001384198', '0002005263', '0002005263', '0002005263', '0001384198', '0001712799', '0002005263', '0001384198', '0001713353', '0002005263', '0001384198', '0001384198', '0002005263', '0001384198', '0001384198', '0001384198', '0001384198', '0001384198', '0001384198', '0002005263', '0001384198', '0002005263', '0001384198', '0001713353', '0002005263', '0001713353', '0001713353', '0002