In [54]:
from tensorflow import keras
import random 
import numpy as np
import pandas as pd
import math

In [67]:
class DraftGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that reads every batch directly from a CSV file.

    The row range ``[start, end)`` is cut into windows of ``batch_size`` rows.
    The window start positions are shuffled once at construction time, and
    each ``__getitem__`` call re-opens the file and reads only the rows of the
    requested window.

    Row positions are passed to ``pandas.read_csv`` as
    ``skiprows=range(1, position)``: file line 0 (the header) is always kept
    and the first data row returned is file line ``position``.
    """

    # Shuffled list of first-row positions, one entry per batch.
    indices = None
    # Maximum number of rows per batch.
    batch_size = None
    # Column selection used to build the feature part of a batch.
    columns = None
    # Ordered list of card names; a name's position is its class id.
    card_names = None
    # Exclusive upper bound of the row range served by this generator.
    end = None

    def __init__(self, filename, start, end, batch_size, columns, card_names):
        """Store the configuration and build the shuffled batch order.

        Args:
            filename: Path of the CSV file to read from.
            start: Position of the first row to serve.
            end: Position one past the last row to serve.
            batch_size: Number of rows per batch (the last batch may be smaller).
            columns: Column selection applied to each batch to obtain ``x``.
            card_names: List of card names used to one-hot encode the
                ``"pick"`` column.
        """
        self.columns = columns
        self.batch_size = batch_size
        self.filename = filename
        self.card_names = card_names
        self.end = end
        self.indices = list(range(start, end, batch_size))
        random.shuffle(self.indices)

    def __len__(self):
        """Return the number of batches."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Read and return batch ``idx`` as an ``(x, y)`` tuple.

        Args:
            idx: Batch number in ``range(len(self))``.

        Returns:
            ``(x, y)`` where ``x`` is ``data[self.columns]`` and ``y`` is the
            one-hot encoding of the ``"pick"`` column over ``card_names``.

        Raises:
            ValueError: If a value of the ``"pick"`` column is not contained
                in ``card_names``.
        """
        first_row = self.indices[idx]
        # Clip the last window so that no rows at or beyond `end` are served;
        # every entry of `indices` is < end, so at least one row is requested.
        rows_to_read = min(self.batch_size, self.end - first_row)
        data = pd.read_csv(
            self.filename,
            index_col=0,
            skiprows=range(1, first_row),
            nrows=rows_to_read,
        )
        x = data[self.columns]
        y_index = [self.card_names.index(y_name) for y_name in data["pick"]]
        y = keras.utils.to_categorical(y_index, len(self.card_names))
        return (x, y)
    


In [104]:
class DraftChunkGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that streams batches from a chunked CSV reader.

    A single ``pandas.read_csv(..., chunksize=batch_size)`` reader is opened
    over the row range ``[start, end)`` and each batch is the next chunk of
    that reader. Batches are therefore served in file order (no shuffling).

    Sequential access (``0, 1, 2, ...``) is the fast path. Any other access
    pattern is still answered correctly, but requires re-opening the reader
    and skipping forward to the requested chunk.
    """

    # Unused by this class; kept so existing attribute access keeps working.
    indices = None
    # Number of rows per chunk / batch.
    batch_size = None
    # Column selection used to build the feature part of a batch.
    columns = None
    # Ordered list of card names; a name's position is its class id.
    card_names = None
    # The open chunked CSV reader.
    drafts_iterator = None
    # Position of the first row to serve.
    start = None
    # Position one past the last row to serve.
    end = None
    # Number of the batch that the next read from `drafts_iterator` yields.
    next_idx = None

    def __init__(self, filename, start, end, batch_size, columns, card_names):
        """Store the configuration and open the chunked reader.

        Args:
            filename: Path of the CSV file to read from.
            start: Position of the first row to serve.
            end: Position one past the last row to serve.
            batch_size: Number of rows per batch (the last batch may be smaller).
            columns: Column selection applied to each batch to obtain ``x``.
            card_names: List of card names used to one-hot encode the
                ``"pick"`` column.
        """
        self.columns = columns
        self.batch_size = batch_size
        self.filename = filename
        self.card_names = card_names
        self.start = start
        self.end = end
        self.initialize_generator()

    def initialize_generator(self):
        """(Re)open the chunked reader positioned at the first batch."""
        # Release the file handle held by a previous reader before replacing it.
        if self.drafts_iterator is not None:
            self.drafts_iterator.close()
        self.drafts_iterator = pd.read_csv(
            self.filename,
            skiprows=range(1, self.start),
            nrows=self.end - self.start,
            chunksize=self.batch_size,
        )
        self.next_idx = 0

    def on_epoch_end(self):
        """Rewind the reader so the next epoch starts at the first batch."""
        self.initialize_generator()

    def __len__(self):
        """Return the number of batches in ``[start, end)``."""
        return math.ceil((self.end - self.start) / self.batch_size)

    def __getitem__(self, idx):
        """Return batch ``idx`` as an ``(x, y)`` tuple.

        Args:
            idx: Batch number in ``range(len(self))``.

        Returns:
            ``(x, y)`` where ``x`` is ``data[self.columns]`` and ``y`` is the
            one-hot encoding of the ``"pick"`` column over ``card_names``.

        Raises:
            IndexError: If ``idx`` is outside ``range(len(self))``.
            ValueError: If a value of the ``"pick"`` column is not contained
                in ``card_names``.
        """
        if not 0 <= idx < len(self):
            raise IndexError("batch index out of range")
        if idx != self.next_idx:
            # Out-of-order request (e.g. batch 0 asked for twice): rewind and
            # skip forward so the chunk returned really is chunk `idx`.
            self.initialize_generator()
            for _ in range(idx):
                next(self.drafts_iterator)
            self.next_idx = idx
        data = next(self.drafts_iterator)
        self.next_idx += 1
        x = data[self.columns]
        y_index = [self.card_names.index(y_name) for y_name in data["pick"]]
        y = keras.utils.to_categorical(y_index, len(self.card_names))
        return (x, y)




In [145]:
class DraftReadFileGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that caches one large CSV read and slices batches from it.

    The row range ``[start, end)`` is processed in consecutive reads of up to
    ``read_size`` rows. Each read is loaded and one-hot encoded once, and its
    batches (``batch_size`` rows each) are then served from memory in a
    shuffled order. Shuffling happens inside a read only; the reads themselves
    follow file order.
    """

    # Shuffled first-row offsets (relative to the cached read), one per batch.
    indices = None
    # Maximum number of rows per batch.
    batch_size = None
    # Column selection used to build the feature part of a batch.
    columns = None
    # Ordered list of card names; a name's position is its class id.
    card_names = None
    # Features of the currently cached read.
    x = None
    # One-hot targets of the currently cached read.
    y = None
    # Position of the first row to serve.
    start = None
    # Position one past the last row to serve.
    end = None
    # Number of the read that is currently cached in `x` / `y`.
    current = None

    def __init__(self, filename, start, end, batch_size, read_size, columns, card_names):
        """Store the configuration and load the first read.

        Args:
            filename: Path of the CSV file to read from.
            start: Position of the first row to serve.
            end: Position one past the last row to serve.
            batch_size: Number of rows per batch.
            read_size: Number of rows loaded from the file per read.
            columns: Column selection applied to the data to obtain ``x``.
            card_names: List of card names used to one-hot encode the
                ``"pick"`` column.
        """
        self.columns = columns
        self.batch_size = batch_size
        self.read_size = read_size
        self.filename = filename
        self.card_names = card_names
        self.start = start
        self.end = end
        self.current = 0
        self.initialize_generator()

    def _batches_per_read(self):
        """Return how many batches a full read of ``read_size`` rows yields."""
        return math.ceil(self.read_size / self.batch_size)

    def _encode_picks(self, picks):
        """One-hot encode an iterable of card names over ``card_names``."""
        y_index = [self.card_names.index(y_name) for y_name in picks]
        return keras.utils.to_categorical(y_index, len(self.card_names))

    def initialize_generator(self):
        """Load read number ``self.current`` and reshuffle its batch order."""
        local_start = self.start + self.current * self.read_size
        # Never request rows at or beyond `end`: the final read is usually
        # shorter than `read_size`.
        rows_to_read = max(0, min(self.read_size, self.end - local_start))
        data = pd.read_csv(
            self.filename,
            skiprows=range(1, local_start),
            nrows=rows_to_read,
        )
        self.x = data[self.columns]
        self.y = self._encode_picks(data["pick"])
        self.indices = list(range(0, rows_to_read, self.batch_size))
        random.shuffle(self.indices)

    def on_epoch_end(self):
        """Go back to the first read (which also reshuffles its batches)."""
        self.current = 0
        self.initialize_generator()

    def __len__(self):
        """Return the total number of batches over all reads.

        Equals ``ceil((end - start) / batch_size)`` whenever ``read_size`` is
        a multiple of ``batch_size``.
        """
        total_rows = max(0, self.end - self.start)
        full_reads, remainder = divmod(total_rows, self.read_size)
        return (full_reads * self._batches_per_read()
                + math.ceil(remainder / self.batch_size))

    def __getitem__(self, idx):
        """Return batch ``idx`` as an ``(x, y)`` tuple.

        Args:
            idx: Batch number in ``range(len(self))``.

        Returns:
            ``(x, y)``: matching row slices of the cached features and of the
            cached one-hot targets.

        Raises:
            IndexError: If ``idx`` is outside ``range(len(self))``.
        """
        if not 0 <= idx < len(self):
            raise IndexError("batch index out of range")
        # Integer arithmetic: which read the batch lives in, and which of that
        # read's (shuffled) batches it is.
        part, local_idx = divmod(idx, self._batches_per_read())
        if part != self.current:
            # Load exactly the read that is needed instead of assuming the
            # caller only ever advances by one read.
            self.current = part
            self.initialize_generator()
        first = self.indices[local_idx]
        last = first + self.batch_size
        return (self.x.iloc[first:last], self.y[first:last])

    def all_data(self):
        """Load the whole ``[start, end)`` range at once.

        The cached read used by ``__getitem__`` is left untouched.

        Returns:
            ``(x, y)`` where ``x`` is ``data[self.columns]`` for the full range
            and ``y`` is the one-hot encoding of its ``"pick"`` column.
        """
        data = pd.read_csv(
            self.filename,
            skiprows=range(1, self.start),
            nrows=self.end - self.start,
        )
        x = data[self.columns]
        y = self._encode_picks(data["pick"])
        return x, y



In [49]:
#all_drafts_iterator = pd.read_csv("./data/VOW_prepared.csv", chunksize=10000)

In [60]:
#relevant_drafts = pd.read_csv("./data/VOW_prepared.csv", index_col=0, nrows = 1)
#card_names = [c.split("_")[1] for c in relevant_drafts.columns if "pool_" in c]

In [8]:
#card_names = [c.split("_")[1] for c in relevant_drafts.columns if "pool_" in c]

In [68]:
#a = DraftGenerator("./Data/VOW_prepared.csv", 100, 10000, 128, ["R", "G", "W"], card_names)

In [64]:
#len(a)

78

In [69]:
#%%timeit
#for _ in a:
#    continue

3.87 s ± 44.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [110]:
#b = DraftChunkGenerator("./Data/VOW_prepared.csv", 100, 10000, 128, ["R", "G", "W"], card_names)

In [111]:
#len(b)

78

In [112]:
#%%timeit
#for _ in b:
#    continue
#b.on_epoch_end()

2.68 s ± 12.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [142]:
#c = DraftReadFileGenerator("./Data/VOW_prepared.csv", 100, 30000, 128, 12800, ["R", "G", "W"], card_names)

In [143]:
#%%timeit
#for _ in c:
#    continue
#c.on_epoch_end()

1.47 s ± 19.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
