In [141]:
import numpy as np
import pandas as pd
import random
import operator
import glob

import tensorflow as tf
from tensorflow import keras
import autokeras as ak

In [142]:
# Paths handed to keras.models.load_model in Fitness.conversion_calc:
# MODEL_PATH_S is used for the "share" model, MODEL_PATH_A for the "absolute" model.
# NOTE(review): absolute, machine-specific paths - consider deriving them from a configurable base directory.
MODEL_PATH_S = "/home/eva_01/Documents/mi/AtomWaffe/best_model"
MODEL_PATH_A = "/home/eva_01/Documents/mi/AtomWaffe_conversion/best_model"

# Glob pattern matching the .npy training files loaded in the next cell.
data_path   = "/home/eva_01/Documents/mi/train_data/*.npy"

In [143]:
# Load every file matching `data_path` and stack the loaded records into one array.
# NOTE(review): allow_pickle=True can execute arbitrary code when a file comes from an untrusted source.
npy_data = [np.load(i,allow_pickle=True) for i in glob.glob(data_path)]
data = np.asarray(npy_data)

# Element 0 of each record goes to the input array, element 1 to the output array.
input_arr  = np.asarray([i[0] for i in data])
output_arr = np.asarray([i[1] for i in data])

In [450]:
# Same loading steps for the conversion dataset (pattern is hardcoded here instead of using a constant).
# NOTE(review): this rebinds `npy_data` and `data`, so after this cell they no longer hold the first dataset.
# NOTE(review): allow_pickle=True can execute arbitrary code when a file comes from an untrusted source.
npy_data = [np.load(i,allow_pickle=True) for i in glob.glob("/home/eva_01/Documents/mi/train_data_conversion/*.npy")]
data = np.asarray(npy_data)
# Element 0 of each record goes to the input array, element 1 to the output array.
input_arr_conv  = np.asarray([i[0] for i in data])
output_arr_conv = np.asarray([i[1] for i in data])

In [144]:
input_arr.shape

(1177, 28)

In [146]:
class Bounds:
    '''
    Calculates lower and upper unit (source/medium, campaigns) bounds.
    Bounds calculation logic : [mean - std * k, mean + std * k]
    '''

    def __init__(self,arr,k=1.25):
        '''
        k - multiplier for bound creation. Larger multiplier -> wider bounds
        arr (budget data) - (N x M) shape.
        N - number of observations. M - number of units (campaigns).
        '''
        self.arr = arr
        self.k   = k

    def stat(self):
        '''
        Per-unit statistics of the budget data.
        Return : `arr_stat` - (M x 2) array of [mean, std] rows, one row per unit.
        '''
        mean_values = self.arr.mean(axis=0) # mean value of each unit
        std_values  = self.arr.std(axis=0)  # std value of each unit

        return np.vstack((mean_values,std_values)).T

    def create_bound(self,mode='safe',k=None):
        '''
        Create [lower_bound, upper_bound] for each unit.
        `mode` == `safe`   - use the default multiplier given to the constructor
        `mode` == `custom` - use `k` as the multiplier (falls back to the default when `k` is None).
                             `k` may be a scalar or an array with one multiplier per unit.
        Return : (M x 2) array of [lower_bound, upper_bound] rows with negative values replaced by 0.
        Raises : ValueError when `mode` is neither `safe` nor `custom`.
        '''
        if mode == 'safe':
            coef = self.k
        elif mode == 'custom':
            # Identity test: `k == None` is evaluated elementwise for arrays and cannot be used in `if`.
            coef = self.k if k is None else k
        else:
            raise ValueError("`{}` mode does not exist. Use 'safe' or 'custom'.".format(mode))

        print(mode,coef)

        arr_stat = self.stat()
        low  = arr_stat[:,0] - coef * arr_stat[:,1] # lower bound of each unit
        high = arr_stat[:,0] + coef * arr_stat[:,1] # upper bound of each unit

        # (M x 2) array: one [lower_bound, upper_bound] row per unit
        arr_bound  = np.vstack((low,high)).T

        return self.safe_bound(arr_bound)

    def safe_bound(self,arr):
        '''
        Replace every negative value in `arr` with 0.
        '''

        return np.where(arr < 0, 0, arr)

In [148]:
# Per-unit [lower, upper] budget bounds from the training inputs, using the default multiplier (k=1.25).
unit_bounds = Bounds(input_arr).create_bound(mode='safe')

safe 1.25


In [375]:
unit_bounds

array([[  27552.30181585,   56646.39272192],
       [   6353.16241345,   60458.36201214],
       [  20927.24080563,   28931.06377802],
       [ 255197.02866329,  311491.17938911],
       [  42746.54821222,   52279.66225536],
       [  50908.45503345,   88360.1056854 ],
       [      0.        ,  102450.89519362],
       [  55931.73209658,  216409.61190133],
       [   7157.0661888 ,   36519.45185641],
       [ 164512.55343312,  664453.42872205],
       [ 543176.42584411,  780865.07217167],
       [   8640.62330905,  331561.95191749],
       [  29519.8076184 ,  196269.74822914],
       [  11315.03875455,   54034.42765455],
       [  84524.91171291,   98843.72041205],
       [ 926541.4884913 ,  995600.41021174],
       [  37901.3541006 ,  113682.3035491 ],
       [  34721.13437147,   73078.77092939],
       [  39975.17584857,   74676.82845704],
       [  22127.09124923,   47011.34026465],
       [  26228.16475812,   34254.53868049],
       [  16371.09435204,   44597.29548599],
       [  

In [378]:
# NOTE(review): in the visible notebook `population` is only assigned in later cells,
# so this cell depends on out-of-order execution and fails on a fresh top-to-bottom run.
test = population[0]

In [387]:
np.less(unit_bounds[:,0],population)

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])

In [388]:
np.less(population,unit_bounds[:,0])

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [149]:
def create_chromosome(unit_bounds):
    '''
    Create chromosome (budget split across units) considering each unit's lower and upper bounds.
    `unit_bounds` - (M x 2) array of [lower_bound, upper_bound] rows, one per unit.
    Return : integer array of length M. Bounds are truncated to integers before sampling and
    the upper bound is exclusive; a unit whose interval is empty after truncation
    (e.g. lower == upper) gets its truncated lower bound instead of raising.
    '''
    bounds = np.asarray(unit_bounds)
    low  = np.trunc(bounds[:,0]).astype(np.int64)
    high = np.trunc(bounds[:,1]).astype(np.int64)
    # randint requires low < high for every unit; widen degenerate intervals to exactly one value.
    high = np.maximum(high, low + 1)
    return np.random.randint(low, high)

In [150]:
def create_population(size,unit_bounds):
    '''
    Build the initial population (set of chromosomes).
    `unit_bounds` - bounds for each unit, forwarded to `create_chromosome`
    `size` - number of chromosomes to generate
    Return : Array (size x M), one chromosome per row. M - number of units (campaigns)
    '''
    chromosomes = [create_chromosome(unit_bounds) for _ in range(size)]
    return np.array(chromosomes)

In [368]:
class Fitness:
    '''
    Calculate Fitness func for each chromosome in population array
    '''

    # Loaded models keyed by path and shared by all instances, so each model is read from disk
    # only once per class definition. Re-run this cell to force a reload after a model changes on disk.
    _model_cache = {}

    def __init__(self,arr):
        '''arr - input population array'''
        self.arr         = arr

    @classmethod
    def _load_model(cls,path):
        '''Return the model stored at `path`, loading it with keras on first use only.'''
        if path not in cls._model_cache:
            cls._model_cache[path] = keras.models.load_model(path)
        return cls._model_cache[path]

    def conversion_calc(self):
        '''
        `model_share` - loaded model for MediaInfluence prediction
        `model_absolute` - loaded model for Conversion (conversion sum by each unit) number prediction
        Return : `conv` - elementwise product of both predictions for `self.arr`
                 (conversion split by each unit/campaign)
        '''
        model_share    = self._load_model(MODEL_PATH_S)
        model_absolute = self._load_model(MODEL_PATH_A)
        print(self.arr.shape)

        share     = model_share.predict(self.arr)    # predict conversion share split
        absolute  = model_absolute.predict(self.arr) # predict conversion sum
        # NOTE(review): relies on the two prediction arrays being broadcast-compatible - confirm model output shapes.
        return share * absolute

    def fitness_func(self,alpha):
        '''
        `alpha` - intended weight of the CPA term in the fitness function.
                  It is accepted for interface compatibility but currently ignored:
                  the fitness is the summed conversion only, no CPA term is applied.
        Return : Fitness function value for each chromosome (sum of `conversion_calc()` over axis 1)
        '''
        return self.conversion_calc().sum(axis=1)
    

In [171]:
def rankFunc(fitness_results):
    '''
    Rank chromosomes by fitness.
    Return : list of (chromosome index, fitness value) pairs ordered from the highest
    fitness to the lowest.
    '''
    indexes = np.arange(0,fitness_results.shape[0])
    ranked  = list(zip(indexes,fitness_results))
    ranked.sort(key = lambda pair: pair[1], reverse = True)

    return ranked

In [316]:
rankFunc(fitness)

[(810, 0.09466410072965695),
 (171, 0.0942979196114095),
 (91, 0.09311563333672879),
 (885, 0.092239528072745),
 (107, 0.09067893417653403),
 (584, 0.09058428183234143),
 (193, 0.09009413570872761),
 (0, 0.08953832817955769),
 (477, 0.08818933765585334),
 (179, 0.08750743333534287),
 (496, 0.08718779922312953),
 (124, 0.08715314954728574),
 (621, 0.08710648711011848),
 (531, 0.08699852175618111),
 (45, 0.08668445102517369),
 (552, 0.08660955652598552),
 (523, 0.08643265879390229),
 (699, 0.08557708294418186),
 (966, 0.08530952365126163),
 (372, 0.08527152624665557),
 (735, 0.08508873372888504),
 (711, 0.08452085345459218),
 (293, 0.08452069753762154),
 (755, 0.08391262241576439),
 (746, 0.08366873173364062),
 (409, 0.08365729079123985),
 (542, 0.08358793101660231),
 (669, 0.08357797800455295),
 (436, 0.08356013407500013),
 (95, 0.08300904721447203),
 (460, 0.08298652401174882),
 (237, 0.08292193839038692),
 (929, 0.08239931556180073),
 (962, 0.08232425995691087),
 (444, 0.0821627525531

In [24]:
def selection(popRanked,eliteSize):
    '''
    Select chromosomes for the mating pool: elitism plus fitness-proportionate picks.
    `popRanked` - list of (chromosome index, fitness) pairs, best first
    `eliteSize` - number of top-ranked chromosomes copied to the result unconditionally
    The remaining len(popRanked) - eliteSize slots are filled by drawing a random percentage
    and taking the first chromosome whose cumulative fitness share reaches it.
    Return : List with chromosome indexes for mating pool (empty list for an empty ranking)
    '''
    total = len(popRanked)
    if total == 0:
        return []

    indexes = [index for index, _ in popRanked]
    fitness = np.array([value for _, value in popRanked], dtype=float)

    fitness_sum = fitness.sum()
    if np.isfinite(fitness_sum) and fitness_sum != 0:
        cum_perc = 100 * np.cumsum(fitness) / fitness_sum
    else:
        # No usable fitness signal (all zeros / NaN): fall back to equal shares so the
        # mating pool keeps its size instead of silently shrinking to the elite only.
        cum_perc = 100 * np.arange(1, total + 1) / total

    selectionResults = indexes[:eliteSize] # best chromosomes with the highest fitness functions
    for _ in range(total - eliteSize):
        pick = 100*random.random()
        reached = np.flatnonzero(cum_perc >= pick)
        # Rounding can leave the last cumulative share slightly below `pick`;
        # use the last chromosome then rather than dropping the slot.
        position = reached[0] if reached.size else total - 1
        selectionResults.append(indexes[position])

    return selectionResults

In [365]:
def breed(P1,P2):
    '''
    Single-point crossover of two parent chromosomes.
    A random cut position is drawn; genes before the cut come from `P1`,
    genes from the cut onwards come from `P2`.
    Return: `child` - new chromosome (Array)
    '''
    cut = np.random.choice(P1.shape[0])
    return np.hstack((P1[:cut],P2[cut:]))

In [196]:
def matingPool(population, selectionResults):
    '''
    Collect the chromosomes chosen for mating.
    `selectionResults` - chromosome indexes into `population`, in selection order
    Return : List with chromosomes. List[Array]
    '''
    return [population[chosen] for chosen in selectionResults]

In [155]:
def breedPopulation(matingPool, eliteSize):
    '''
    Breed Population.
    The first `eliteSize` chromosomes of `matingPool` are carried over unchanged; the remaining
    places are filled with children bred from a shuffled copy of the pool, pairing element k
    with its mirror element from the end.
    Return : list of chromosomes with the same length as `matingPool`
    '''
    pool_size = len(matingPool)
    shuffled  = random.sample(matingPool, pool_size)

    elite     = [matingPool[k] for k in range(eliteSize)]
    offspring = [breed(shuffled[k], shuffled[pool_size-k-1]) for k in range(pool_size - eliteSize)]

    return elite + offspring

In [249]:
def mutate(individual,mutationRate,step):
    '''
    Mutation in chromosome.
    `mutationRate` - probability of mutation for each gene. Higher `mutationRate` -> higher mutation probability
    `step` - fraction the gene is changed by (0.05 -> +5% or -5%); the direction is drawn at random
    Return : mutated (maybe) chromosome as a new float array; `individual` is left untouched.
    Integer chromosomes are converted to float so that the fractional part of a mutation is kept
    instead of being truncated on assignment.
    '''
    individual = np.asarray(individual)
    if np.issubdtype(individual.dtype, np.floating):
        target_dtype = individual.dtype
    else:
        target_dtype = np.float64
    mutated = individual.astype(target_dtype) # astype returns a copy
    for index in range(mutated.shape[0]):
        if random.random() < mutationRate:
            direction = np.random.choice([-1,1], 1)[0] # -1 -> decrease, +1 -> increase
            mutated[index] += mutated[index] * direction * step
    return mutated

In [250]:
def mutatePopulation(population, mutationRate, step):
    '''
    Apply `mutate` to every chromosome of the population.
    `mutationRate` - probability of mutation. Higher `mutationRate` -> higher mutation probability
    `step` - fraction a gene is changed by when it mutates
    Return : list with one (possibly mutated) chromosome per input chromosome
    '''
    return [mutate(chromosome, mutationRate, step) for chromosome in population]

In [251]:
# Small 50-chromosome population for trying the individual steps; the bare name displays it.
population = create_population(size=50,unit_bounds=unit_bounds)
population

array([[ 28469,  44658,  28094, ..., 295025, 226443,  96296],
       [ 35175,  30479,  23808, ..., 250280, 242285,  99298],
       [ 55954,  26217,  24588, ...,  43561, 334293, 102415],
       ...,
       [ 41864,  32308,  27341, ..., 301286, 320164,  79706],
       [ 27750,  53022,  27070, ..., 304742, 450371,  95478],
       [ 40851,  37373,  27219, ...,  31378, 324428,  71813]])

In [369]:
# Fitness value of every chromosome in `population`; with the current fitness_func the
# value returned does not depend on `alpha`.
fitness = Fitness(population).fitness_func(alpha=1)

(1000, 28)


In [253]:
popRanked = rankFunc(fitness)
popRanked[0:5]

[(37, 0.3624956664972313),
 (16, 0.35288324163269047),
 (12, 0.34910993540927115),
 (1, 0.3487805694472641),
 (10, 0.3474013042520906)]

In [181]:
selectionResults = selection(popRanked=popRanked,eliteSize=10)

In [205]:
matingpool = matingPool(population,selectionResults)

In [210]:
children = breedPopulation(matingpool, eliteSize=10)

In [215]:
# NOTE(review): this binds `nextGeneration` to a list, while a later cell defines a function with the
# same name - re-running this cell afterwards replaces the function and breaks geneticAlgorithm.
nextGeneration = mutatePopulation(children, mutationRate=0.05,step=0.05)

yes 13 [ 38951  39461  25107 274747  49276  63955  45833  85102  12148 277855
 652530 140550  66858  40886  93876 927512  48708  68582  62872  31622
  30061  20711  34637 498959  59688 165050 188329  99527]
yes 21 [ 38951  39461  25107 274747  49276  63955  45833  85102  12148 277855
 652530 140550  66858  40886  93876 927512  48708  68582  62872  31622
  30061  20711  34637 498959  59688 165050 188329  99527]
yes 23 [ 38951  39461  25107 274747  49276  63955  45833  85102  12148 277855
 652530 140550  66858  40886  93876 927512  48708  68582  62872  31622
  30061  20711  34637 498959  59688 165050 188329  99527]
yes 2 [  44784   55836   26609  303694   45419   58114   10048   69444   29728
  372768  754624  196030   79614   30709   89028  942287   58181   69503
   66771   42760   31526   32629   18835 1309119   42783   47049  546961
  108921]
yes 20 [  44784   55836   26609  303694   45419   58114   10048   69444   29728
  372768  754624  196030   79614   30709   89028  942287   58181

In [288]:
def nextGeneration(population, eliteSize, mutationRate, step):
    '''
    Produce the next generation from `population`:
    fitness -> ranking -> selection -> mating pool -> breeding -> mutation.
    `eliteSize` - number of top chromosomes passed to selection and breeding as elite
    `mutationRate`, `step` - forwarded to `mutatePopulation`
    Return : Array with the new population
    '''
    scores    = Fitness(population).fitness_func(alpha=1)
    ranked    = rankFunc(scores)
    chosen    = selection(ranked, eliteSize)
    parents   = matingPool(population, chosen)
    offspring = breedPopulation(parents, eliteSize)

    return np.array(mutatePopulation(offspring, mutationRate,step))

In [429]:
def geneticAlgorithm(population, eliteSize, mutationRate, step, generations, bounds=None):
    '''
    Run the genetic algorithm for `generations` iterations.
    `population` - initial population array
    `eliteSize`, `mutationRate`, `step` - forwarded to `nextGeneration`
    `bounds` - per-unit [lower, upper] bounds applied to every new generation via `bound_control`.
               Defaults to the notebook-level `unit_bounds` (the previous, implicit behaviour).
    Prints the best fitness of the initial population and of every generation.
    Return : population after the last generation
    '''
    print("Initial Fitness: {}".format(Fitness(population).fitness_func(alpha=1).max()))

    for i in range(generations):
        population = nextGeneration(population, eliteSize, mutationRate, step)
        # Resolve the fallback lazily so the global is only required when it is actually used.
        active_bounds = unit_bounds if bounds is None else bounds
        population = bound_control(active_bounds,population)
        print("{} | Current Fitness: {}".format(i,Fitness(population).fitness_func(alpha=1).max()))

    return population

In [425]:
def bound_control(unit_bounds,arr):
    '''
    Keep every unit value (budget) inside its bounds.
    `unit_bounds` - array of [lower_bound, upper_bound] rows, one per unit
    `arr` - values to check; column j is compared with the bounds of unit j
    Return : array where values below the lower bound are replaced by the lower bound and
    values above the upper bound are replaced by the upper bound
    '''
    lower = unit_bounds[:,0]
    upper = unit_bounds[:,1]

    raised = np.where(arr < lower, lower, arr)    # lift values that fall below the lower bound
    return np.where(raised > upper, upper, raised) # cap values that exceed the upper bound

In [430]:
# Full-size initial population (1000 chromosomes) for the optimisation run; rebinds `population`.
population = create_population(size=1000,unit_bounds=unit_bounds)

In [431]:
# Run 100 generations with 200 elite chromosomes; each gene mutates with probability 0.05 by +/-5%.
# `r` is the population returned after the last generation.
r = geneticAlgorithm(population,eliteSize=200,mutationRate=0.05,step=0.05,generations=100)

(1000, 28)
Initial Fitness: 3019.28662109375
(1000, 28)
(1000, 28)
0 | Current Fitness: 3165.507568359375
(1000, 28)
(1000, 28)
1 | Current Fitness: 3103.15673828125
(1000, 28)
(1000, 28)
2 | Current Fitness: 3286.33251953125
(1000, 28)
(1000, 28)
3 | Current Fitness: 3378.876220703125
(1000, 28)
(1000, 28)
4 | Current Fitness: 3378.876220703125
(1000, 28)
(1000, 28)
5 | Current Fitness: 3473.162109375
(1000, 28)
(1000, 28)
6 | Current Fitness: 3473.162109375
(1000, 28)
(1000, 28)
7 | Current Fitness: 3551.576904296875
(1000, 28)
(1000, 28)
8 | Current Fitness: 3584.776123046875
(1000, 28)
(1000, 28)
9 | Current Fitness: 3608.94677734375
(1000, 28)
(1000, 28)
10 | Current Fitness: 3640.612060546875
(1000, 28)
(1000, 28)
11 | Current Fitness: 3703.928955078125
(1000, 28)
(1000, 28)
12 | Current Fitness: 3734.20458984375
(1000, 28)
(1000, 28)
13 | Current Fitness: 3823.600830078125
(1000, 28)
(1000, 28)
14 | Current Fitness: 3832.792724609375
(1000, 28)
(1000, 28)
15 | Current Fitness: 3

In [441]:
r[0].sum()

6163248.519046053

In [457]:
output_arr_conv.max()

2967.0

In [458]:
4014/2967

1.352881698685541

In [461]:
input_arr.sum(axis=1).mean()

5288747.564410714

In [465]:
r[0].sum()

6163248.519046053

In [473]:
r[:20].sum(axis=1)

array([6163248.51904605, 6171917.28038403, 6175113.26887636,
       6166111.40819315, 6145593.69296953, 6066613.12486971,
       6138809.25512021, 6184168.1475558 , 6179483.12243023,
       6137527.18801252, 6162954.66857826, 6075319.47020144,
       6186195.55111457, 6114429.44077537, 6195005.47037802,
       6165582.52344955, 6153903.07936919, 6173733.87878734,
       6169132.77505215, 6177061.45190023])