In [1]:
import pandas as pd  
pd.options.plotting.backend = 'plotly'
pd.set_option('display.max_columns', None)

import numpy as np            

from scipy.stats import poisson        
from scipy.optimize import minimize    
from scipy.special import factorial

from numba import jit 

from sklearn.preprocessing import StandardScaler

import plotly
import plotly.express as px

In [2]:
@jit()
def loglike(price, count, alfa, beta):
    '''
    Linear Log-Likelihood from demand exponential function:
    
    demand = np.exp(alfa*price + beta)
    
    args:
    price float : product price
    count int : product quantity sold in that price
    alfa float : first fit parameter, price parameter
    beta float : second fit parameter, intercept parameter
    '''
    
    demand = np.exp(alfa*price + beta)
    
    return -demand + count*np.log(demand) # - np.log(factorial(count))  

In [16]:

def fit(datos, p1, p2,
        opciones={'disp':True,'maxiter':100},  # Funcion para obtener alfas, betas y gamma, stats y maximas iteraciones
        metodo='trust-constr',    # metodo Nelder-Mead, Powell, CG, BFGS, Newton-CG, L-BFGS-B, TNC, COBYLA, SLSQP, trust-constr, dogleg, trust-ncg, trust-exact, trust-krylov (tambien custom)
        ):  

    
    # inicializacion aleatoria de los parametros
    val_inicial=np.concatenate((np.array([p1]),   # alfa
                                np.array([p2])    # beta
                                ))                
  

    def menos_loglike(params):  # Funcion menos log-verosimilitud, funcion a minimizar

        log_like = [loglike(n.price,      # price from data
                            n.count_,     # count from data
                            params[0],    # alfa parameter
                            params[1])    # beta parameter
                    for n in datos.itertuples()] # seleccion en filas
        
        return -sum(log_like) 
        
    
    salida=minimize(menos_loglike, 
                    val_inicial, 
                    options=opciones, 
                    #constraints=restricciones, 
                    method=metodo) # minimizacion

    #return salida       # salida de la minimizacion
    return salida.x   # alfa, beta 
    

In [11]:
%%time

df = pd.read_parquet('../data/final_data.parquet')

df.head()

CPU times: user 607 ms, sys: 1.54 s, total: 2.15 s
Wall time: 1.22 s


Unnamed: 0,client_id,date,extracash_turnover_amount,extracash_turnover_rate,pluscash_turnover_amount,pluscash_turnover_rate,plussurrogate_turnover_amount,plussurrogate_turnover_rate,surrogate_turnover_amount,surrogate_turnover_rate
0,296447,2021-01-01,7912.367612,60.0,2136.782258,49.0,7077.904564,32.0,20388.985626,21.0
1,276754,2021-01-02,9104.936748,100.0,11334.40349,100.0,210.645187,35.0,8293.243775,85.0
2,318898,2021-01-03,23072.351087,98.0,452.564701,66.0,13242.633239,36.0,895.68858,87.0
3,320104,2021-01-04,23693.190939,66.0,18322.330762,53.0,1417.620618,36.0,3930.98687,100.0
4,130854,2021-01-05,77.759699,56.0,9575.927443,100.0,25689.294891,34.0,14951.468994,23.0


In [12]:
df.shape

(14600000, 10)

In [13]:
rates = [c for c in df.columns if '_rate' in c]

for r in rates:
    df[r]=df[r].apply(lambda x: x/100)

In [14]:
df.head()

Unnamed: 0,client_id,date,extracash_turnover_amount,extracash_turnover_rate,pluscash_turnover_amount,pluscash_turnover_rate,plussurrogate_turnover_amount,plussurrogate_turnover_rate,surrogate_turnover_amount,surrogate_turnover_rate
0,296447,2021-01-01,7912.367612,0.6,2136.782258,0.49,7077.904564,0.32,20388.985626,0.21
1,276754,2021-01-02,9104.936748,1.0,11334.40349,1.0,210.645187,0.35,8293.243775,0.85
2,318898,2021-01-03,23072.351087,0.98,452.564701,0.66,13242.633239,0.36,895.68858,0.87
3,320104,2021-01-04,23693.190939,0.66,18322.330762,0.53,1417.620618,0.36,3930.98687,1.0
4,130854,2021-01-05,77.759699,0.56,9575.927443,1.0,25689.294891,0.34,14951.468994,0.23


In [20]:
df_lst = []


for r in rates:
    data = df.groupby(['date', r]).count().reset_index()
    
    data = data.iloc[:, 0:3]
    
    data.columns = ['date', 'price', 'count_']
    
    df_lst.append(data)

In [27]:
params = fit(df_lst[0][['price', 'count_']], 
                 p1=-1., 
                 p2=0.,
                 opciones={'disp':True,'maxiter':10000})
    
y = np.exp(params[0]*x+params[1])*100



df_plot=pd.DataFrame()
df_plot['x']=x
df_plot['y']=y
df_plot['profit']=df_plot.x*df_plot.y


display(px.line(df_plot, x='x', y='y'))
px.line(df_plot, x='x', y='profit')

`xtol` termination condition is satisfied.
Number of iterations: 64, function evaluations: 186, CG iterations: 121, optimality: 1.05e+01, constraint violation: 0.00e+00, execution time:  5.5 s.


In [29]:
x = np.linspace(1, 100, 100)

for d in df_lst:
    
    params = fit(d[['price', 'count_']], 
                 p1=-1., 
                 p2=0.,
                 opciones={'disp':True,'maxiter':10000})
    
    y = np.exp(params[0]*x+params[1])
    
    
    
    df_plot=pd.DataFrame()
    df_plot['x']=x
    df_plot['y']=y
    df_plot['profit']=df_plot.x*df_plot.y*100
    
    
    display(px.line(df_plot, x='x', y='y'))
    display(px.line(df_plot, x='x', y='profit'))

`xtol` termination condition is satisfied.
Number of iterations: 64, function evaluations: 186, CG iterations: 121, optimality: 1.05e+01, constraint violation: 0.00e+00, execution time:  5.5 s.


`xtol` termination condition is satisfied.
Number of iterations: 42, function evaluations: 126, CG iterations: 74, optimality: 6.53e+01, constraint violation: 0.00e+00, execution time:  3.7 s.


`xtol` termination condition is satisfied.
Number of iterations: 40, function evaluations: 120, CG iterations: 61, optimality: 2.21e+02, constraint violation: 0.00e+00, execution time:  1.5 s.


`xtol` termination condition is satisfied.
Number of iterations: 47, function evaluations: 129, CG iterations: 80, optimality: 1.24e+02, constraint violation: 0.00e+00, execution time:  3.8 s.


In [34]:
df_lst[-1]

Unnamed: 0,date,price,count_
0,2021-01-01,0.20,128
1,2021-01-01,0.21,270
2,2021-01-01,0.22,254
3,2021-01-01,0.23,263
4,2021-01-01,0.24,230
...,...,...,...
59125,2022-12-31,0.96,117
59126,2022-12-31,0.97,125
59127,2022-12-31,0.98,111
59128,2022-12-31,0.99,117


In [35]:
params = fit(df_lst[-1][['price', 'count_']], 
                 p1=-1., 
                 p2=0.,
                 opciones={'disp':True,'maxiter':10000})
    
y = np.exp(params[0]*x+params[1])*100



df_plot=pd.DataFrame()
df_plot['x']=x
df_plot['y']=y
df_plot['profit']=df_plot.x*df_plot.y


display(px.line(df_plot, x='x', y='y'))
px.line(df_plot, x='x', y='profit')

`xtol` termination condition is satisfied.
Number of iterations: 47, function evaluations: 129, CG iterations: 80, optimality: 1.24e+02, constraint violation: 0.00e+00, execution time:  4.1 s.


In [36]:
df_lst[-1].to_parquet('../data/fake_data3.parquet')