In [1]:
import random

# StringIO behaves like a file object 
from io import StringIO 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.datasets import load_boston
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
import statsmodels.formula.api as smf

import copy

%matplotlib inline 

In [2]:
class CqkProblem:
    """Container for one knapsack instance consumed by the solver cells.

    Stored fields (as used elsewhere in this notebook):
        n  -- number of variables
        r  -- right-hand side; the solver looks for sum(b[i] * x[i]) == r
        d, a, b -- per-variable coefficients, with x[i] = (b[i]*lamb + a[i]) / d[i]
        up -- per-variable upper bounds applied to x[i]

    Every sequence argument is copied into a fresh list, so the instance never
    aliases the containers passed in by the caller.
    """

    def __init__(self, r, n, d, a, b, up):
        self.n, self.r = n, r
        self.d, self.a, self.b, self.up = (list(seq) for seq in (d, a, b, up))

In [3]:
def generate_cqk_problem(n, rng=None):
    """Build a random, uncorrelated knapsack instance with ``n`` variables.

    For every variable the following values are drawn, in this order:
        b  = 10 + 14 * U[0, 1)
        up = 1 + 14 * U[0, 1)
        d  = random integer in [10, 25]
        a  = random integer in [10, 25]
    The right-hand side is ``r = 0.7 * sum(b[i] * up[i])``.

    Args:
        n: Number of variables. ``n <= 0`` yields an empty instance with r == 0.0.
        rng: Optional randomness source exposing ``random()`` and
            ``randint(lo, hi)`` (for example ``random.Random(seed)``). When
            omitted, the global ``random`` module is used, which is exactly
            what the function did before this parameter existed.

    Returns:
        CqkProblem: the generated instance.
    """
    if rng is None:
        rng = random

    # Inclusive range for the integer coefficients d and a.
    coef_low, coef_high = 10, 25

    d, a, b, up = [], [], [], []
    ub = 0.0  # running sum of b[i] * up[i]

    for _ in range(n):
        # The draw order (b, up, d, a) is kept fixed so that a given RNG state
        # always produces the same instance.
        b_i = 10 + 14 * rng.random()
        up_i = 1 + 14 * rng.random()
        b.append(b_i)
        up.append(up_i)
        ub = ub + b_i * up_i

        # Uncorrelated coefficients: d and a are drawn independently.
        d.append(rng.randint(coef_low, coef_high))
        a.append(rng.randint(coef_low, coef_high))

    return CqkProblem(ub * 0.7, n, d, a, b, up)

In [4]:

def initial_lambda(p, lamb):
    """Compute the starting multiplier and the per-variable slopes.

    Treating every variable as unclamped, x_i = (b_i*lamb + a_i) / d_i, the
    condition sum(b_i * x_i) == r gives
        lamb = (r - sum(a_i*b_i/d_i)) / sum(b_i*b_i/d_i).

    Args:
        p: Problem exposing ``n``, ``r`` and the sequences ``a``, ``b``, ``d``.
        lamb: Ignored; the incoming value is overwritten. Kept so existing
            calls keep working.

    Returns:
        tuple: ``(lamb, slopes)`` where ``slopes[i] = (b_i / d_i) * b_i``.
    """
    weighted_a = 0.0
    weighted_b = 0.0
    slopes = []
    for idx in range(p.n):
        a_i, b_i, d_i = p.a[idx], p.b[idx], p.d[idx]
        slopes.append((b_i / d_i) * b_i)
        weighted_a += (a_i * b_i) / d_i
        weighted_b += (b_i * b_i) / d_i
    return (p.r - weighted_a) / weighted_b, slopes

In [5]:
def phi_lambda(p,lamb,phi,deriv,slopes,r):
    """Evaluate phi(lamb) = sum(b_i * x_i) - r together with its slope.

    Each x_i = (b_i*lamb + a_i) / d_i is capped at up_i; no lower bound is
    applied. Only variables that were not capped add slopes[i] to the slope.

    Args:
        p: Problem exposing ``n`` and the sequences ``a``, ``b``, ``d``, ``up``.
        lamb: Multiplier at which to evaluate.
        phi, deriv: Ignored; both are recomputed from scratch. Kept so
            existing calls keep working.
        slopes: Per-variable slopes, indexed like the problem's sequences.
        r: Right-hand side subtracted from the weighted sum.

    Returns:
        tuple: ``(deriv, phi, x)`` with ``x`` the list of (capped) values.
    """
    total = r * -1
    active_slope = 0.0
    x = []
    for i in range(p.n):
        candidate = (p.b[i] * lamb + p.a[i]) / p.d[i]
        if candidate > p.up[i]:
            # Capped variable: fixed at its bound, contributes no slope.
            candidate = p.up[i]
        else:
            active_slope = active_slope + slopes[i]
        x.append(candidate)
        total = total + p.b[i] * candidate
    return active_slope, total, x

In [6]:
# The Newton loop below keeps iterating while ``it <= MAX_IT``.
MAX_IT = 20
# Sentinels ("negative/positive infinity" in Portuguese) used as the initial
# bracket [alfa, beta] for the multiplier.
INFINITO_NEGATIVO = -999999999;
INFINITO_POSITIVO = 999999999;

def newton(p):
    """Search for a root of phi(lamb) with a bracketed Newton iteration.

    The start point comes from ``initial_lambda``; every evaluation goes
    through ``phi_lambda``. While phi is positive the current point becomes
    the upper bracket ``beta``; otherwise it becomes the lower bracket
    ``alfa``. A Newton step is accepted only if it stays strictly inside the
    bracket.

    NOTE(review): the secant / breakpoint fallbacks are commented out. When
    ``deriv`` is not positive, or the Newton step leaves the bracket, ``lamb``
    is left unchanged, so the next evaluation repeats the same point and the
    loop runs until ``MAX_IT`` is exhausted.

    Args:
        p: Problem instance; ``p.r`` is read here and ``p`` is forwarded to
            ``initial_lambda`` and ``phi_lambda``.

    Returns:
        tuple: ``(status, lambs, derivs, phis, slopes)``.
            status is the evaluation counter ``it`` when phi ended at exactly
            0.0 (this includes the case where a Newton step of size <= 1e-10
            forces phi to 0.0), -1 when ``alfa == beta``, and -2 otherwise.
            lambs / derivs / phis hold the multiplier, slope and phi value of
            every evaluation, in order; slopes is the list returned by
            ``initial_lambda``.
    """
    lambs = [] 
    phis = []
    derivs = []
    phi = 0
    lamb = 0
    alfa = INFINITO_NEGATIVO;
    beta = INFINITO_POSITIVO;
    # phi_alfa / phi_beta are only consumed by the commented-out secant call.
    phi_alfa = 0.0;
    phi_beta = 0.0;
    deriv = 0
    x = []
    r = p.r
    
    # First evaluation at the unclamped starting multiplier.
    lamb, slopes = initial_lambda(p,lamb)
    deriv, phi, x = phi_lambda(p,lamb,phi,deriv,slopes,r)
    lambs.append(lamb)
    derivs.append(deriv)
    phis.append(phi)
    it = 1
#     print(it, deriv, phi,lamb)
    # Set to True below when the first evaluation is not positive; it is never
    # read inside this function.
    negativo = False
    while phi != 0.0 and it <= MAX_IT:
        if phi > 0:
#             print("positivo")
            # phi > 0: the current point becomes the upper end of the bracket.
            beta = lamb
            lambda_n = 0.0
            if deriv > 0.0:
                
                lambda_n = lamb - (phi/deriv)
                # A negligible step counts as convergence; note that lambda_n
                # itself is not appended to lambs in this case.
                if abs(lambda_n - lamb) <= 1e-10:
                    phi = 0.0
                    break
                # Accept the step only if it stays above the lower bracket.
                if lambda_n > alfa:
                    lamb = lambda_n
                else:
#                     print("aqui")
                    phi_beta = phi;
#                     lamb = secant(p,x,alfa,beta,phi_alfa,phi_beta,r);
#             if deriv == 0.0:
#                 lamb = breakpoint_to_the_left(p,lamb);
#                 if lamb <= INFINITO_NEGATIVO or lamb >= INFINITO_POSITIVO:
#                     break
                
        else:
            # phi < 0 here (phi == 0.0 would have ended the loop).
            if it == 1:
                negativo = True
#             print("negativo")
            # The current point becomes the lower end of the bracket.
            alfa = lamb;
            lambda_n = 0.0;

            if deriv > 0.0:
                lambda_n = lamb - (phi/deriv)
                # Same step-size convergence test as in the positive branch.
                if abs(lambda_n - lamb) <= 1e-10:
                    phi = 0.0
                    break
                
                # Accept the step only if it stays below the upper bracket.
                if lambda_n < beta:
                    lamb = lambda_n
                else:
#                     print("aqui")
                    phi_alfa = phi;
#                     lamb = secant(p,x,alfa,beta,phi_alfa,phi_beta,r);
#             if deriv == 0.0:
#                 lamb = breakpoint_to_the_right(p,lamb)
#                 if lamb <= INFINITO_NEGATIVO or lamb >= INFINITO_POSITIVO:
#                     break
        
        
        # Re-evaluate at the (possibly unchanged) multiplier and record it.
        deriv, phi, x = phi_lambda(p,lamb,phi,deriv,slopes,r)
        it = it + 1
        lambs.append(lamb)
        derivs.append(deriv)
        phis.append(phi)
        
    # On success ``it`` equals len(lambs); failures are reported as -1 / -2.
    if phi == 0.0:
        return it,lambs, derivs, phis,slopes
    elif alfa == beta:
        return -1,lambs, derivs, phis,slopes
    else:
        return -2,lambs, derivs, phis,slopes

In [7]:
def func_ty(p):
    """Tabulate y(lamb) = sum_i b_i * x_i(lamb) on the grid 0, 0.5, ..., 14.5.

    For each grid point, x_i = (b_i*lamb + a_i) / d_i is projected onto the
    variable's bounds before being weighted by b_i.

    The previous version could not run: it iterated over the notebook-level
    global ``n`` instead of ``p.n``, called an undefined ``median`` helper and
    read ``p.low``, which ``CqkProblem`` does not define. Bounds are now
    handled as follows:
        * if ``p`` has a ``low`` sequence, x_i becomes the median of
          (low_i, x_i, up_i), as the original expression intended;
        * otherwise only the upper bound is applied, matching ``phi_lambda``.

    Args:
        p: Problem exposing ``n`` and the sequences ``a``, ``b``, ``d``,
            ``up`` (and optionally ``low``).

    Returns:
        list: one value of y per grid point (30 entries).
    """
    lower = getattr(p, "low", None)
    grid = np.arange(0, 15, 0.5).tolist()

    y = []
    for lamb in grid:
        soma = 0
        for i in range(p.n):
            x_i = (p.b[i] * lamb + p.a[i]) / p.d[i]
            if lower is None:
                x_i = min(x_i, p.up[i])
            else:
                # Median of three == clamp of x_i into [low_i, up_i].
                x_i = sorted((lower[i], x_i, p.up[i]))[1]
            soma = soma + p.b[i] * x_i
        y.append(soma)
    return y

In [234]:
# Build the training table: one row per random instance on which newton()
# reported success after more than two evaluations.
random.seed(1)  # fixed seed so the generated table is reproducible across runs
n = 100  # variables per instance (loop-invariant, so set once)
lista = []
for _ in range(200000):
    p = generate_cqk_problem(n)
    it, lambs, derivs, phis, slopes = newton(p)

    # Per-instance averages of the coefficients and of the right-hand side.
    soma_a = sum(p.a) / n
    soma_b = sum(p.b) / n
    soma_d = sum(p.d) / n
    r = p.r / n
    sum_slopes = sum(slopes)

    # newton() returns -1 / -2 on failure, so ``it > 2`` drops failed runs and
    # guarantees that lambs[2] exists (len(lambs) == it on success).
    if it > 2:
        l_rs = [soma_a, soma_b, soma_d, r, lambs[0], phis[0], derivs[0],
                sum_slopes, lambs[1], lambs[2], lambs[-1]]
        lista.append(l_rs)

In [235]:
# Persist the generated rows as space-separated text, one instance per line.
# The "%f" format keeps six digits after the decimal point.
np.savetxt(
    "instance_test100x200k.txt",
    lista,
    fmt="%f",
    delimiter=" ",
    newline="\n",
)

In [236]:
# Read the saved table back and wrap its full text in an in-memory,
# file-like buffer (``c``) for the parsing step.
c = ''
with open("instance_test100x200k.txt", "r") as fd:
    c = StringIO(fd.read())

In [237]:
# Parse the buffered text into a float array.
# Rewind first: the buffer may already have been consumed by a previous run of
# this cell, in which case parsing would silently yield an empty array.
c.seek(0)
d = np.loadtxt(c)
# Column labels, in the same order as the values written for each instance.
# The spelling 'inicital_lamb' is kept unchanged so existing column references
# and stored outputs stay valid.
feature_names = ['a','b','d','r','inicital_lamb','init_phi','init_deriv','sum_slopes','second_lamb','third_lamb','final_lamb']
# feature_names = ['a','b','d','r','initial_lamb','second_lamb','third_lamb','fourth_lamb','final_lamb']



In [238]:
# Bundle the array with its column labels, then expose it as a DataFrame.
knapsack = dict(data=d, feature_names=feature_names)
dataset = pd.DataFrame(data=knapsack["data"], columns=knapsack["feature_names"])

In [266]:
# Collect the feature matrix X and the target vector y.
# X keeps every column except the last two ('third_lamb' and 'final_lamb');
# y is the multiplier of the last recorded Newton evaluation.

X = dataset.iloc[:,:-2]
y = dataset['final_lamb'].values

In [267]:
# Display the feature matrix to check which columns were kept.
X

Unnamed: 0,a,b,d,r,inicital_lamb,init_phi,init_deriv,sum_slopes,second_lamb
0,16.97,17.106112,18.18,94.887145,4.235065,-1516.626306,1148.513259,1833.833559,5.555578
1,17.94,16.836234,17.27,94.326765,3.976506,-1637.009663,1026.333141,1890.372001,5.571514
2,17.14,17.585862,17.66,96.690297,3.893241,-1294.259297,1308.485153,2004.746095,4.882369
3,17.62,17.511734,17.00,96.292672,3.648066,-1675.249407,1223.667436,2097.201356,5.017106
4,17.67,17.548065,17.66,96.214475,4.070819,-1510.773558,1173.687980,1906.602911,5.358021
...,...,...,...,...,...,...,...,...,...
199994,17.87,16.798685,16.95,94.326899,4.045902,-1577.174984,1212.723717,1858.512789,5.346425
199995,16.55,16.401940,17.90,92.391100,4.297653,-1609.406628,1042.060112,1762.469288,5.842100
199996,17.64,17.608340,17.45,105.806320,4.175872,-1598.691429,1455.191086,2069.569912,5.274485
199997,17.29,16.842868,17.05,100.715315,4.288109,-1342.198397,1264.862238,1913.322033,5.349251


In [268]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [269]:
# Hold out part of the rows for evaluation (default split proportions).
# The shuffle is seeded, like the regressor, so the reported test score can be
# reproduced on a re-run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [270]:
# Standardization: learn the scaling statistics from the training split only,
# so nothing from the test split leaks into preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [271]:
# Apply the fitted standardization to both splits; the test split reuses the
# statistics learned from the training split.
X_train_p = scaler.transform(X_train)
X_test_p = scaler.transform(X_test)

In [272]:

# from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor

In [273]:
# Configure the regressor only. Training is done by the following
# ``regr.fit(...)`` cell; fitting here as well trained the same network twice
# from scratch (warm_start is off and the seed is fixed), doubling the
# training time for an identical model.
regr = MLPRegressor(activation='relu', random_state=1, max_iter=5000)

In [274]:
# Train the network on the standardized training split. fit() returns the
# estimator itself, so the cell displays its configuration.
regr.fit(X_train_p, y_train)

MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(100,), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=5000,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=1, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [275]:
# Predict the final multiplier for ten held-out rows (compare with
# y_test[100:110]).
regr.predict(X_test_p[100:110])

array([5.40922366, 5.7296234 , 5.14727777, 6.53028747, 6.17641247,
       5.73764894, 5.28737671, 5.26817879, 5.02197864, 6.11575748])

In [276]:
# Score the model on the held-out split (for scikit-learn regressors,
# score() is the coefficient of determination R^2).
regr.score(X_test_p, y_test)

0.9766615292756832

In [277]:
# Ground-truth targets for the same ten held-out rows, for a side-by-side
# check against the predictions.
y_test[100:110]

array([5.351328, 5.686761, 5.109523, 6.480343, 6.114226, 5.84352 ,
       5.38092 , 5.261462, 4.991607, 6.110195])

In [216]:
# Display the full table (features plus targets).
# NOTE(review): the stored output appears to come from an earlier run (lower
# execution count, about 50k rows) -- re-run this cell to refresh it.
dataset

Unnamed: 0,a,b,d,r,inicital_lamb,init_phi,init_deriv,sum_slopes,second_lamb,third_lamb,final_lamb
0,17.15,17.368732,17.69,93.969087,3.869648,-1604.259164,1235.426480,1953.772315,5.168195,5.366747,5.366747
1,17.43,17.231996,17.71,91.415475,3.944590,-1636.039116,1112.788824,1859.138467,5.414806,5.673396,5.673396
2,17.44,17.035628,17.22,102.886639,4.419324,-1592.683396,1297.581457,1912.178339,5.646749,5.790585,5.797680
3,18.10,16.995521,16.96,103.180973,4.355485,-1804.836528,1245.863394,1917.587454,5.804148,5.963636,5.963636
4,17.49,17.023526,17.53,91.953271,3.871105,-1324.043190,1168.077366,1894.930728,5.004628,5.220795,5.224675
...,...,...,...,...,...,...,...,...,...,...,...
49994,17.49,17.195761,16.98,90.381989,3.726074,-1308.127215,1197.943260,1922.664152,4.818052,4.920331,4.920331
49995,17.78,17.889165,18.01,102.850626,4.219429,-1459.280893,1239.219911,1989.434684,5.397009,5.543312,5.545597
49996,17.58,16.974566,17.00,102.314220,4.388107,-1186.180659,1244.227978,1909.849483,5.341454,5.429156,5.429156
49997,17.42,17.141963,16.93,104.808513,4.376643,-1704.235215,1295.842101,1961.205999,5.691799,5.857934,5.857934


In [18]:
%%time
# Time one end-to-end solve on a single, larger random instance.
# Note: this rebinds the notebook-level names n and p.
n = 500
p = generate_cqk_problem(n)
it,lambs, derivs, phis,slopes = newton(p)
# Show the returned status / evaluation count and the visited multipliers.
it,lambs

CPU times: user 7.38 ms, sys: 827 µs, total: 8.21 ms
Wall time: 7.97 ms


(4,
 [3.9765032451730913,
  5.319654084507383,
  5.4661475369398245,
  5.470344570032929])