In [2]:
import numpy as np
import pandas as pd
import os
from glob import glob

from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from scipy.optimize import root_scalar

from joblib import dump, load

from expon_mixture import ExponMixture 

In [7]:
# Load the table of fitted mixture parameters; the first CSV column is used
# as the row index (it is displayed as `instance` further down).
fits = pd.read_csv(
    './fits/expon_mix_2comp_fits.txt',
    index_col=0,
)

In [8]:
# Summary statistics for the rows whose p1 is strictly below 1.0.
fits.loc[fits['p1'] < 1.0].describe()

Unnamed: 0,p1,p2,lambda1,lambda2
count,158.0,158.0,158.0,158.0
mean,0.145675,0.854325,36134210.0,2132482000.0
std,0.299654,0.299654,418870200.0,12397640000.0
min,0.003333,0.003333,18.0,459.9383
25%,0.013333,0.933333,111.75,308195.9
50%,0.023333,0.976667,276.7045,3456719.0
75%,0.066667,0.986667,706.9048,52703120.0
max,0.996667,0.996667,5248641000.0,130546600000.0


In [9]:
# Show the fits table (pandas elides the middle rows). In the rows displayed
# below, p1 == 1.0 goes together with empty (NaN) p2 and lambda2 values.
fits

Unnamed: 0_level_0,p1,p2,lambda1,lambda2
instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
barthel/n210/gen_n210_m903_k3SAT_seed2473397791.cnf,1.000000,,3.214107e+03,
barthel/n210/gen_n210_m903_k3SAT_seed862748622.cnf,1.000000,,1.342518e+04,
barthel/n210/gen_n210_m903_k3SAT_seed4006075830.cnf,1.000000,,1.295157e+03,
barthel/n210/gen_n210_m903_k3SAT_seed1547818438.cnf,1.000000,,1.365623e+03,
barthel/n210/gen_n210_m903_k3SAT_seed3919912883.cnf,1.000000,,1.511253e+03,
...,...,...,...,...
qhid/n70/gen_n70_m385_k3SAT_seed2030441879.cnf,1.000000,,3.993567e+02,
qhid/n70/gen_n70_m385_k3SAT_seed3717411169.cnf,1.000000,,2.097433e+02,
qhid/n70/gen_n70_m385_k3SAT_seed684617509.cnf,1.000000,,2.370867e+02,
qhid/n70/gen_n70_m385_k3SAT_seed1293934752.cnf,1.000000,,1.008646e+11,


In [10]:
# Read the training feature table and index it by the `instance` column.
# set_index is chained so the frame is built in one expression rather than
# being mutated in place afterwards.
df = pd.read_csv('./calculate_features/features_train.csv').set_index('instance')

In [11]:
# Preview the first rows of the feature table.
df.head()

Unnamed: 0_level_0,nvarsOrig,nclausesOrig,nvars,nclauses,reducedVars,reducedClauses,Pre-featuretime,vars-clauses-ratio,POSNEG-RATIO-CLAUSE-mean,POSNEG-RATIO-CLAUSE-coeff-variation,...,gsat_FirstLocalMinStep_Q.10,gsat_FirstLocalMinStep_Q.90,gsat_BestAvgImprovement_Mean,gsat_BestAvgImprovement_CoeffVariance,gsat_FirstLocalMinRatio_Mean,gsat_FirstLocalMinRatio_CoeffVariance,ls-gsat-featuretime,lobjois-mean-depth-over-vars,lobjois-log-num-nodes-over-vars,lobjois-featuretime
instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
barthel/n210/gen_n210_m903_k3SAT_seed3555821415.cnf,210.0,903.0,199.0,892.0,0.055276,0.012332,0.0,0.223094,0.488565,0.604264,...,39.0,52.0,0.566566,0.41586,0.857284,0.050368,2.72,0.388437,0.765035,1.14
barthel/n210/gen_n210_m903_k3SAT_seed862748622.cnf,210.0,903.0,203.0,897.0,0.034483,0.006689,0.0,0.22631,0.493683,0.586009,...,40.0,53.0,0.550169,0.417447,0.849422,0.054019,2.72,0.365887,0.719841,1.1
barthel/n210/gen_n210_m903_k3SAT_seed3576518490.cnf,210.0,903.0,204.0,892.0,0.029412,0.012332,0.0,0.2287,0.501121,0.583115,...,40.0,53.0,0.554184,0.416429,0.858623,0.049907,2.62,0.365476,0.700129,1.12
barthel/n210/gen_n210_m903_k3SAT_seed2527888016.cnf,210.0,903.0,201.0,894.0,0.044776,0.010067,0.0,0.224832,0.495153,0.609906,...,40.0,53.0,0.579225,0.412855,0.86,0.051025,2.66,0.386035,0.933892,1.16
barthel/n210/gen_n210_m903_k3SAT_seed3919912883.cnf,210.0,903.0,205.0,897.0,0.02439,0.006689,0.0,0.22854,0.487737,0.589016,...,40.0,53.0,0.5637,0.410805,0.86364,0.048454,2.7,0.383312,0.786043,1.16


In [12]:
# Put features and fitted parameters side by side, keeping only the index
# labels that occur in both frames (inner join on the index).
merged_data = pd.concat((df, fits), axis='columns', join='inner')

In [13]:
# Keep only the rows with p1 < 1.0, then split them into targets and features.
v2 = merged_data.loc[merged_data['p1'] < 1.0]

# Regression targets: p1, lambda1 and lambda2 (p2 is not used as a target).
y = v2.loc[:, ['p1', 'lambda1', 'lambda2']]

# Feature matrix: everything except the four fit columns.
X = v2.drop(columns=['p1', 'p2', 'lambda1', 'lambda2'])

In [17]:
# Fit a single multi-output random forest: y carries the three target columns
# (p1, lambda1, lambda2), X the feature columns.
#
# random_state is pinned because forest construction is randomised; without a
# fixed seed every re-run produces a different model, so the dumped artifact
# and the predictions shown below could not be reproduced.
regr_rf = RandomForestRegressor(random_state=0)
regr_rf.fit(X, y)

RandomForestRegressor()

In [20]:
# Persist the fitted forest to the working directory so it can be reloaded
# without retraining.
dump(regr_rf, 'rf_regressor.joblib') 

['rf_regressor.joblib']

In [4]:
# Reload the persisted forest, replacing any in-memory `regr_rf`.
# NOTE(review): this cell's execution count is lower than the training cell's,
# so it was presumably run in a later kernel session; the file on disk may not
# be the model fitted above — confirm before relying on the outputs below.
# joblib.load is pickle-based: only load artifacts from a trusted source.
regr_rf = load('rf_regressor.joblib') 

In [14]:
# Predict the three targets for X. X is the same frame the forest above was
# fitted on, so (assuming the loaded model is that forest) these are in-sample
# predictions and do not measure performance on unseen instances.
regr_rf.predict(X)

array([[8.22933333e-01, 2.12648425e+05, 4.30166305e+07],
       [7.49366667e-01, 7.12882210e+03, 3.73102134e+07],
       [3.01033333e-01, 2.04523228e+04, 3.18499685e+05],
       [7.07533333e-01, 3.73850689e+04, 2.84907455e+07],
       [7.16966667e-01, 6.15304796e+03, 1.23425901e+10],
       [8.37200000e-01, 1.72223364e+05, 2.85690247e+08],
       [8.30166667e-01, 6.78179076e+04, 3.26524268e+07],
       [2.05166667e-01, 1.91380932e+04, 1.01832029e+08],
       [8.73666667e-01, 6.83956425e+03, 3.36240094e+07],
       [8.01533333e-01, 5.24913808e+07, 5.30447506e+08],
       [7.24066667e-01, 5.24888212e+07, 2.49108787e+09],
       [3.45166667e-01, 9.95512458e+03, 2.52947592e+07],
       [8.03133333e-01, 3.02602696e+04, 9.04879105e+07],
       [7.97000000e-01, 3.80417225e+04, 2.19530467e+08],
       [8.87600000e-01, 2.40942383e+04, 2.15131550e+07],
       [2.29666667e-02, 8.40096236e+03, 2.82985228e+08],
       [2.70666667e-02, 5.05180220e+02, 7.82220887e+09],
       [9.63333333e-03, 4.13944