In [1]:
import numpy as np
import pandas as pd
import scipy.optimize as optimize
from fffit.utils import (
    shuffle_and_split,
    values_scaled_to_real,
)
import sys
# Add the parent directory to the module search path. The project-local
# `utils` package imported below is presumably resolved from there, so this
# line has to run before that import (TODO confirm the repository layout).
sys.path.append("../")


from utils.r143a import R143aConstants
# Constants object for the refrigerant. It is handed to `opt_dist` as the
# `constants` argument, which reads its `param_names` attribute.
R143a = R143aConstants()

In [2]:
# Load the top liquid- and vapor-density parameter sets written by iteration 1.
# The first CSV column is used as the row index of each frame.
top_liq = pd.read_csv(
    "../csv/r143a-density-iter1-liquid-params.csv",
    sep=",",
    index_col=0,
)
top_vap = pd.read_csv(
    "../csv/r143a-density-iter1-vapor-params.csv",
    sep=",",
    index_col=0,
)

In [3]:
# iternum = 2
# csv_path = "/scratch365/mcarlozo/HFC_143a_FFO_FF/r143a/analysis/csv/"
# in_csv_name = "r143a-density-iter" + str(iternum) + "-results.csv"
# out_csv_name = "r143a-density-iter" + str(iternum + 1) + "-params.csv"
# out_top_liquid_csv_name = "r143a-density-iter" + str(iternum ) + "-liquid-params.csv"
# out_top_vapor_csv_name = "r143a-density-iter" + str(iternum ) + "-vapor-params.csv"

# top_liq = pd.read_csv(csv_path + out_top_liquid_csv_name, delimiter = ",", index_col = 0)
# top_vap = pd.read_csv(csv_path + out_top_vapor_csv_name, delimiter = ",", index_col = 0)

In [4]:
# Replace the index read from the CSV files with a fresh 0..N-1 range index.
top_liq, top_vap = (
    frame.reset_index(drop=True) for frame in (top_liq, top_vap)
)

# Settings for the distance search performed in the cells below.
target_num = 100      # number of points to keep for each phase
dist_guess = 1        # initial guess handed to the optimizer
dist_seed = 10        # seed forwarded to opt_dist as `rand_seed`
bounds = [(0, None)]  # the distance is bounded below by zero, unbounded above

In [5]:
# print(top_liq)

In [6]:
def opt_dist(distance, top_samples, constants, target_num, rand_seed = None, eval = False):
    """
    Greedily thin a set of samples so that kept points are at least `distance` apart.

    The remaining samples are shuffled, the first one is kept, and every sample
    whose L1 distance to it (summed over ``constants.param_names``) is smaller
    than `distance` is removed. This repeats until no samples remain. Depending
    on `eval`, either the kept points or the squared deviation of their count
    from `target_num` is returned, so the function can serve both as an
    objective for ``scipy.optimize.minimize`` and as the final selector.

    Parameters:
    -----------
        distance: float or length-1 array, The allowable minimum distance between points
        top_samples: pandas data frame, Collection of top liquid/vapor samples (not modified)
        constants: utils.r143a.R143aConstants, contains the information for a certain refrigerant;
            only its ``param_names`` attribute is read
        target_num: int, the number of samples to choose next
        rand_seed: int, the seed number to use: None by default. When given, the
            global NumPy random state is re-seeded, which makes the shuffling reproducible
        eval: bool, Determines whether error is calculated or new_points is returned

    Returns:
        error: float, The squared error between the target value and number of new_points
        OR
        new_points: pandas data frame, a pandas data frame containing the number of points to be used
    """
    if rand_seed is not None:
        # DataFrame.sample draws from the global NumPy RNG when no random_state is passed.
        np.random.seed(rand_seed)

    param_cols = list(constants.param_names)
    remaining = top_samples
    selected = []  # one single-row frame per kept point; concatenated once at the end

    while len(remaining) > 0:
        # Shuffle the remaining points and keep the first one
        remaining = remaining.sample(frac=1)
        chosen = remaining.iloc[[0]]
        selected.append(chosen)

        # Remove anything within distance of the point just kept
        l1_norm = np.sum(
            np.abs(remaining[param_cols].values - chosen[param_cols].values),
            axis=1,
        )
        too_close = l1_norm < distance
        # The kept point itself must always leave the pool. With distance <= 0
        # the strict comparison above is False for it (0 < 0), which would
        # otherwise make this loop run forever.
        too_close[0] = True
        remaining = remaining.loc[~too_close]

    if selected:
        new_points = pd.concat(selected)
    else:
        new_points = pd.DataFrame(columns=top_samples.columns)

    if eval:
        return new_points
    return (target_num - len(new_points))**2

In [7]:
# Search for the minimum-separation distance whose greedy thinning of the top
# liquid samples leaves as close to `target_num` points as possible.
args_l = (top_liq, R143a, target_num, dist_seed)
solution_l = optimize.minimize(opt_dist, dist_guess, bounds=bounds, args=args_l, method='Nelder-Mead')
dist_opt_l = solution_l.x
new_points_l = opt_dist(dist_opt_l, top_liq, R143a, target_num, rand_seed=dist_seed, eval=True)

# If the optimized distance does not give exactly `target_num` points, keep the
# distance and try other shuffles. The retry must thin the *liquid* samples
# (the original passed `top_vap` here by mistake).
# Note: `dist_seed` is advanced in place, so later cells see the new value.
while len(new_points_l) != target_num:
    dist_seed += 1
    new_points_l = opt_dist(dist_opt_l, top_liq, R143a, target_num, rand_seed=dist_seed, eval=True)

print(len(new_points_l), "top liquid density points are left after removing similar points using a distance of"
      , np.round(dist_opt_l,5))
new_points_l

100 top liquid density points are left after removing similar points using a distance of [1.34375]


Unnamed: 0,sigma_C1,sigma_C2,sigma_F1,sigma_H1,epsilon_C1,epsilon_C2,epsilon_F1,epsilon_H1
4890,0.855491,0.343260,0.257899,0.895506,0.086149,0.698108,0.929700,0.386436
3365,0.867045,0.098902,0.652573,0.352863,0.080733,0.969942,0.694246,0.640382
2478,0.902060,0.728238,0.573574,0.056350,0.846480,0.493687,0.123433,0.183996
5334,0.148354,0.106583,0.748603,0.369954,0.690349,0.604635,0.997678,0.913073
1665,0.824118,0.229272,0.661109,0.458999,0.963919,0.574075,0.077897,0.794242
...,...,...,...,...,...,...,...,...
5496,0.096668,0.956107,0.186659,0.977177,0.156368,0.795380,0.871055,0.140093
2312,0.252251,0.335793,0.550188,0.819524,0.714969,0.322748,0.952246,0.999020
3346,0.690942,0.057219,0.410255,0.692059,0.305245,0.970417,0.460494,0.774714
4114,0.856577,0.518262,0.654714,0.715256,0.998708,0.339138,0.264236,0.420093


In [8]:
# Search for the minimum-separation distance whose greedy thinning of the top
# vapor samples leaves as close to `target_num` points as possible.
args_v = (top_vap, R143a, target_num, dist_seed)
solution_v = optimize.minimize(opt_dist, dist_guess, bounds=bounds, args=args_v, method='Nelder-Mead')
dist_opt_v = solution_v.x
new_points_v = opt_dist(dist_opt_v, top_vap, R143a, target_num, rand_seed=dist_seed, eval=True)

# If the optimized distance does not give exactly `target_num` points, keep the
# distance and try other shuffles until it does.
# Note: `dist_seed` is advanced in place, so later cells see the new value.
while len(new_points_v) != target_num:
    dist_seed += 1
    new_points_v = opt_dist(dist_opt_v, top_vap, R143a, target_num, rand_seed=dist_seed, eval=True)

# Message repaired: the original text had every "l" replaced by "v"
# ("viquid", "veft", "simivar").
print(len(new_points_v), "top vapor density points are left after removing similar points using a distance of"
      , np.round(dist_opt_v,5))
new_points_v

100 top viquid density points are veft after removing simivar points using a distance of [0.83125]


Unnamed: 0,sigma_C1,sigma_C2,sigma_F1,sigma_H1,epsilon_C1,epsilon_C2,epsilon_F1,epsilon_H1
16,0.975353,0.290719,0.580944,0.197721,0.312727,0.347546,0.456071,0.992134
122,0.113193,0.515903,0.357673,0.848227,0.703194,0.486292,0.526738,0.945810
146,0.150932,0.058594,0.457216,0.663563,0.850335,0.861994,0.428529,0.899232
77,0.607174,0.380483,0.449415,0.835516,0.553293,0.245617,0.645935,0.977117
254,0.182102,0.223335,0.394433,0.959247,0.954313,0.899046,0.805838,0.243042
...,...,...,...,...,...,...,...,...
161,0.949339,0.132454,0.643023,0.100938,0.110031,0.676661,0.509105,0.867131
155,0.970235,0.476779,0.393574,0.581229,0.684531,0.072088,0.145180,0.978712
187,0.732970,0.817605,0.444647,0.325993,0.840264,0.008624,0.153378,0.922369
53,0.811071,0.814599,0.227812,0.887362,0.921455,0.035080,0.010206,0.868692
