In [1]:
import numpy as np
import pandas as pd

## FUNCTIONS

In [2]:
def transform_parameters (parameters, transformation = "quantile"):
    
    """
    This function is used to normalize the parameters
    to the range (0,1)
    @ parameters: pd.DataFrame of parameters to normalize
    @ transformation: Type of normalization applied i.e. quantile, standard...
    """
    
    columns = parameters.columns
    
    from sklearn.preprocessing import QuantileTransformer, StandardScaler
    import pandas as pd
    
    if transformation == "quantile":
        
        t = QuantileTransformer()
        trans_params =  t.fit_transform(parameters)
        
    if transformation == "standard":
        
        t = StandardScaler()
        trans_params =  t.fit_transform(parameters)
    
    return pd.DataFrame(trans_params, columns=columns), t

In [3]:
def antitransform_parameters (parameters, transformer):
    
    """
    This function is used to normalize the parameters
    to the range (0,1)
    @ parameters: pd.DataFrame of normalized parameters
    @ transformer: Transformer used for normalization
    """  
    
    columns = parameters.columns

    return pd.DataFrame(transformer.inverse_transform(parameters), columns=columns)

In [4]:
def normalize_bands (frequencies, num_bands = 5, num_k_points  = 31, transformation = "quantile"):
    
    """
    This function is used to normalize the bands individually
    across all simulations to the range (0,1)
    @ frequencies: pd.DataFrame of simulated frequencies per k-points
    @ num_bands: Number of bands in simulations
    @ num_k_points: Number of k-points in simulations
    @ transformation: Type of normalization applied i.e. quantile, standard...
    """
    
    from sklearn.preprocessing import QuantileTransformer, StandardScaler
    import pandas as pd
    
    
    for i in range(int(num_bands)-1):
        
        column_start = "Band_"+str(i)+"_k_0"
        column_end = "Band_"+str(i)+"_k_30"
    
        df_band = frequencies.loc[:, column_start : column_end]
        columns = df_band.columns
        
        if transformation == "quantile":
        
            t = QuantileTransformer()
            trans_band =  t.fit_transform(df_band)
            trans_band = pd.DataFrame(df_band, columns=columns)
        
        if transformation == "standard":
        
            t = StandardScaler()
            trans_band =  t.fit_transform(df_band) 
            trans_band = pd.DataFrame(df_band, columns=columns)
            
        if i == 0:
            
            transformed_bands = trans_band.copy()
            
        else:
            
            transformed_bands = pd.concat([transformed_bands, trans_band], axis=1)
    
    return transformed_bands

## EXAMPLE OF USE:

### Transformation of parameters

In [5]:
# Uniform choice of 50 parameters in defined ranges

n = np.random.uniform(low=2, high=18, size=(50,))
e = np.random.uniform(low=0.1, high=1.0, size=(50,))
r = np.random.uniform(low=10, high=130, size=(50,)) #in nanometers
t = np.random.uniform(low=0, high=90, size=(50,)) # in º

In [6]:
params_df = pd.DataFrame(np.vstack([n,e,r,t]).T, columns = ["n","e","r","t"])

In [7]:
params_df.head(10)

Unnamed: 0,n,e,r,t
0,2.487808,0.509386,33.753952,63.246107
1,16.664406,0.270516,105.765231,76.997319
2,15.533593,0.894016,101.541753,3.232401
3,2.931236,0.149625,95.462325,87.536877
4,4.796324,0.649586,97.707549,57.905569
5,15.702092,0.569063,33.617318,11.407366
6,16.383733,0.882953,106.29738,66.861534
7,17.631752,0.514427,117.070513,46.859784
8,10.517913,0.347073,17.124783,21.066158
9,12.915177,0.862252,23.717603,18.904563


In [8]:
trans_params_df, transformer = transform_parameters (params_df, transformation = "standard")

You can also check that it works with the QuantileTransformer()

In [9]:
trans_params_df.head(10)

Unnamed: 0,n,e,r,t
0,-1.417785,0.119727,-1.193949,0.66049
1,1.391033,-0.823939,0.902028,1.189984
2,1.166984,1.639217,0.779099,-1.650352
3,-1.329928,-1.30152,0.602149,1.595812
4,-0.960397,0.673591,0.667499,0.454852
5,1.200369,0.355483,-1.197926,-1.335573
6,1.335423,1.595513,0.917517,0.799703
7,1.582693,0.139641,1.231082,0.029531
8,0.173224,-0.521498,-1.677961,-0.963659
9,0.648196,1.513733,-1.486069,-1.046892


In [10]:
transformer

StandardScaler()

### Antitransformation of  parameters

In [11]:
antitrans_params = antitransform_parameters (trans_params_df, transformer)

In [12]:
antitrans_params.head(10)

Unnamed: 0,n,e,r,t
0,2.487808,0.509386,33.753952,63.246107
1,16.664406,0.270516,105.765231,76.997319
2,15.533593,0.894016,101.541753,3.232401
3,2.931236,0.149625,95.462325,87.536877
4,4.796324,0.649586,97.707549,57.905569
5,15.702092,0.569063,33.617318,11.407366
6,16.383733,0.882953,106.29738,66.861534
7,17.631752,0.514427,117.070513,46.859784
8,10.517913,0.347073,17.124783,21.066158
9,12.915177,0.862252,23.717603,18.904563


As we can see **we get the same result** as we had in the beginning!

###  Band normalization

In [13]:
frequencies = pd.read_csv('file_reader/frequencies_data.csv')  

We test a sample with 50 simulations!

In [14]:
frequencies_sample  = frequencies.head(50)

In [15]:
frequencies_sample.head(10)

Unnamed: 0,Band_0_k_0,Band_0_k_1,Band_0_k_2,Band_0_k_3,Band_0_k_4,Band_0_k_5,Band_0_k_6,Band_0_k_7,Band_0_k_8,Band_0_k_9,...,Band_4_k_21,Band_4_k_22,Band_4_k_23,Band_4_k_24,Band_4_k_25,Band_4_k_26,Band_4_k_27,Band_4_k_28,Band_4_k_29,Band_4_k_30
0,0.0,0.2591,0.51819,0.77729,1.03638,1.29547,1.55455,1.81363,2.0727,2.33174,...,7.84786,7.52634,7.20901,6.89646,6.58935,6.2885,6.02166,5.73665,5.46049,5.21749
1,0.0,0.29122,0.58217,0.87254,1.16198,1.45004,1.736,2.01853,2.29418,2.54952,...,8.00736,7.73822,7.46038,7.17809,6.89442,6.7826,6.75986,6.52295,6.29366,6.17325
2,0.0,0.28548,0.57076,0.85561,1.13972,1.42261,1.7034,1.98027,2.24829,2.48946,...,7.9941,7.73389,7.46315,7.18395,6.89933,6.61304,6.72267,6.48437,6.26683,6.1681
3,0.0,0.29789,0.59544,0.89228,1.18795,1.48188,1.77313,2.05991,2.33765,2.58941,...,8.08047,7.81203,7.53079,7.24485,6.95887,6.97181,6.93343,6.6877,6.46084,6.34544
4,0.0,0.26866,0.53727,0.80578,1.07412,1.3422,1.60987,1.87683,2.14231,2.4027,...,7.88221,7.56867,7.25482,6.94395,6.63799,6.34148,6.338,6.05831,5.79489,5.628
5,0.0,0.263,0.52598,0.78894,1.05184,1.31467,1.57739,1.83992,2.1021,2.36311,...,7.85503,7.53406,7.2168,6.90416,6.59693,6.29608,6.16102,5.88639,5.61729,5.43269
6,0.0,0.26534,0.53066,0.79591,1.06107,1.32609,1.59093,1.85546,2.11944,2.3817,...,7.87126,7.55493,7.2397,6.92807,6.62157,6.32299,6.21483,5.95728,5.69801,5.53778
7,0.0,0.28707,0.57399,0.8606,1.14674,1.43223,1.71685,2.0003,2.28204,2.56019,...,7.97656,7.65788,7.34059,7.02677,6.71799,6.44952,6.80109,6.72492,6.48862,6.36634
8,0.0,0.26917,0.53828,0.80726,1.07604,1.34454,1.61261,1.88002,2.14615,2.40826,...,7.88375,7.5744,7.26306,6.95368,6.64853,6.35172,6.29046,6.07142,5.8318,5.69942
9,0.0,0.26883,0.5376,0.80624,1.07469,1.34284,1.61055,1.87754,2.14311,2.40406,...,7.88236,7.57198,7.25988,6.9498,6.64367,6.34293,6.2664,6.06141,5.827,5.69938


We infere the number of k_points and bands from the columns (check last  column, indexing starts from 0):

In [16]:
num_bands = 5
num_k_points = 31

In [17]:
norm_bands = normalize_bands (frequencies, num_bands = 5, num_k_points  = 31,
                              transformation = "standard")

In [18]:
norm_bands

Unnamed: 0,Band_0_k_0,Band_0_k_1,Band_0_k_2,Band_0_k_3,Band_0_k_4,Band_0_k_5,Band_0_k_6,Band_0_k_7,Band_0_k_8,Band_0_k_9,...,Band_3_k_21,Band_3_k_22,Band_3_k_23,Band_3_k_24,Band_3_k_25,Band_3_k_26,Band_3_k_27,Band_3_k_28,Band_3_k_29,Band_3_k_30
0,0.0,0.25910,0.51819,0.77729,1.03638,1.29547,1.55455,1.81363,2.07270,2.33174,...,4.03105,4.39709,4.76331,5.12952,5.49566,5.86143,5.99483,5.70947,5.43372,5.17037
1,0.0,0.29122,0.58217,0.87254,1.16198,1.45004,1.73600,2.01853,2.29418,2.54952,...,5.25336,5.47242,5.77133,6.10849,6.45781,6.60658,6.32886,6.05823,5.81107,5.67339
2,0.0,0.28548,0.57076,0.85561,1.13972,1.42261,1.70340,1.98027,2.24829,2.48946,...,4.70291,5.01139,5.36024,5.72033,6.08011,6.43133,6.32711,6.04807,5.78450,5.61342
3,0.0,0.29789,0.59544,0.89228,1.18795,1.48188,1.77313,2.05991,2.33765,2.58941,...,5.44347,5.65701,5.95088,6.28410,6.62970,6.66242,6.37849,6.09639,5.82049,5.79950
4,0.0,0.26866,0.53727,0.80578,1.07412,1.34220,1.60987,1.87683,2.14231,2.40270,...,4.27948,4.60738,4.96699,5.33372,5.70162,6.06503,6.03705,5.75459,5.48421,5.37585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3234,0.0,0.29070,0.58113,0.87101,1.15996,1.44746,1.73262,2.01365,2.28590,2.53216,...,5.09566,5.32391,5.62869,5.96575,6.30696,6.49577,6.25504,5.98416,5.77067,5.82622
3235,0.0,0.26433,0.52863,0.79288,1.05705,1.32110,1.58498,1.84858,2.11167,2.37308,...,4.15882,4.50658,4.87002,5.23607,5.60110,5.95852,6.01401,5.72870,5.45278,5.21925
3236,0.0,0.29535,0.59038,0.88473,1.17800,1.46966,1.75890,2.04415,2.32147,2.57579,...,5.38883,5.60201,5.89560,6.22901,6.57554,6.62023,6.34019,6.06091,5.79032,5.74312
3237,0.0,0.26397,0.52792,0.79182,1.05565,1.31937,1.58292,1.84621,2.10899,2.37014,...,4.14869,4.49821,4.86191,5.22782,5.59246,5.94897,6.01322,5.72736,5.45120,5.20115


You can also check that it works with the QuantileTransformer()