In [1]:
import numpy as np
import pandas as pd

## FUNCTIONS

In [2]:
def transform_parameters (parameters, transformation = "quantile"):
    
    """
    This function is used to normalize the parameters.
    @ parameters: pd.DataFrame of parameters to normalize
    @ transformation: Type of normalization applied i.e. quantile, standard...
    """
    
    columns = parameters.columns
    
    from sklearn.preprocessing import QuantileTransformer, StandardScaler
    import pandas as pd
    
    if transformation == "quantile":
        
        t = QuantileTransformer()
        trans_params =  t.fit_transform(parameters)
        
    if transformation == "standard":
        
        t = StandardScaler()
        trans_params =  t.fit_transform(parameters)
    
    return pd.DataFrame(trans_params, columns=columns), t

In [3]:
def antitransform_parameters (parameters, transformer):
    
    """
    This function is used to normalize the parameters.
    @ parameters: pd.DataFrame of normalized parameters
    @ transformer: Transformer used for normalization
    """  
    
    columns = parameters.columns

    return pd.DataFrame(transformer.inverse_transform(parameters), columns=columns)

In [4]:
def normalize_bands (frequencies, num_bands = 5, num_k_points  = 31, transformation = "quantile"):
    
    """
    This function is used to normalize the bands individually.
    @ frequencies: pd.DataFrame of simulated frequencies per k-points
    @ num_bands: Number of bands in simulations
    @ num_k_points: Number of k-points in simulations
    @ transformation: Type of normalization applied i.e. quantile, standard...
    """
    
    from sklearn.preprocessing import QuantileTransformer, StandardScaler
    import pandas as pd
    
    transformers = []
    
    
    for i in range(int(num_bands)):
        
        column_start = "Band_"+str(i)+"_k_0"
        column_end = "Band_"+str(i)+"_k_"+str(num_k_points-1)
    
        df_band = frequencies.loc[:, column_start : column_end]
        columns = df_band.columns
        
        if transformation == "quantile":
        
            t = QuantileTransformer()
            trans_band =  t.fit_transform(df_band)
            trans_band = pd.DataFrame(trans_band, columns=columns)
            transformers.append(t)
        
        if transformation == "standard":
        
            t = StandardScaler()
            trans_band =  t.fit_transform(df_band) 
            trans_band = pd.DataFrame(trans_band, columns=columns)
            transformers.append(t)
            
        if i == 0:
            
            transformed_bands = trans_band.copy()
            
        else:
            
            transformed_bands = pd.concat([transformed_bands, trans_band], axis=1)
    
    return transformed_bands, transformers

In [5]:
def unormalize_bands (frequencies, transformers, num_bands = 5, num_k_points  = 31):
    
    """
    This function is used to normalize the bands individually.
    @ frequencies: pd.DataFrame of simulated frequencies per k-points
    @ transformers: List of transformers returned by normalizer
    @ num_bands: Number of bands in simulations
    @ num_k_points: Number of k-points in simulations
    """
    
    from sklearn.preprocessing import QuantileTransformer, StandardScaler
    import pandas as pd
    
    
    for i in range(int(num_bands)):
        
        column_start = "Band_"+str(i)+"_k_0"
        column_end = "Band_"+str(i)+"_k_"+str(num_k_points-1)
    
        df_band = frequencies.loc[:, column_start : column_end]
        columns = df_band.columns
        
        transformer = transformers [i]
        antitrans_band = pd.DataFrame(transformer.inverse_transform(df_band), columns=columns)
            
        if i == 0:
            
            antitransformed_bands = antitrans_band.copy()
            
        else:
            
            antitransformed_bands = pd.concat([antitransformed_bands, antitrans_band], axis=1)
    
    return antitransformed_bands

## EXAMPLE OF USE:

### Transformation of parameters

In [6]:
# Uniform choice of 50 parameters in defined ranges

n = np.random.uniform(low=2, high=18, size=(50,))
e = np.random.uniform(low=0.1, high=1.0, size=(50,))
r = np.random.uniform(low=10, high=130, size=(50,)) #in nanometers
t = np.random.uniform(low=0, high=90, size=(50,)) # in º

In [7]:
params_df = pd.DataFrame(np.vstack([n,e,r,t]).T, columns = ["n","e","r","t"])

In [8]:
params_df.head(10)

Unnamed: 0,n,e,r,t
0,5.416725,0.20015,24.521795,59.503246
1,16.97677,0.962415,23.884085,25.463958
2,9.549508,0.948855,40.990779,84.537446
3,17.865483,0.126069,74.896106,64.215138
4,10.096541,0.133503,62.311893,45.952591
5,2.552653,0.579424,60.675644,62.366345
6,2.469886,0.647776,23.95303,57.194871
7,11.41575,0.202012,48.789398,5.411192
8,6.082812,0.68689,101.990711,79.964913
9,8.285065,0.261366,56.67281,67.20192


In [9]:
trans_params_df, transformer = transform_parameters (params_df, transformation = "quantile")



You can also check that it works with the QuantileTransformer()

In [10]:
trans_params_df.head(10)

Unnamed: 0,n,e,r,t
0,0.163265,0.102041,0.061224,0.591837
1,0.959184,0.979592,0.020408,0.22449
2,0.530612,0.938776,0.163265,0.938776
3,1.0,0.040816,0.653061,0.693878
4,0.571429,0.061224,0.489796,0.367347
5,0.020408,0.469388,0.428571,0.632653
6,0.0,0.571429,0.040816,0.571429
7,0.632653,0.122449,0.326531,0.020408
8,0.22449,0.673469,0.77551,0.877551
9,0.387755,0.204082,0.387755,0.714286


In [11]:
transformer

QuantileTransformer()

### Antitransformation of  parameters

In [12]:
antitrans_params = antitransform_parameters (trans_params_df, transformer)

In [13]:
antitrans_params.head(10)

Unnamed: 0,n,e,r,t
0,5.416725,0.20015,24.521795,59.503246
1,16.97677,0.962415,23.884085,25.463958
2,9.549508,0.948855,40.990779,84.537446
3,17.865483,0.126069,74.896106,64.215138
4,10.096541,0.133503,62.311893,45.952591
5,2.552653,0.579424,60.675644,62.366345
6,2.469886,0.647776,23.95303,57.194871
7,11.41575,0.202012,48.789398,5.411192
8,6.082812,0.68689,101.990711,79.964913
9,8.285065,0.261366,56.67281,67.20192


As we can see **we get the same result** as we had in the beginning!

###  Band normalization

In [14]:
frequencies = pd.read_csv('file_reader/frequencies_data.csv')  

We test a sample with 3239 simulations!

In [15]:
frequencies.head(10)

Unnamed: 0,Band_0_k_0,Band_0_k_1,Band_0_k_2,Band_0_k_3,Band_0_k_4,Band_0_k_5,Band_0_k_6,Band_0_k_7,Band_0_k_8,Band_0_k_9,...,Band_4_k_21,Band_4_k_22,Band_4_k_23,Band_4_k_24,Band_4_k_25,Band_4_k_26,Band_4_k_27,Band_4_k_28,Band_4_k_29,Band_4_k_30
0,0.0,0.2591,0.51819,0.77729,1.03638,1.29547,1.55455,1.81363,2.0727,2.33174,...,7.84786,7.52634,7.20901,6.89646,6.58935,6.2885,6.02166,5.73665,5.46049,5.21749
1,0.0,0.29122,0.58217,0.87254,1.16198,1.45004,1.736,2.01853,2.29418,2.54952,...,8.00736,7.73822,7.46038,7.17809,6.89442,6.7826,6.75986,6.52295,6.29366,6.17325
2,0.0,0.28548,0.57076,0.85561,1.13972,1.42261,1.7034,1.98027,2.24829,2.48946,...,7.9941,7.73389,7.46315,7.18395,6.89933,6.61304,6.72267,6.48437,6.26683,6.1681
3,0.0,0.29789,0.59544,0.89228,1.18795,1.48188,1.77313,2.05991,2.33765,2.58941,...,8.08047,7.81203,7.53079,7.24485,6.95887,6.97181,6.93343,6.6877,6.46084,6.34544
4,0.0,0.26866,0.53727,0.80578,1.07412,1.3422,1.60987,1.87683,2.14231,2.4027,...,7.88221,7.56867,7.25482,6.94395,6.63799,6.34148,6.338,6.05831,5.79489,5.628
5,0.0,0.263,0.52598,0.78894,1.05184,1.31467,1.57739,1.83992,2.1021,2.36311,...,7.85503,7.53406,7.2168,6.90416,6.59693,6.29608,6.16102,5.88639,5.61729,5.43269
6,0.0,0.26534,0.53066,0.79591,1.06107,1.32609,1.59093,1.85546,2.11944,2.3817,...,7.87126,7.55493,7.2397,6.92807,6.62157,6.32299,6.21483,5.95728,5.69801,5.53778
7,0.0,0.28707,0.57399,0.8606,1.14674,1.43223,1.71685,2.0003,2.28204,2.56019,...,7.97656,7.65788,7.34059,7.02677,6.71799,6.44952,6.80109,6.72492,6.48862,6.36634
8,0.0,0.26917,0.53828,0.80726,1.07604,1.34454,1.61261,1.88002,2.14615,2.40826,...,7.88375,7.5744,7.26306,6.95368,6.64853,6.35172,6.29046,6.07142,5.8318,5.69942
9,0.0,0.26883,0.5376,0.80624,1.07469,1.34284,1.61055,1.87754,2.14311,2.40406,...,7.88236,7.57198,7.25988,6.9498,6.64367,6.34293,6.2664,6.06141,5.827,5.69938


We infere the number of k_points and bands from the columns (check last  column, indexing starts from 0):

In [16]:
num_bands = 5
num_k_points = 31

In [17]:
norm_bands, transformers = normalize_bands (frequencies, num_bands = 5, num_k_points  = 31,
                              transformation = "quantile")

In [18]:
norm_bands

Unnamed: 0,Band_0_k_0,Band_0_k_1,Band_0_k_2,Band_0_k_3,Band_0_k_4,Band_0_k_5,Band_0_k_6,Band_0_k_7,Band_0_k_8,Band_0_k_9,...,Band_4_k_21,Band_4_k_22,Band_4_k_23,Band_4_k_24,Band_4_k_25,Band_4_k_26,Band_4_k_27,Band_4_k_28,Band_4_k_29,Band_4_k_30
0,0.0,0.031031,0.030030,0.030266,0.030210,0.030030,0.029924,0.029924,0.029925,0.029957,...,0.038256,0.038256,0.038038,0.038372,0.038038,0.038332,0.028817,0.029035,0.029232,0.019615
1,0.0,0.729288,0.729180,0.728543,0.728470,0.727915,0.727609,0.727987,0.726650,0.723948,...,0.714597,0.721758,0.726626,0.731534,0.734599,0.751847,0.707500,0.701822,0.707101,0.700799
2,0.0,0.671736,0.671721,0.671344,0.671184,0.669956,0.668669,0.663023,0.654783,0.621166,...,0.701571,0.717300,0.728749,0.736480,0.737528,0.686936,0.687362,0.680392,0.692635,0.698280
3,0.0,0.775805,0.775799,0.775321,0.775010,0.774779,0.773746,0.773194,0.771481,0.765831,...,0.775843,0.780271,0.779856,0.777754,0.773784,0.794188,0.772612,0.764182,0.768379,0.770194
4,0.0,0.403403,0.403403,0.403540,0.403357,0.403030,0.403056,0.402357,0.399848,0.393101,...,0.405264,0.403093,0.401450,0.402408,0.404232,0.412440,0.432892,0.404111,0.394348,0.378269
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3234,0.0,0.723685,0.723685,0.723587,0.723617,0.723584,0.722802,0.721239,0.713713,0.698560,...,0.709709,0.708820,0.706855,0.708764,0.714993,0.737242,0.734783,0.724815,0.719095,0.704720
3235,0.0,0.272654,0.272354,0.272302,0.272244,0.271868,0.271793,0.271603,0.271205,0.270560,...,0.285984,0.289630,0.291348,0.292050,0.292780,0.293633,0.248275,0.272120,0.279165,0.287254
3236,0.0,0.757887,0.757879,0.757505,0.757315,0.756945,0.755723,0.754829,0.754710,0.753258,...,0.747676,0.749518,0.751285,0.751720,0.752189,0.788827,0.760702,0.747737,0.749884,0.749971
3237,0.0,0.260346,0.260260,0.260260,0.260135,0.259440,0.259338,0.259149,0.258327,0.256708,...,0.279221,0.284305,0.284720,0.284974,0.284612,0.282281,0.224946,0.257053,0.266013,0.273390


You can also check that it works with the QuantileTransformer()

In [19]:
transformers

[QuantileTransformer(),
 QuantileTransformer(),
 QuantileTransformer(),
 QuantileTransformer(),
 QuantileTransformer()]

In [20]:
unorm_bands = unormalize_bands (norm_bands, transformers, num_bands = 5, num_k_points  = 31)

In [21]:
unorm_bands

Unnamed: 0,Band_0_k_0,Band_0_k_1,Band_0_k_2,Band_0_k_3,Band_0_k_4,Band_0_k_5,Band_0_k_6,Band_0_k_7,Band_0_k_8,Band_0_k_9,...,Band_4_k_21,Band_4_k_22,Band_4_k_23,Band_4_k_24,Band_4_k_25,Band_4_k_26,Band_4_k_27,Band_4_k_28,Band_4_k_29,Band_4_k_30
0,0.0,0.25910,0.51819,0.77729,1.03638,1.29547,1.55455,1.81363,2.07270,2.33174,...,7.84786,7.52634,7.20901,6.89646,6.58935,6.28850,6.02166,5.73665,5.46049,5.21749
1,0.0,0.29122,0.58217,0.87254,1.16198,1.45004,1.73600,2.01853,2.29418,2.54952,...,8.00736,7.73822,7.46038,7.17809,6.89442,6.78260,6.75986,6.52295,6.29366,6.17325
2,0.0,0.28548,0.57076,0.85561,1.13972,1.42261,1.70340,1.98027,2.24829,2.48946,...,7.99410,7.73389,7.46315,7.18395,6.89933,6.61304,6.72267,6.48437,6.26683,6.16810
3,0.0,0.29789,0.59544,0.89228,1.18795,1.48188,1.77313,2.05991,2.33765,2.58941,...,8.08047,7.81203,7.53079,7.24485,6.95887,6.97181,6.93343,6.68770,6.46084,6.34544
4,0.0,0.26866,0.53727,0.80578,1.07412,1.34220,1.60987,1.87683,2.14231,2.40270,...,7.88221,7.56867,7.25482,6.94395,6.63799,6.34148,6.33800,6.05831,5.79489,5.62800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3234,0.0,0.29070,0.58113,0.87101,1.15996,1.44746,1.73262,2.01365,2.28590,2.53216,...,8.00143,7.72381,7.43841,7.15042,6.86885,6.73886,6.82793,6.57006,6.32272,6.17921
3235,0.0,0.26433,0.52863,0.79288,1.05705,1.32110,1.58498,1.84858,2.11167,2.37308,...,7.86401,7.54583,7.22964,6.91740,6.61026,6.30943,6.16410,5.92381,5.66819,5.51063
3236,0.0,0.29535,0.59038,0.88473,1.17800,1.46966,1.75890,2.04415,2.32147,2.57579,...,8.04332,7.77087,7.48963,7.20482,6.92180,6.94235,6.89541,6.64275,6.41654,6.30327
3237,0.0,0.26397,0.52792,0.79182,1.05565,1.31937,1.58292,1.84621,2.10899,2.37014,...,7.86309,7.54459,7.22820,6.91583,6.60857,6.30736,6.14820,5.91066,5.65532,5.49714
