In [1]:
import numpy as np
import pandas as pd

## FUNCTIONS

In [2]:
def transform_parameters(parameters, transformation = "quantile"):

    """
    Normalize every column of a parameter table with a freshly fitted scaler.

    @ parameters: pd.DataFrame of parameters to normalize
    @ transformation: Type of normalization applied: "quantile"
                      (QuantileTransformer) or "standard" (StandardScaler)

    Returns a tuple (normalized pd.DataFrame, fitted transformer). The
    DataFrame keeps the column names and the row index of `parameters`; the
    fitted transformer is returned so the normalization can be inverted later
    through its inverse_transform method.

    Raises ValueError if `transformation` is not a supported name.
    """

    # Validate first: previously an unknown name fell through both branches
    # and failed at the return statement with an unbound-local error.
    supported = ("quantile", "standard")
    if transformation not in supported:
        raise ValueError(
            f"Unsupported transformation {transformation!r}; expected one of {supported}"
        )

    from sklearn.preprocessing import QuantileTransformer, StandardScaler

    scaler_classes = {"quantile": QuantileTransformer, "standard": StandardScaler}

    t = scaler_classes[transformation]()
    trans_params = t.fit_transform(parameters)

    # Keep the caller's row labels so the result stays aligned with the input
    trans_params_df = pd.DataFrame(trans_params,
                                   columns=parameters.columns,
                                   index=parameters.index)

    return trans_params_df, t

In [3]:
def antitransform_parameters(parameters, transformer):

    """
    Undo a normalization: map normalized parameters back to their original scale.

    @ parameters: pd.DataFrame of normalized parameters
    @ transformer: Fitted transformer used for the normalization; only its
                   inverse_transform method is called

    Returns a pd.DataFrame with the same column names and row index as
    `parameters`.
    """

    restored = transformer.inverse_transform(parameters)

    # Keep the caller's row labels so the result stays aligned with the input
    return pd.DataFrame(restored, columns=parameters.columns, index=parameters.index)

In [4]:
def normalize_bands(frequencies, num_bands = 5, num_k_points  = 31, transformation = "quantile",
                    return_transformers = False):

    """
    Normalize the bands individually: a separate transformer is fitted on
    each band's block of columns.

    @ frequencies: pd.DataFrame of simulated frequencies per k-points, with
                   columns named "Band_<i>_k_<j>"; the columns of one band
                   must be contiguous, running from "Band_<i>_k_0" to
                   "Band_<i>_k_<num_k_points-1>"
    @ num_bands: Number of bands in simulations
    @ num_k_points: Number of k-points in simulations
    @ transformation: Type of normalization applied: "quantile"
                      (QuantileTransformer) or "standard" (StandardScaler)
    @ return_transformers: If True, also return the list of fitted
                           transformers (one per band, in band order)

    Returns the normalized pd.DataFrame (same column names and row index as
    the selected columns of `frequencies`), or a tuple
    (normalized pd.DataFrame, list of transformers) when
    `return_transformers` is True.

    Raises ValueError if `transformation` is not a supported name or if
    `num_bands` is smaller than 1.
    """

    # Validate first: previously an unknown name or num_bands = 0 only failed
    # later with an unbound-local error.
    supported = ("quantile", "standard")
    if transformation not in supported:
        raise ValueError(
            f"Unsupported transformation {transformation!r}; expected one of {supported}"
        )

    num_bands = int(num_bands)
    # int() so that e.g. 31.0 yields the label "..._k_30" and not "..._k_30.0"
    num_k_points = int(num_k_points)

    if num_bands < 1:
        raise ValueError(f"num_bands must be at least 1, got {num_bands}")

    from sklearn.preprocessing import QuantileTransformer, StandardScaler

    scaler_classes = {"quantile": QuantileTransformer, "standard": StandardScaler}

    band_frames = []
    transformers = []

    for i in range(num_bands):

        column_start = "Band_"+str(i)+"_k_0"
        column_end = "Band_"+str(i)+"_k_"+str(num_k_points-1)

        # Label-based slice: takes every column from column_start to
        # column_end, both inclusive
        df_band = frequencies.loc[:, column_start : column_end]

        t = scaler_classes[transformation]()
        trans_band = pd.DataFrame(t.fit_transform(df_band),
                                  columns=df_band.columns,
                                  index=df_band.index)

        band_frames.append(trans_band)
        transformers.append(t)

    # Concatenate once at the end instead of growing a frame inside the loop
    transformed_bands = pd.concat(band_frames, axis=1)

    if return_transformers:
        return transformed_bands, transformers

    return transformed_bands

## EXAMPLE OF USE:

### Transformation of parameters

In [5]:
# Uniform choice of 50 parameters in defined ranges.
# A seeded generator makes the sample identical on every Restart & Run All.

rng = np.random.default_rng(42)

n = rng.uniform(low=2, high=18, size=(50,))
e = rng.uniform(low=0.1, high=1.0, size=(50,))
r = rng.uniform(low=10, high=130, size=(50,)) # in nanometers
t = rng.uniform(low=0, high=90, size=(50,)) # in degrees

In [6]:
# One row per sampled parameter set, one column per parameter
params_df = pd.DataFrame(np.column_stack((n, e, r, t)), columns=["n", "e", "r", "t"])

In [7]:
# Preview the first 10 rows of the parameter table
params_df.head(10)

Unnamed: 0,n,e,r,t
0,2.796507,0.216158,10.571647,68.439945
1,2.717409,0.340354,110.846082,44.397909
2,14.005996,0.386771,90.670004,54.819699
3,14.44549,0.985716,85.256689,1.030931
4,13.54089,0.157336,53.613025,40.669918
5,14.839035,0.352766,105.517199,44.572544
6,14.270825,0.313338,54.184658,59.373187
7,10.411583,0.917487,121.196544,77.040623
8,10.64239,0.843751,101.117047,46.169345
9,15.806348,0.24924,19.531969,27.384907


In [8]:
# Keep the fitted transformer: it is needed later to undo the normalization
trans_params_df, transformer = transform_parameters(
    parameters=params_df,
    transformation="standard",
)

The same call also works with the QuantileTransformer: pass `transformation = "quantile"` instead of `"standard"`.

In [9]:
# Preview the first 10 rows of the normalized parameter table
trans_params_df.head(10)

Unnamed: 0,n,e,r,t
0,-1.928,-1.242138,-1.681635,1.095934
1,-1.946154,-0.798367,1.066677,0.103037
2,0.644723,-0.632514,0.513693,0.53344
3,0.745593,1.507595,0.365326,-1.687948
4,0.537975,-1.452317,-0.501961,-0.050922
5,0.835916,-0.75402,0.920624,0.11025
6,0.705505,-0.894901,-0.486293,0.721492
7,-0.180241,1.263803,1.350362,1.451128
8,-0.127268,1.000336,0.800025,0.176195
9,1.057927,-1.123929,-1.436051,-0.599572


In [10]:
# Show which transformer object transform_parameters returned
transformer

StandardScaler()

### Inverse transformation of parameters

In [11]:
# Map the normalized table back with the transformer fitted above
antitrans_params = antitransform_parameters(
    parameters=trans_params_df,
    transformer=transformer,
)

In [12]:
# Preview the first 10 rows of the restored table, to compare with params_df
antitrans_params.head(10)

Unnamed: 0,n,e,r,t
0,2.796507,0.216158,10.571647,68.439945
1,2.717409,0.340354,110.846082,44.397909
2,14.005996,0.386771,90.670004,54.819699
3,14.44549,0.985716,85.256689,1.030931
4,13.54089,0.157336,53.613025,40.669918
5,14.839035,0.352766,105.517199,44.572544
6,14.270825,0.313338,54.184658,59.373187
7,10.411583,0.917487,121.196544,77.040623
8,10.64239,0.843751,101.117047,46.169345
9,15.806348,0.24924,19.531969,27.384907


As we can see **we get the same result** as we had in the beginning!

###  Band normalization

In [13]:
# Load the frequencies table; the path is relative to the current working directory
frequencies = pd.read_csv('file_reader/frequencies_data.csv')  

We test a sample with 3239 simulations!

In [14]:
# Preview the first 10 rows of the loaded frequencies table
frequencies.head(10)

Unnamed: 0,Band_0_k_0,Band_0_k_1,Band_0_k_2,Band_0_k_3,Band_0_k_4,Band_0_k_5,Band_0_k_6,Band_0_k_7,Band_0_k_8,Band_0_k_9,...,Band_4_k_21,Band_4_k_22,Band_4_k_23,Band_4_k_24,Band_4_k_25,Band_4_k_26,Band_4_k_27,Band_4_k_28,Band_4_k_29,Band_4_k_30
0,0.0,0.2591,0.51819,0.77729,1.03638,1.29547,1.55455,1.81363,2.0727,2.33174,...,7.84786,7.52634,7.20901,6.89646,6.58935,6.2885,6.02166,5.73665,5.46049,5.21749
1,0.0,0.29122,0.58217,0.87254,1.16198,1.45004,1.736,2.01853,2.29418,2.54952,...,8.00736,7.73822,7.46038,7.17809,6.89442,6.7826,6.75986,6.52295,6.29366,6.17325
2,0.0,0.28548,0.57076,0.85561,1.13972,1.42261,1.7034,1.98027,2.24829,2.48946,...,7.9941,7.73389,7.46315,7.18395,6.89933,6.61304,6.72267,6.48437,6.26683,6.1681
3,0.0,0.29789,0.59544,0.89228,1.18795,1.48188,1.77313,2.05991,2.33765,2.58941,...,8.08047,7.81203,7.53079,7.24485,6.95887,6.97181,6.93343,6.6877,6.46084,6.34544
4,0.0,0.26866,0.53727,0.80578,1.07412,1.3422,1.60987,1.87683,2.14231,2.4027,...,7.88221,7.56867,7.25482,6.94395,6.63799,6.34148,6.338,6.05831,5.79489,5.628
5,0.0,0.263,0.52598,0.78894,1.05184,1.31467,1.57739,1.83992,2.1021,2.36311,...,7.85503,7.53406,7.2168,6.90416,6.59693,6.29608,6.16102,5.88639,5.61729,5.43269
6,0.0,0.26534,0.53066,0.79591,1.06107,1.32609,1.59093,1.85546,2.11944,2.3817,...,7.87126,7.55493,7.2397,6.92807,6.62157,6.32299,6.21483,5.95728,5.69801,5.53778
7,0.0,0.28707,0.57399,0.8606,1.14674,1.43223,1.71685,2.0003,2.28204,2.56019,...,7.97656,7.65788,7.34059,7.02677,6.71799,6.44952,6.80109,6.72492,6.48862,6.36634
8,0.0,0.26917,0.53828,0.80726,1.07604,1.34454,1.61261,1.88002,2.14615,2.40826,...,7.88375,7.5744,7.26306,6.95368,6.64853,6.35172,6.29046,6.07142,5.8318,5.69942
9,0.0,0.26883,0.5376,0.80624,1.07469,1.34284,1.61055,1.87754,2.14311,2.40406,...,7.88236,7.57198,7.25988,6.9498,6.64367,6.34293,6.2664,6.06141,5.827,5.69938


We infer the number of k-points and bands from the column names: the last column is `Band_4_k_30` and indexing starts from 0, so there are 5 bands and 31 k-points:

In [15]:
# Last column is Band_4_k_30 and both indices start at 0: 5 bands, 31 k-points
num_bands, num_k_points = 5, 31

In [16]:
# Pass the counts defined in the previous cell instead of repeating the
# literals, so that editing that cell actually changes this call.
norm_bands = normalize_bands (frequencies, num_bands = num_bands, num_k_points  = num_k_points,
                              transformation = "standard")

In [17]:
# Display the normalized bands table
norm_bands

Unnamed: 0,Band_0_k_0,Band_0_k_1,Band_0_k_2,Band_0_k_3,Band_0_k_4,Band_0_k_5,Band_0_k_6,Band_0_k_7,Band_0_k_8,Band_0_k_9,...,Band_4_k_21,Band_4_k_22,Band_4_k_23,Band_4_k_24,Band_4_k_25,Band_4_k_26,Band_4_k_27,Band_4_k_28,Band_4_k_29,Band_4_k_30
0,-0.029223,-0.599540,-0.636959,-0.640050,-0.643879,-0.641930,-0.638256,-0.632531,-0.622708,-0.599046,...,-0.376412,-0.394309,-0.407657,-0.420260,-0.442102,-0.492510,-0.662978,-0.700581,-0.728762,-0.814497
1,-0.029223,0.000832,0.004421,0.003964,0.000593,-0.002746,-0.007191,-0.013614,-0.024734,-0.053995,...,-0.221074,-0.192758,-0.170624,-0.155344,-0.162798,-0.070552,-0.040013,-0.022383,0.006463,0.032228
2,-0.029223,-0.106458,-0.109961,-0.110505,-0.113626,-0.116176,-0.120571,-0.129181,-0.148632,-0.204311,...,-0.233988,-0.196877,-0.168012,-0.149832,-0.158303,-0.215355,-0.071398,-0.055659,-0.017213,0.027665
3,-0.029223,0.125504,0.137449,0.137432,0.133849,0.128920,0.121943,0.111378,0.092631,0.045840,...,-0.149871,-0.122546,-0.104229,-0.092546,-0.103792,0.091032,0.106462,0.119717,0.153990,0.184774
4,-0.029223,-0.420849,-0.445688,-0.447420,-0.450229,-0.448690,-0.445859,-0.441630,-0.434768,-0.421451,...,-0.342958,-0.354042,-0.364460,-0.375588,-0.397570,-0.447266,-0.396020,-0.423144,-0.433673,-0.450819
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3234,-0.029223,-0.008888,-0.006005,-0.006381,-0.009772,-0.013415,-0.018947,-0.028354,-0.047089,-0.097443,...,-0.226849,-0.206466,-0.191341,-0.181372,-0.186209,-0.107906,0.017431,0.018250,0.032107,0.037508
3235,-0.029223,-0.501784,-0.532301,-0.534641,-0.537818,-0.535943,-0.532423,-0.526962,-0.517493,-0.495582,...,-0.360683,-0.375769,-0.388204,-0.400562,-0.422958,-0.474636,-0.542773,-0.539152,-0.545478,-0.554799
3236,-0.029223,0.078028,0.086724,0.086384,0.082794,0.078387,0.072452,0.063774,0.048946,0.011752,...,-0.186052,-0.161700,-0.143042,-0.130201,-0.137731,0.065873,0.074377,0.080947,0.114898,0.147415
3237,-0.029223,-0.508513,-0.539419,-0.541808,-0.545002,-0.543097,-0.539588,-0.534121,-0.524728,-0.502940,...,-0.361579,-0.376948,-0.389562,-0.402039,-0.424506,-0.476404,-0.556191,-0.550495,-0.556835,-0.566750


The same call also works with the QuantileTransformer: pass `transformation = "quantile"` instead of `"standard"`.