The goal of this workbook is to generate all of the simulated ellipsometric spectra that will be used for training neural networks. First, load any previously defined functions. These include the functions developed to simulate Tauc-Lorentz and Cody-Lorentz oscillators and the Bruggeman effective medium approximation (EMA).

In [1]:
#Imports libraries and defines functions
%run Functions-6-22-25.ipynb

Now load in the needed optical properties

In [5]:
# Load the reference optical-property tables that every simulation cell below reuses.
# All four CSV files are read relative to the working directory set here.
os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY TO THE FOLDER LABELED "Optical Properties" in the XXXX space ####################
file =  "Junda_SLG.csv"
SLG = pd.read_csv(file)
SLG.name = 'SLG'  # label read back by SE_Sim when it builds the name row of a written file

# Combine the 'e1' and 'e2' columns into one complex column
# (presumably the real and imaginary parts of the dielectric function - confirm).
SLG['e'] = SLG['e1'].to_numpy() +  1j * SLG['e2'].to_numpy()

# Load in optical properties for void (used as the first entry of every Structure list below)
file =  "Void.csv"
Void = pd.read_csv(file)
Void.name = "Void"

# The SLG table supplies the spectral grid handed to Get_CL_Material in the cells below.
E = SLG['Energy (eV)'] # define Energy range
wv = SLG['Wavelength (nm)'] # Define Wavelength range 

# Now we also need to load in the Si native oxide and the Si wafer. 

file =  "Si_JAW.csv"
Si_JAW = pd.read_csv(file)
Si_JAW.name = "Si_JAW"


file =  "NTVE_JAW.csv"
NTVE_JAW = pd.read_csv(file)
NTVE_JAW.name = "NTVE_JAW"


In [7]:
def Snells_Law(Structure, AOI_degrees):
    """
    Propagate the angle of incidence through a layer stack using Snell's law.

    Parameters:
    - Structure: sequence of pandas DataFrames, ordered from the incident medium
      onward. Each frame supplies its complex refractive index either as a
      column 'N' or as the pair of columns 'n' and 'k' (combined as n + 1j*k).
      A frame with neither is represented by a row of NaN values.
    - AOI_degrees: angle of incidence in the first medium, in degrees.

    Returns:
    - 2D complex NumPy array of shape (num_layers, num_points) holding the
      propagation angle inside every layer, in radians (not degrees).
    """

    def _index_of(frame):
        # Resolve one layer's refractive index, preferring an explicit 'N' column.
        cols = frame.columns
        if "N" in cols:
            return frame["N"].to_numpy()
        if "n" in cols and "k" in cols:
            return frame["n"].to_numpy() + 1j * frame["k"].to_numpy()
        # No usable columns: keep the stack rectangular with NaN placeholders.
        return np.full(frame.shape[0], np.nan + 1j * np.nan)

    index = np.stack([_index_of(frame) for frame in Structure])
    layer_count, point_count = index.shape

    # Row 0 is the incident medium; the scalar angle broadcasts over all points.
    theta = np.zeros((layer_count, point_count), dtype=complex)
    theta[0] = np.radians(AOI_degrees)

    # Walk interface by interface: n_prev * sin(theta_prev) = n_next * sin(theta_next).
    for below in range(1, layer_count):
        above = below - 1
        theta[below] = np.arcsin(index[above] / index[below] * np.sin(theta[above]))

    return theta

In [9]:
def fresnel_coefficients(N, angles):
    """
    Evaluate the Fresnel amplitude coefficients at every interface of a stack.

    Parameters:
    - N: (num_layers, num_points) array of refractive indices
    - angles: (num_layers, num_points) array of propagation angles in radians

    Returns:
    - rs, rp, ts, tp: reflection (r) and transmission (t) coefficients for s and
      p polarization, each of shape (num_layers - 1, num_points). Row i
      describes the interface between layer i and layer i + 1.

    No guard against a zero denominator is applied.
    """
    # Pair each layer (incident side) with the layer that follows it.
    n_in, n_out = N[:-1], N[1:]
    cos_in = np.cos(angles[:-1])
    cos_out = np.cos(angles[1:])

    # Both polarizations share the same transmission numerator.
    t_numerator = 2 * n_in * cos_in

    # s polarization
    s_sum = n_in * cos_in + n_out * cos_out
    rs = (n_in * cos_in - n_out * cos_out) / s_sum
    ts = t_numerator / s_sum

    # p polarization
    p_sum = n_out * cos_in + n_in * cos_out
    rp = (n_out * cos_in - n_in * cos_out) / p_sum
    tp = t_numerator / p_sum

    return rs, rp, ts, tp

In [11]:
def Scattering_Matrix(N, angles, d, wavelengths, r, t):
    """
    Build the 2x2 scattering (transfer) matrix of a layer stack for one polarization.

    Parameters:
    - N: (L, P) array of refractive indices; row 0 is the ambient and row L-1
      the substrate, so the stack holds L - 2 films.
    - angles: (L, P) array of propagation angles in radians.
    - d: thicknesses of the L - 2 films, in the same length unit as
      `wavelengths`. Any array-like is accepted (NumPy array, list, pandas
      Series); a scalar is treated as a single film.
    - wavelengths: the P spectral points; any array-like is accepted.
    - r, t: (L - 1, P) arrays of Fresnel reflection / transmission coefficients,
      one row per interface.

    Returns:
    - S: (P, 2, 2) complex array, the ordered product
      I_0 @ P_1 @ I_1 @ ... @ P_(L-2) @ I_(L-2) of interface (I) and
      propagation (P) matrices at every spectral point.
    """
    L, P = N.shape  # L = number of layers, P = spectral points

    # Coerce to arrays so lists / Series / scalars work, then shape for broadcasting
    # against the (L - 2, P) film slices.
    thickness = np.atleast_1d(np.asarray(d))[:, np.newaxis]
    wl = np.atleast_1d(np.asarray(wavelengths))[np.newaxis, :]

    # Phase term of each film: (2*pi/wavelength) * N * d * cos(angle).
    phase = (2 * np.pi / wl) * N[1:-1] * thickness * np.cos(angles[1:-1])

    # Propagation matrices: one diagonal matrix per film (ambient and substrate excluded).
    prop_matrices = np.zeros((L - 2, P, 2, 2), dtype=complex)
    prop_matrices[:, :, 0, 0] = np.exp(-1j * phase)
    prop_matrices[:, :, 1, 1] = np.exp(1j * phase)

    # Interface matrices: (1/t) * [[1, r], [r, 1]] for each of the L - 1 interfaces.
    int_matrices = np.zeros((L - 1, P, 2, 2), dtype=complex)
    int_matrices[:, :, 0, 0] = 1 / t
    int_matrices[:, :, 0, 1] = r / t
    int_matrices[:, :, 1, 0] = r / t
    int_matrices[:, :, 1, 1] = 1 / t

    # Start at the ambient/first-film interface and alternate film, interface.
    S = int_matrices[0]
    for i in range(1, L - 1):
        S = np.matmul(S, prop_matrices[i - 1])
        S = np.matmul(S, int_matrices[i])

    return S

In [13]:
def SE_Sim(Structure, AOI, d, write_data=False, NCS=True):
    """
    Simulate the ellipsometric spectrum of a planar layer stack.

    Parameters:
    - Structure: list of pandas DataFrames ordered ambient, film(s), substrate.
      Structure[0] must provide a 'Wavelength (nm)' column. Each frame gives its
      refractive index as a column 'N' or as columns 'n' and 'k'; a frame with
      neither contributes NaN. When write_data is True, each film frame
      (Structure[1] .. Structure[len(d)]) must carry a `.name` attribute.
    - AOI: angle of incidence in degrees.
    - d: 1D array of film thicknesses, one per film, in the same unit as the
      wavelengths (nm).
    - write_data: if True, also write the result to a CSV file in the current
      working directory.
    - NCS: if True, return the N, C, S parameters; otherwise Psi and Delta in degrees.

    Returns:
    - pandas DataFrame with 'Wavelength (nm)' plus either 'N', 'C', 'S' or
      'Psi', 'Delta'. With write_data=True the frame (and the file) gains a
      leading row whose first cell is the stack label
      "<film name>_<thickness>nm_..." and whose other cells are empty strings.

    Files are named "trial_<YYYY-mm-dd-HH-MM-SS-mmm>.csv". If that name is already
    taken (two spectra generated within the same millisecond), a numeric suffix
    "_1", "_2", ... is appended so earlier files are not overwritten.
    """
    # Step 1: Spectral grid and complex refractive index of every layer.
    wv = Structure[0]['Wavelength (nm)'].to_numpy()

    index = np.stack([
        df["N"].to_numpy()
        if "N" in df.columns
        else df["n"].to_numpy() + 1j * df["k"].to_numpy()
        if all(col in df.columns for col in ["n", "k"])
        else np.full(df.shape[0], np.nan + 1j * np.nan)
        for df in Structure
    ])

    # Step 2: Propagation angle in each layer from Snell's law.
    angles = Snells_Law(Structure, AOI)

    # Step 3: Fresnel coefficients at each interface.
    rs, rp, ts, tp = fresnel_coefficients(index, angles)

    # Step 4: Scattering matrix of the whole stack, per polarization.
    Ss = Scattering_Matrix(index, angles, d, wv, rs, ts)
    Sp = Scattering_Matrix(index, angles, d, wv, rp, tp)

    # Step 5: Complex reflection coefficients of the stack.
    Rp = Sp[:, 1, 0] / Sp[:, 0, 0]
    Rs = Ss[:, 1, 0] / Ss[:, 0, 0]

    # The conjugate is kept from the original implementation, which attributed it
    # to a sign-convention mismatch in the preceding math.
    rho = np.conj(Rp / Rs)

    # Step 6: Psi and Delta in radians; Delta is unwrapped along the spectrum.
    psi = np.arctan(np.abs(rho)).real
    delta = np.unwrap(np.angle(rho))

    # Step 7: Assemble the requested output representation.
    if NCS:
        df = pd.DataFrame({
            'Wavelength (nm)': wv,
            'N': np.cos(2 * psi).real,
            'C': (np.sin(2 * psi) * np.cos(delta)).real,
            'S': (np.sin(2 * psi) * np.sin(delta)).real,
        })
    else:
        df = pd.DataFrame({
            'Wavelength (nm)': wv,
            'Psi': psi * 180 / np.pi,
            'Delta': delta * 180 / np.pi,
        })

    if not write_data:
        return df

    import os  # local import: only the write path touches the file system

    # Stack label: "<film name>_<thickness>nm" for every film, joined by "_".
    name = "_".join(
        str(Structure[z + 1].name) + "_" + str(d[z]) + "nm"
        for z in range(len(d))
    )

    name_row = pd.DataFrame([[name] + [''] * (df.shape[1] - 1)], columns=df.columns)
    df = pd.concat([name_row, df], ignore_index=True)
    df.name = name

    timestamp = datetime.now()
    timestamp_string = timestamp.strftime('%Y-%m-%d-%H-%M-%S') + f"-{timestamp.microsecond // 1000:03d}"
    stem = "trial_" + timestamp_string

    # Single ".csv" extension (the original appended it twice), and never
    # overwrite a file produced within the same millisecond.
    title = stem + ".csv"
    attempt = 0
    while os.path.exists(title):
        attempt += 1
        title = f"{stem}_{attempt}.csv"

    df.to_csv(title, index=False)
    return df

Data_Set_1: This data set will be generated with a Cody-Lorentz oscillator on a native-oxide-coated Si wafer. The structure will consist of Si Wafer / Native Oxide / Bulk Film. This structure mimics the structure determined from least-squares regression on samples MV1519 and MV1523.

The training set will contain 100,000 files. The validation set will contain 10,000 files. The test set will contain 10,000 files.

In [16]:
# GENERATE TRAIN SET (Data_Set_1)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.

os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE TRAINING DATA in the XXXX space ####################

for i in range(100000): #How many files we want to generate 

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.
    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)

    # Now simulate the SE data for this material.
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    NTVE_JAW_Thickness = (random.randrange(160,170) / 100 ) # 1.60 to 1.69 in steps of 0.01 (randomly drawn, not fixed)
    ang_off = 0 # not used anywhere in this cell

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, CL, NTVE_JAW, Si_JAW]
    Mat_Thick = np.array( [ Bulk_Thickness, NTVE_JAW_Thickness ] )
    Theta_Incident = 64.93 # angle of incidence in degrees
    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

In [17]:
# GENERATE VALIDATION SET (Data_Set_1)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.

os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE VALIDATION DATA in the XXXX space ####################

for i in range(10000): #How many files we want to generate 

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.
    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)

    # Now simulate the SE data for this material.
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    NTVE_JAW_Thickness = (random.randrange(160,170) / 100 ) # 1.60 to 1.69 in steps of 0.01 (randomly drawn, not fixed)
    ang_off = 0 # not used anywhere in this cell

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, CL, NTVE_JAW, Si_JAW]
    Mat_Thick = np.array( [ Bulk_Thickness, NTVE_JAW_Thickness ] )
    Theta_Incident = 64.93 # angle of incidence in degrees
    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

In [18]:
# GENERATE TEST SET (Data_Set_1)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.

os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE TEST DATA in the XXXX space ####################

for i in range(10000): #How many files we want to generate 

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.
    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)

    # Now simulate the SE data for this material.
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    NTVE_JAW_Thickness = (random.randrange(160,170) / 100 ) # 1.60 to 1.69 in steps of 0.01 (randomly drawn, not fixed)
    ang_off = 0 # not used anywhere in this cell

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, CL, NTVE_JAW, Si_JAW]
    Mat_Thick = np.array( [ Bulk_Thickness, NTVE_JAW_Thickness ] )
    Theta_Incident = 64.93 # angle of incidence in degrees
    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

Data_Set_2: This data set will be generated with a Tauc-Lorentz oscillator on soda-lime glass (SLG). The structure will consist of SLG / SLG-Bulk Interface / Bulk Film / Surface Layer. The surface layer will consist of 50% void and 50% of the randomly generated bulk film. The SLG-bulk interface will be 50% SLG and 50% bulk film. This structure mimics the structure determined from least-squares regression on sample MV1530.

In [18]:
# GENERATE TRAINING SET (Data_Set_2)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.
# Now let's change directory to the train set directory
os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE TRAINING DATA in the XXXX space ####################

for i in range(100000): #How many oscillators we want to generate

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.

    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    # NOTE(review): the Data_Set_2 description names a Tauc-Lorentz oscillator, but this
    # calls Get_CL_Material (Cody-Lorentz, going by its name) - confirm which is intended.
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)
    # Surface layer: mix of the film and void (0.5 is presumably the 50% fraction from the description).
    EMA = Bruggeman_EMA( CL, Void , 0.5 )

    # Substrate interface layer: mix of the film and SLG, same 0.5 argument.
    EMA_Substrate =  Bruggeman_EMA( CL, SLG , 0.5 )
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    EMA_Thickness = (random.randrange(10,200) / 100 ) # 0.10 to 1.99 in steps of 0.01
    EMA_Substrate_thickness = 3 # fixed
    ang_off = 0 # only referenced by the commented-out call below

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, EMA, CL,  EMA_Substrate, SLG]
    Mat_Thick = np.array([ EMA_Thickness, Bulk_Thickness, EMA_Substrate_thickness ])
    Theta_Incident = 64.93 # angle of incidence in degrees
    #SE_Sim_Substrate(Structure, Theta_Incident, Mat_Thick, [ang_off], write_data=True )

    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

In [19]:
# GENERATE VALIDATION SET (Data_Set_2)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.
# Now let's change directory to the val set directory
os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE VALIDATION DATA in the XXXX space ####################

for i in range(10000): #How many oscillators we want to generate

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.

    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    # NOTE(review): the Data_Set_2 description names a Tauc-Lorentz oscillator, but this
    # calls Get_CL_Material (Cody-Lorentz, going by its name) - confirm which is intended.
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)
    # Surface layer: mix of the film and void (0.5 is presumably the 50% fraction from the description).
    EMA = Bruggeman_EMA( CL, Void , 0.5 )

    # Substrate interface layer: mix of the film and SLG, same 0.5 argument.
    EMA_Substrate =  Bruggeman_EMA( CL, SLG , 0.5 )
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    EMA_Thickness = (random.randrange(10,200) / 100 ) # 0.10 to 1.99 in steps of 0.01
    EMA_Substrate_thickness = 3 # fixed
    ang_off = 0 # only referenced by the commented-out call below

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, EMA, CL,  EMA_Substrate, SLG]
    Mat_Thick = np.array([ EMA_Thickness, Bulk_Thickness, EMA_Substrate_thickness ])
    Theta_Incident = 64.93 # angle of incidence in degrees
    #SE_Sim_Substrate(Structure, Theta_Incident, Mat_Thick, [ang_off], write_data=True )

    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

In [20]:
# GENERATE TEST SET (Data_Set_2)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.
# Now let's change directory to the test set directory
os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE TEST DATA in the XXXX space ####################

for i in range(10000): #How many oscillators we want to generate

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.

    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    # NOTE(review): the Data_Set_2 description names a Tauc-Lorentz oscillator, but this
    # calls Get_CL_Material (Cody-Lorentz, going by its name) - confirm which is intended.
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)
    # Surface layer: mix of the film and void (0.5 is presumably the 50% fraction from the description).
    EMA = Bruggeman_EMA( CL, Void , 0.5 )

    # Substrate interface layer: mix of the film and SLG, same 0.5 argument.
    EMA_Substrate =  Bruggeman_EMA( CL, SLG , 0.5 )
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    EMA_Thickness = (random.randrange(10,200) / 100 ) # 0.10 to 1.99 in steps of 0.01
    EMA_Substrate_thickness = 3 # fixed
    ang_off = 0 # only referenced by the commented-out call below

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, EMA, CL,  EMA_Substrate, SLG]
    Mat_Thick = np.array([ EMA_Thickness, Bulk_Thickness, EMA_Substrate_thickness ])
    Theta_Incident = 64.93 # angle of incidence in degrees
    #SE_Sim_Substrate(Structure, Theta_Incident, Mat_Thick, [ang_off], write_data=True )

    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

Data_Set_3-1: This data set will be generated with a Cody-Lorentz oscillator on soda-lime glass (SLG). The structure will consist of SLG / SLG-Bulk Film Interface / Bulk Film.

Data set 3 will contain 12,500 files from Data_Set_1, 12,500 files from Data_Set_2, 12,500 files from Data_Set_3-1, and 12,500 files from Data_Set_3-2. Each of the 4 data sets that make up Data Set 3 will use a different structure.

In [26]:
# GENERATE TRAIN SET (Data_Set_3-1)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.

os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE TRAINING DATA in the XXXX space ####################

for i in range(12500): #How many oscillators we want to generate

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.
    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)
    # No surface layer in this structure, so the film/void mix stays disabled:
    # EMA = Bruggeman_EMA( CL, Void , 0.5 )

    # Substrate interface layer: mix of the film and SLG (0.5 is presumably a 50% fraction - confirm).
    EMA_Substrate =  Bruggeman_EMA( CL, SLG , 0.5 )
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    EMA_Thickness = (random.randrange(10,200) / 100 ) # 0.10 to 1.99; drawn but not used by this structure
    EMA_Substrate_thickness = 3 # fixed
    ang_off = 0 # only referenced by the commented-out call below

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, CL,  EMA_Substrate, SLG]
    Mat_Thick = np.array([  Bulk_Thickness, EMA_Substrate_thickness ])
    Theta_Incident = 64.93 # angle of incidence in degrees
    #SE_Sim_Substrate(Structure, Theta_Incident, Mat_Thick, [ang_off], write_data=True )

    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

In [30]:
# GENERATE VALIDATION SET (Data_Set_3-1)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.

os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE VALIDATION DATA in the XXXX space ####################

for i in range(1250): #How many oscillators we want to generate

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.
    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)
    # No surface layer in this structure, so the film/void mix stays disabled:
    # EMA = Bruggeman_EMA( CL, Void , 0.5 )

    # Substrate interface layer: mix of the film and SLG (0.5 is presumably a 50% fraction - confirm).
    EMA_Substrate =  Bruggeman_EMA( CL, SLG , 0.5 )
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    EMA_Thickness = (random.randrange(10,200) / 100 ) # 0.10 to 1.99; drawn but not used by this structure
    EMA_Substrate_thickness = 3 # fixed
    ang_off = 0 # only referenced by the commented-out call below

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, CL,  EMA_Substrate, SLG]
    Mat_Thick = np.array([  Bulk_Thickness, EMA_Substrate_thickness ])
    Theta_Incident = 64.93 # angle of incidence in degrees
    #SE_Sim_Substrate(Structure, Theta_Incident, Mat_Thick, [ang_off], write_data=True )

    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

In [31]:
# GENERATE TEST SET (Data_Set_3-1)
# NOTE(review): this cell was labelled "Validation" exactly like the previous one;
# it is presumably the test set for Data_Set_3-1 - confirm before running.
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.

os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THIS CELL'S OUTPUT (presumably the TEST DATA) in the XXXX space ####################

for i in range(1250): #How many oscillators we want to generate

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.
    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)
    # No surface layer in this structure, so the film/void mix stays disabled:
    # EMA = Bruggeman_EMA( CL, Void , 0.5 )

    # Substrate interface layer: mix of the film and SLG (0.5 is presumably a 50% fraction - confirm).
    EMA_Substrate =  Bruggeman_EMA( CL, SLG , 0.5 )
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    EMA_Thickness = (random.randrange(10,200) / 100 ) # 0.10 to 1.99; drawn but not used by this structure
    EMA_Substrate_thickness = 3 # fixed
    ang_off = 0 # only referenced by the commented-out call below

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, CL,  EMA_Substrate, SLG]
    Mat_Thick = np.array([  Bulk_Thickness, EMA_Substrate_thickness ])
    Theta_Incident = 64.93 # angle of incidence in degrees
    #SE_Sim_Substrate(Structure, Theta_Incident, Mat_Thick, [ang_off], write_data=True )

    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

Data_Set_3-2: This data set will be generated with a Cody-Lorentz oscillator on a native-oxide-coated Si wafer. The structure will consist of Si Wafer / Native Oxide / Bulk Film / Surface Layer. The surface layer will consist of 50% void and 50% of the randomly generated bulk film.

In [34]:
# GENERATE TRAIN SET (Data_Set_3-2)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.

os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE TRAINING DATA in the XXXX space ####################

for i in range(12500): #How many oscillators we want to generate

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.

    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)
    # Surface layer: mix of the film and void (0.5 is presumably the 50% fraction from the description).
    EMA = Bruggeman_EMA( CL, Void , 0.5 )
    # Now simulate the SE data for this material.
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    EMA_Thickness = (random.randrange(10,200) / 100 ) # 0.10 to 1.99 in steps of 0.01
    NTVE_JAW_Thickness = (random.randrange(160,170) / 100 ) # 1.60 to 1.69 in steps of 0.01 (randomly drawn, not fixed)
    ang_off = 0 # not used anywhere in this cell

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, EMA, CL, NTVE_JAW, Si_JAW]
    Mat_Thick = np.array([EMA_Thickness, Bulk_Thickness, NTVE_JAW_Thickness] )
    Theta_Incident = 64.93 # angle of incidence in degrees
    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

In [35]:
# GENERATE VALIDATION SET (Data_Set_3-2)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.

os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE VALIDATION DATA in the XXXX space ####################

for i in range(1250): #How many oscillators we want to generate

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.

    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)
    # Surface layer: mix of the film and void (0.5 is presumably the 50% fraction from the description).
    EMA = Bruggeman_EMA( CL, Void , 0.5 )
    # Now simulate the SE data for this material.
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    EMA_Thickness = (random.randrange(10,200) / 100 ) # 0.10 to 1.99 in steps of 0.01
    NTVE_JAW_Thickness = (random.randrange(160,170) / 100 ) # 1.60 to 1.69 in steps of 0.01 (randomly drawn, not fixed)
    ang_off = 0 # not used anywhere in this cell

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, EMA, CL, NTVE_JAW, Si_JAW]
    Mat_Thick = np.array([EMA_Thickness, Bulk_Thickness, NTVE_JAW_Thickness] )
    Theta_Incident = 64.93 # angle of incidence in degrees
    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

In [36]:
# GENERATE TEST SET (Data_Set_3-2)
# Each iteration draws one random oscillator and one set of thicknesses, then has
# SE_Sim write a single N/C/S spectrum file into the current working directory.

os.chdir(r"XXXX") ########## PLEASE PUT THE DIRECTORY WHERE YOU WOULD LIKE TO STORE THE TEST DATA in the XXXX space ####################

for i in range(1250): #How many oscillators we want to generate

    # Draw the oscillator parameters. Every randrange(a, b) call returns an
    # integer in [a, b), which is then scaled down to the parameter value.

    E_inf = (1) # fixed at 1
    Amp  = (random.randrange(500,1500) / 10 ) # 50.0 to 149.9 in steps of 0.1
    Br = (random.randrange(150,350) / 100) # 1.50 to 3.49 in steps of 0.01
   
    Eo_temp = random.randrange(300,500) 
    Eo = Eo_temp / 100 # 3.00 to 4.99 in steps of 0.01
    
    Eg_temp = random.randrange(100,250) 
    Eg = Eg_temp / 100 # 1.00 to 2.49 in steps of 0.01

    # Ep depends on the values just drawn: 0.50 <= Ep < Eo - Eg.
    # Eo_temp - Eg_temp is at least 300 - 249 = 51, so the range is never empty.
    Ep = (random.randrange(50, (Eo_temp - Eg_temp))  / 100 )
    Et = 0 # original note: this term being 0 effectively neglects the Urbach energy and simplifies the equations.
    Egt = Eg + Et # equals Eg because Et is 0



    # Build the oscillator material (Get_CL_Material is defined outside this cell).
    CL = Get_CL_Material(E, Ep, Eg, Eo, Br, Amp, Egt, E_inf, wv)
    # Surface layer: mix of the film and void (0.5 is presumably the 50% fraction from the description).
    EMA = Bruggeman_EMA( CL, Void , 0.5 )
    # Now simulate the SE data for this material.
    
    # Draw the layer thicknesses (nm; SE_Sim labels them with "nm").
    Bulk_Thickness = (random.randrange(2500,12500) / 100 ) # 25.00 to 124.99 in steps of 0.01
    EMA_Thickness = (random.randrange(10,200) / 100 ) # 0.10 to 1.99 in steps of 0.01
    NTVE_JAW_Thickness = (random.randrange(160,170) / 100 ) # 1.60 to 1.69 in steps of 0.01 (randomly drawn, not fixed)
    ang_off = 0 # not used anywhere in this cell

    # Stack order is ambient first, substrate last; Mat_Thick lists the films in between.
    Structure = [Void, EMA, CL, NTVE_JAW, Si_JAW]
    Mat_Thick = np.array([EMA_Thickness, Bulk_Thickness, NTVE_JAW_Thickness] )
    Theta_Incident = 64.93 # angle of incidence in degrees
    SE_Sim(Structure, Theta_Incident,  Mat_Thick, write_data=True, NCS=True)

Data set 3 will contain 12,500 files from Data_Set_1, 12,500 files from Data_Set_2, 12,500 files from Data_Set_3-1, and 12,500 files from Data_Set_3-2. Each of the 4 data sets that make up Data Set 3 will use a different structure.

These files will be combined manually outside of this notebook. 