In [2]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

# Load data from Excel file
file_path = 'C:\\Users\\Admin\\OneDrive - MSFT\\Desktop\\jupyter\\Discs Dataset.xlsx'
sheet_name_thickness = 'Thickness 1'  
sheet_name_gap = 'Gap 2'
sheet_name_height = 'Height'
sheet_name_thickness_2 = 'Thickness 2'
sheet_name_gap_1 = 'Gap 1'

sheet_name_dia_3 = 'Diameter 3'  
sheet_name_dia_2 = 'Diameter 2' 


data_thickness = pd.read_excel(file_path, sheet_name=sheet_name_thickness)
data_thickness_2 = pd.read_excel(file_path, sheet_name=sheet_name_thickness_2)

data_gap_1 = pd.read_excel(file_path, sheet_name=sheet_name_gap_1)
data_gap = pd.read_excel(file_path, sheet_name=sheet_name_gap)

data_height = pd.read_excel(file_path, sheet_name=sheet_name_height)

data_dia_3 = pd.read_excel(file_path, sheet_name=sheet_name_dia_3)
data_dia_2 = pd.read_excel(file_path, sheet_name=sheet_name_dia_2)


all_x = []
all_y = []


In [3]:

wavelength = data_thickness['Wavelength (nm)'].values

# Define thickness values to interpolate
thickness_values = [5, 10, 15, 20, 30, 50, 80, 100]

constant_non_uniform_diameter = "60nm80nm100nm120nm"

# Function to extract individual diameters and pad with NaN
def extract_diameters(diameter_str, max_length=6):
    diameters = []
    for x in diameter_str.split('nm'):
        if x:
            diameters.append(int(x))
    padded_diameters = diameters + [np.nan] * (max_length - len(diameters))
    mask = [1] * len(diameters) + [0] * (max_length - len(diameters))
    return padded_diameters, mask

# Process each thickness value
for thickness in thickness_values:
    extension_ratio = data_thickness[f'{thickness} nm.2'].values  # Assuming the column name follows this pattern

    # Interpolation
    interpolation_function = interp1d(wavelength, extension_ratio, kind='linear', fill_value='extrapolate')

    # Generate interpolated values for a range of wavelengths
    wavelength_range = np.linspace(wavelength.min(), wavelength.max(), 2000)
    interpolated_extension_ratios = interpolation_function(wavelength_range)

    # Create input arrays with constants and varying inputs
    batch_size = len(wavelength_range)
    length_array = np.full((batch_size, 1), 50)
    thickness_array = np.full((batch_size, 1), thickness)
    gap_array = np.full((batch_size, 1), 5)  # Assume a constant gap for thickness data
    wavelength_array = wavelength_range.reshape(batch_size, 1)

    # Extract individual diameters and mask
    diameter_array, mask_array = extract_diameters(constant_non_uniform_diameter)
    diameter_array = np.tile(np.array(diameter_array), (batch_size, 1))
    mask_array = np.tile(np.array(mask_array), (batch_size, 1))

    # Create the input array
    x = np.concatenate([length_array, thickness_array, gap_array, wavelength_array, diameter_array], axis=1)

    # Append to storage
    all_x.append(x)
    all_y.append(interpolated_extension_ratios.reshape(batch_size, 1))

# Combine all data
x = np.vstack(all_x)
y = np.vstack(all_y)

In [4]:
wavelength = data_thickness_2['Wavelength (nm)'].values

# Define thickness values to interpolate
thickness_values = [2,4,6,8,10,15,20,25,30,40,50,70,80,100]

# Constant non-uniform diameter string
constant_non_uniform_diameter = "40nm60nm80nm100nm120nm"

# Function to extract individual diameters and pad with NaN
def extract_diameters(diameter_str, max_length=6):
    diameters = []
    for x in diameter_str.split('nm'):
        if x:
            diameters.append(int(x))
    padded_diameters = diameters + [np.nan] * (max_length - len(diameters))
    mask = [1] * len(diameters) + [0] * (max_length - len(diameters))
    return padded_diameters, mask

# Process each thickness value
for thickness in thickness_values:
    extension_ratio = data_thickness_2[f'{thickness}nm.2'].values  # Assuming the column name follows this pattern

    # Interpolation
    interpolation_function = interp1d(wavelength, extension_ratio, kind='linear', fill_value='extrapolate')

    # Generate interpolated values for a range of wavelengths
    wavelength_range = np.linspace(wavelength.min(), wavelength.max(), 2000)
    interpolated_extension_ratios = interpolation_function(wavelength_range)

    # Create input arrays with constants and varying inputs
    batch_size = len(wavelength_range)
    length_array = np.full((batch_size, 1), 50)
    thickness_array = np.full((batch_size, 1), thickness)
    gap_array = np.full((batch_size, 1), 5)  # Assume a constant gap for thickness data
    wavelength_array = wavelength_range.reshape(batch_size, 1)

    # Extract individual diameters and mask
    diameter_array, mask_array = extract_diameters(constant_non_uniform_diameter)
    diameter_array = np.tile(np.array(diameter_array), (batch_size, 1))
    mask_array = np.tile(np.array(mask_array), (batch_size, 1))

    # Create the input array
    x = np.concatenate([length_array, thickness_array, gap_array, wavelength_array, diameter_array], axis=1)

    # Append to storage
    all_x.append(x)
    all_y.append(interpolated_extension_ratios.reshape(batch_size, 1))

# Combine all data
x = np.vstack(all_x)
y = np.vstack(all_y)

In [5]:
wavelength = data_gap_1['Wavelength (nm)'].values

gap_values = [4,5,6,8,10,15]

constant_non_uniform_diameter = "40nm60nm80nm100nm120nm"

# Process each gap value
for gap in gap_values:
    extension_ratio = data_gap_1[f'{gap} nm.2'].values  # Assuming the column name follows this pattern

    # Interpolation
    interpolation_function = interp1d(wavelength, extension_ratio, kind='linear', fill_value='extrapolate')

    # Generate interpolated values for a range of wavelengths
    wavelength_range = np.linspace(wavelength.min(), wavelength.max(), 2000)
    interpolated_extension_ratios = interpolation_function(wavelength_range)

    # Create input array with constants and varying inputs
    batch_size = len(wavelength_range)
    length_array = np.full((batch_size, 1), 50)
    thickness_array = np.full((batch_size, 1), 50)
    gap_array = np.full((batch_size, 1), gap)
    wavelength_array = wavelength_range.reshape(batch_size, 1)

    # Extract individual diameters and mask
    diameter_array, mask_array = extract_diameters(constant_non_uniform_diameter)
    diameter_array = np.tile(np.array(diameter_array), (batch_size, 1))
    mask_array = np.tile(np.array(mask_array), (batch_size, 1))

    # Create the input array
    x = np.concatenate([length_array, thickness_array, gap_array, wavelength_array, diameter_array], axis=1)

    # Append to storage
    all_x.append(x)
    all_y.append(interpolated_extension_ratios.reshape(batch_size, 1))

# Combine all data
x = np.vstack(all_x)
y = np.vstack(all_y)

# Shuffle the data


In [6]:
wavelength = data_gap['Wavelength (nm)'].values

gap_values = [5, 10, 15, 20]

constant_non_uniform_diameter = "60nm80nm100nm120nm"

# Process each gap value
for gap in gap_values:
    extension_ratio = data_gap[f'{gap} nm.2'].values  # Assuming the column name follows this pattern

    # Interpolation
    interpolation_function = interp1d(wavelength, extension_ratio, kind='linear', fill_value='extrapolate')

    # Generate interpolated values for a range of wavelengths
    wavelength_range = np.linspace(wavelength.min(), wavelength.max(), 2000)
    interpolated_extension_ratios = interpolation_function(wavelength_range)

    # Create input array with constants and varying inputs
    batch_size = len(wavelength_range)
    length_array = np.full((batch_size, 1), 50)
    thickness_array = np.full((batch_size, 1), 50)
    gap_array = np.full((batch_size, 1), gap)
    wavelength_array = wavelength_range.reshape(batch_size, 1)

    # Extract individual diameters and mask
    diameter_array, mask_array = extract_diameters(constant_non_uniform_diameter)
    diameter_array = np.tile(np.array(diameter_array), (batch_size, 1))
    mask_array = np.tile(np.array(mask_array), (batch_size, 1))

    # Create the input array
    x = np.concatenate([length_array, thickness_array, gap_array, wavelength_array, diameter_array], axis=1)

    # Append to storage
    all_x.append(x)
    all_y.append(interpolated_extension_ratios.reshape(batch_size, 1))

# Combine all data
x = np.vstack(all_x)
y = np.vstack(all_y)

# Shuffle the data


In [7]:
height_values = [30, 50, 80, 100, 120, 150]

constant_non_uniform_diameter = "40nm60nm80nm100nm120nm"

for height in height_values:

    extension_ratio = data_height[f'{height} nm.2'].values

    interpolated_extension_ratios = interpolation_function(wavelength_range)
    
    batch_size = len(wavelength_range)
    length_array = np.full((batch_size, 1), height)
    thickness_array = np.full((batch_size, 1), 50)
    gap_array = np.full((batch_size, 1), 5)  # Assume a constant gap for thickness data
    wavelength_array = wavelength_range.reshape(batch_size, 1)

        # Extract individual diameters and mask
    diameter_array, mask_array = extract_diameters(constant_non_uniform_diameter)
    diameter_array = np.tile(np.array(diameter_array), (batch_size, 1))
    mask_array = np.tile(np.array(mask_array), (batch_size, 1))

        # Create the input array
    x = np.concatenate([length_array, thickness_array, gap_array, wavelength_array, diameter_array], axis=1)

        # Append to storage
    all_x.append(x)
    all_y.append(interpolated_extension_ratios.reshape(batch_size, 1))

# Combine all data
x = np.vstack(all_x)
y = np.vstack(all_y)


In [8]:

non_uniform_diameter_values = [
    "80nm100nm",
    "60nm80nm100nm",
    "60nm80nm100nm120nm",
    "40nm60nm80nm100nm120nm",
    "40nm60nm80nm100nm120nm140nm"
]


# Process each non-uniform diameter value
for non_uniform_diameter in non_uniform_diameter_values:
    # Extract individual diameters and mask
    diameter_array, mask_array = extract_diameters(non_uniform_diameter)

    # Interpolation for height data
    extension_ratio = data_dia_3[f'{non_uniform_diameter}.2'].values
    interpolation_function = interp1d(wavelength, extension_ratio, kind='linear', fill_value='extrapolate')
    wavelength_range = np.linspace(wavelength.min(), wavelength.max(), 2000)
    interpolated_extension_ratios = interpolation_function(wavelength_range)

    # Create input arrays with constants and varying inputs
    batch_size = len(wavelength_range)
    length_array = np.full((batch_size, 1), 50)
    thickness_array = np.full((batch_size, 1), 50)
    gap_array = np.full((batch_size, 1), 5)
    wavelength_array = wavelength_range.reshape(batch_size, 1)
    diameter_array = np.tile(np.array(diameter_array), (batch_size, 1))
    mask_array = np.tile(np.array(mask_array), (batch_size, 1))

    # Create the input array
    x = np.concatenate([length_array, thickness_array, gap_array, wavelength_array, diameter_array], axis=1)

    # Append to storage
    all_x.append(x)
    all_y.append(interpolated_extension_ratios.reshape(batch_size, 1))

# Convert lists to numpy arrays
x = np.vstack(all_x)
y = np.vstack(all_y)


In [9]:

non_uniform_diameter_values = [
    "60nm80nm",
    "80nm100nm",
    "60nm80nm100nm",
    "60nm80nm100nm120nm",
    "40nm60nm80nm100nm120nm",
    "40nm60nm80nm100nm120nm140nm"
]


# Process each non-uniform diameter value
for non_uniform_diameter in non_uniform_diameter_values:
    # Extract individual diameters and mask
    diameter_array, mask_array = extract_diameters(non_uniform_diameter)

    # Interpolation for height data
    extension_ratio = data_dia_2[f'{non_uniform_diameter}.2'].values
    interpolation_function = interp1d(wavelength, extension_ratio, kind='linear', fill_value='extrapolate')
    wavelength_range = np.linspace(wavelength.min(), wavelength.max(), 2000)
    interpolated_extension_ratios = interpolation_function(wavelength_range)

    # Create input arrays with constants and varying inputs
    batch_size = len(wavelength_range)
    length_array = np.full((batch_size, 1), 50)
    thickness_array = np.full((batch_size, 1), 10)
    gap_array = np.full((batch_size, 1), 5)
    wavelength_array = wavelength_range.reshape(batch_size, 1)
    diameter_array = np.tile(np.array(diameter_array), (batch_size, 1))
    mask_array = np.tile(np.array(mask_array), (batch_size, 1))

    # Create the input array
    x = np.concatenate([length_array, thickness_array, gap_array, wavelength_array, diameter_array], axis=1)

    # Append to storage
    all_x.append(x)
    all_y.append(interpolated_extension_ratios.reshape(batch_size, 1))

# Convert lists to numpy arrays
x = np.vstack(all_x)
y = np.vstack(all_y)


In [10]:
'''all_x = np.vstack(all_x)

# Convert to DataFrame

columns=['height','thickness','gap','wavelength','dia1','dia2','dia3','dia4','dia5','dia6']

df = pd.DataFrame(all_x,columns=columns)
# Save to CSV
csv_path = 'C:\\Users\\Admin\\Desktop\\abcgd.csv'
df.to_csv(csv_path, index=False,columns=columns)
print(f"Data saved to {csv_path}")'''


'all_x = np.vstack(all_x)\n\n# Convert to DataFrame\n\ncolumns=[\'height\',\'thickness\',\'gap\',\'wavelength\',\'dia1\',\'dia2\',\'dia3\',\'dia4\',\'dia5\',\'dia6\']\n\ndf = pd.DataFrame(all_x,columns=columns)\n# Save to CSV\ncsv_path = \'C:\\Users\\Admin\\Desktop\\abcgd.csv\'\ndf.to_csv(csv_path, index=False,columns=columns)\nprint(f"Data saved to {csv_path}")'

In [11]:
x[np.isnan(x)] = -1
y[np.isnan(y)] = -1

# Generate shuffled indices
shuffled_indices = np.random.permutation(len(x))
x_shuffled = x[shuffled_indices]
y_shuffled = y[shuffled_indices]

x_train, x_test, y_train, y_test = train_test_split(x_shuffled, y_shuffled, test_size=0.1, random_state=42)

'''
df_x_train = pd.DataFrame(x_train, columns=[f'feature_{i+1}' for i in range(x_train.shape[1])])
df_y_train = pd.DataFrame(y_train, columns=['target'])

df_train = pd.concat([df_x_train, df_y_train], axis=1)


df_train.to_csv('C:\\Users\\Admin\\Desktop\\train_data.csv', index=False)
'''


"\ndf_x_train = pd.DataFrame(x_train, columns=[f'feature_{i+1}' for i in range(x_train.shape[1])])\ndf_y_train = pd.DataFrame(y_train, columns=['target'])\n\ndf_train = pd.concat([df_x_train, df_y_train], axis=1)\n\n\ndf_train.to_csv('C:\\Users\\Admin\\Desktop\\train_data.csv', index=False)\n"

In [12]:
scaler_x = StandardScaler()
scaler_y = StandardScaler()
x_train_scaled = scaler_x.fit_transform(x_train)
x_test_scaled = scaler_x.transform(x_test)
y_train_scaled = scaler_y.fit_transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

'''data_combined = np.concatenate((x_train_scaled, y_train_scaled), axis=1)

# Create a pandas DataFrame from the combined data
columns_x = [f'feature_{i}_scaled' for i in range(x_train_scaled.shape[1])]
columns_y = ['target_scaled']
columns_combined = columns_x + columns_y

df_combined = pd.DataFrame(data_combined, columns=columns_combined)

# Save to CSV
csv_file_path = 'C:\\Users\\Admin\\Desktop\\scaled_data_combined.csv'
df_combined.to_csv(csv_file_path, index=False)
'''


"data_combined = np.concatenate((x_train_scaled, y_train_scaled), axis=1)\n\n# Create a pandas DataFrame from the combined data\ncolumns_x = [f'feature_{i}_scaled' for i in range(x_train_scaled.shape[1])]\ncolumns_y = ['target_scaled']\ncolumns_combined = columns_x + columns_y\n\ndf_combined = pd.DataFrame(data_combined, columns=columns_combined)\n\n# Save to CSV\ncsv_file_path = 'C:\\Users\\Admin\\Desktop\\scaled_data_combined.csv'\ndf_combined.to_csv(csv_file_path, index=False)\n"

In [None]:

model = Sequential([
    Dense(32, input_dim=x.shape[1], activation='relu'),  # First hidden layer with 32 neurons
    Dense(64, activation='relu'),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(32, activation='relu'),  
    Dense(1, activation='linear')  
])

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(x_train_scaled, y_train_scaled, epochs=500, batch_size=32, validation_data=(x_test_scaled, y_test_scaled), verbose=1, callbacks=[early_stopping])

# Evaluate the model using Mean Absolute Error (MAE)
predictions_scaled = model.predict(x_test_scaled)
predictions = scaler_y.inverse_transform(predictions_scaled)
mae = mean_absolute_error(y_test, predictions)
print(f'Mean Absolute Error (MAE): {mae}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/500
[1m2757/2757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4ms/step - loss: 0.2030 - val_loss: 0.0699
Epoch 2/500
[1m2757/2757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - loss: 0.0609 - val_loss: 0.0625
Epoch 3/500
[1m2757/2757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - loss: 0.0542 - val_loss: 0.0470
Epoch 4/500
[1m2757/2757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 0.0463 - val_loss: 0.0429
Epoch 5/500
[1m2757/2757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.0449 - val_loss: 0.0421
Epoch 6/500
[1m2757/2757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.0436 - val_loss: 0.0420
Epoch 7/500
[1m2757/2757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.0408 - val_loss: 0.0474
Epoch 8/500
[1m2757/2757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - loss: 0.0421 - val_loss: 0.0356
Epoch 9/500

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def plot_predictions(thickness, height, gap, dia1, dia2, dia3, dia4, dia5, dia6):
    # Generate a range of wavelengths
    wavelength_range = np.linspace(400, 2500, 2500)
    
    # Create input array with constants and varying wavelength
    batch_size = len(wavelength_range)
    length_array = np.full((batch_size, 1), height)
    thickness_array = np.full((batch_size, 1), thickness)
    gap_array = np.full((batch_size, 1), gap)
    wavelength_array = wavelength_range.reshape(batch_size, 1)
    diameter_array = np.array([[dia1, dia2, dia3, dia4, dia5, dia6]] * batch_size)  # Replicate for each wavelength
    
    # Concatenate all inputs to form the final input batch
    x_input = np.concatenate([length_array, thickness_array, gap_array, wavelength_array, diameter_array], axis=1)
    
    # Scale the inputs
    x_input_scaled = scaler_x.transform(x_input)
    
    # Predict using the trained model
    predictions_scaled = model.predict(x_input_scaled)
    predictions = scaler_y.inverse_transform(predictions_scaled)
    
    return wavelength_range, predictions

# Example usage:
thickness_example = 60
height_example = 20
gap_example = 8
dia1_example = 60
dia2_example = 80
dia3_example = -1
dia4_example = -1
dia5_example = -1
dia6_example = -1

wavelength_range_example, predictions_example = plot_predictions(thickness_example, height_example, gap_example,
                                                                 dia1_example, dia2_example, dia3_example,
                                                                 dia4_example, dia5_example, dia6_example)

# Plotting the graph
plt.figure(figsize=(15, 5))
plt.plot(wavelength_range_example, predictions_example, label='Predictions')
plt.xlabel('Wavelength')
plt.ylabel('Predicted Value')
plt.title('Predictions vs Wavelength')
plt.legend()
plt.grid(True)
plt.show()
