In [283]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.model_selection import train_test_split

In [284]:
file_path_new = '../Raman/graphene/GF_as_of_50 min_Ch4_600SCCM.csv'
df_new = pd.read_csv(file_path_new)

In [285]:
df_new

Unnamed: 0,Stage,0,1,2,3,4,5
0,Pressure Setpoint (Torr),2,2,2,2,2,680
1,Temperature Setpoint (C),25,1000,1000,1000,300,25
2,Temperature Ramp Rate (C/min) 30 C/min max inc...,0,,,,,
3,Ar Flow (sccm),0,100,100,0,100,100
4,CH4 Flow (sccm),0,0,0,600,0,0
5,H2 Flow (sccm),0,100,100,100,0,0
6,C2H4 Flow (sccm),0,0,0,0,0,0
7,Dwell Time (min),0,,30,50,0,0
8,Stage End Condition,Start,Temp,Time,Time,Temp,End


In [286]:
# Transpose the dataframe to get the stages as rows
df_new_transposed = df_new.set_index('Stage').transpose()

In [287]:
df_new_transposed

Stage,Pressure Setpoint (Torr),Temperature Setpoint (C),Temperature Ramp Rate (C/min) 30 C/min max increase,Ar Flow (sccm),CH4 Flow (sccm),H2 Flow (sccm),C2H4 Flow (sccm),Dwell Time (min),Stage End Condition
0,2,25,0.0,0,0,0,0,0.0,Start
1,2,1000,,100,0,100,0,,Temp
2,2,1000,,100,0,100,0,30.0,Time
3,2,1000,,0,600,100,0,50.0,Time
4,2,300,,100,0,0,0,0.0,Temp
5,680,25,,100,0,0,0,0.0,End


In [288]:
df_new_transposed= df_new_transposed.drop("Temperature Ramp Rate (C/min) 30 C/min max increase", axis=1)

In [289]:
df_new_transposed=df_new_transposed.drop("Stage End Condition", axis=1)

In [290]:
# Replace all NaN values in 'YourColumnName' with 15
df_new_transposed['Dwell Time (min)'] = df_new_transposed['Dwell Time (min)'].fillna(15)

In [291]:
display(df_new_transposed)

Stage,Pressure Setpoint (Torr),Temperature Setpoint (C),Ar Flow (sccm),CH4 Flow (sccm),H2 Flow (sccm),C2H4 Flow (sccm),Dwell Time (min)
0,2,25,0,0,0,0,0
1,2,1000,100,0,100,0,15
2,2,1000,100,0,100,0,30
3,2,1000,0,600,100,0,50
4,2,300,100,0,0,0,0
5,680,25,100,0,0,0,0


In [292]:
# Creating a single dimensional array with stages
single_dimensional_data = []
columns = []

In [293]:
for stage in df_new_transposed.index:
    single_dimensional_data.extend(df_new_transposed.loc[stage].values)
    columns.extend([f'Stage {stage} {col}' for col in df_new_transposed.columns])

In [294]:
# Creating the new dataframe
one_d_df = pd.DataFrame([single_dimensional_data], columns=columns, index=['Sample 1'])

In [295]:
# Display the new dataframe
display(one_d_df)

Unnamed: 0,Stage 0 Pressure Setpoint (Torr),Stage 0 Temperature Setpoint (C),Stage 0 Ar Flow (sccm),Stage 0 CH4 Flow (sccm),Stage 0 H2 Flow (sccm),Stage 0 C2H4 Flow (sccm),Stage 0 Dwell Time (min),Stage 1 Pressure Setpoint (Torr),Stage 1 Temperature Setpoint (C),Stage 1 Ar Flow (sccm),...,Stage 4 H2 Flow (sccm),Stage 4 C2H4 Flow (sccm),Stage 4 Dwell Time (min),Stage 5 Pressure Setpoint (Torr),Stage 5 Temperature Setpoint (C),Stage 5 Ar Flow (sccm),Stage 5 CH4 Flow (sccm),Stage 5 H2 Flow (sccm),Stage 5 C2H4 Flow (sccm),Stage 5 Dwell Time (min)
Sample 1,2,25,0,0,0,0,0,2,1000,100,...,0,0,0,680,25,100,0,0,0,0


In [296]:
one_d_df['Peak Ratio'] = 1.53

In [297]:
display(one_d_df)

Unnamed: 0,Stage 0 Pressure Setpoint (Torr),Stage 0 Temperature Setpoint (C),Stage 0 Ar Flow (sccm),Stage 0 CH4 Flow (sccm),Stage 0 H2 Flow (sccm),Stage 0 C2H4 Flow (sccm),Stage 0 Dwell Time (min),Stage 1 Pressure Setpoint (Torr),Stage 1 Temperature Setpoint (C),Stage 1 Ar Flow (sccm),...,Stage 4 C2H4 Flow (sccm),Stage 4 Dwell Time (min),Stage 5 Pressure Setpoint (Torr),Stage 5 Temperature Setpoint (C),Stage 5 Ar Flow (sccm),Stage 5 CH4 Flow (sccm),Stage 5 H2 Flow (sccm),Stage 5 C2H4 Flow (sccm),Stage 5 Dwell Time (min),Peak Ratio
Sample 1,2,25,0,0,0,0,0,2,1000,100,...,0,0,680,25,100,0,0,0,0,1.53


In [298]:
# Save the new dataframe to a CSV file with the row index label
one_d_df.to_csv("new.csv", index=True)

In [299]:
# Function to generate random sample for all columns
def generate_random_sample(df, peak_ratio_min=1.2, peak_ratio_max=1.7):
    random_sample = df.applymap(lambda x: np.random.uniform(0, 1000))
    random_sample['Peak Ratio'] = np.random.uniform(peak_ratio_min, peak_ratio_max)
    return random_sample

In [300]:
# Generate 999 random samples
random_samples = [generate_random_sample(one_d_df) for _ in range(99)]

  random_sample = df.applymap(lambda x: np.random.uniform(0, 1000))


In [301]:
# Combine original dataframe with the random samples
combined_df = pd.concat([one_d_df] + random_samples, ignore_index=True)

In [302]:
# Create new index with "Sample 1", "Sample 2", etc.
combined_df.index = [f'Sample {i+1}' for i in range(len(combined_df))]

In [303]:
display(combined_df)

Unnamed: 0,Stage 0 Pressure Setpoint (Torr),Stage 0 Temperature Setpoint (C),Stage 0 Ar Flow (sccm),Stage 0 CH4 Flow (sccm),Stage 0 H2 Flow (sccm),Stage 0 C2H4 Flow (sccm),Stage 0 Dwell Time (min),Stage 1 Pressure Setpoint (Torr),Stage 1 Temperature Setpoint (C),Stage 1 Ar Flow (sccm),...,Stage 4 C2H4 Flow (sccm),Stage 4 Dwell Time (min),Stage 5 Pressure Setpoint (Torr),Stage 5 Temperature Setpoint (C),Stage 5 Ar Flow (sccm),Stage 5 CH4 Flow (sccm),Stage 5 H2 Flow (sccm),Stage 5 C2H4 Flow (sccm),Stage 5 Dwell Time (min),Peak Ratio
Sample 1,2,25,0,0,0,0,0,2,1000,100,...,0,0,680,25,100,0,0,0,0,1.530000
Sample 2,744.297796,387.858757,517.924355,556.599585,461.616348,847.47691,155.082416,801.316205,559.470485,38.180779,...,94.052226,937.794625,923.329645,907.910096,529.83489,452.940664,519.801965,120.006801,382.531292,1.581632
Sample 3,59.162623,402.413603,903.757454,154.730201,913.111638,574.286292,601.848865,60.607036,764.821004,863.411714,...,402.342345,140.387272,582.80356,973.661058,79.906957,76.120954,163.450375,834.527589,713.01472,1.655742
Sample 4,385.612918,187.115404,205.980925,168.96711,761.925941,538.615586,896.118381,862.049275,397.029993,385.92968,...,657.694842,451.30287,914.145503,911.07964,61.818561,995.043444,563.265391,139.162186,893.388194,1.320856
Sample 5,831.663441,737.201198,353.166248,579.448627,770.573135,399.680952,730.30993,759.387424,872.570764,211.417485,...,289.680365,961.972597,19.764651,775.196183,824.411508,410.562199,392.273589,376.201686,313.615527,1.654399
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Sample 96,140.122487,207.42672,7.195786,337.49954,629.239499,817.792987,833.98529,306.270435,455.723708,488.19981,...,934.065928,863.359105,465.401037,203.691669,238.296484,887.155902,403.203646,371.499639,465.545503,1.269471
Sample 97,565.909979,226.240058,992.587436,93.910624,732.139183,431.125528,857.208658,330.734753,160.220844,989.131999,...,177.332167,836.237251,418.568224,988.586987,294.57884,449.666964,180.344929,664.683953,599.991954,1.439038
Sample 98,59.302266,896.291692,33.412358,87.442021,441.279969,748.074119,55.072778,949.509025,672.042233,342.722435,...,666.750595,973.623801,567.145334,133.322286,450.812863,415.341686,608.773827,458.863357,314.163255,1.401774
Sample 99,547.836228,591.353326,887.651632,376.366932,753.919958,423.736536,435.903882,448.722802,568.644115,541.582277,...,215.657455,190.480718,182.295272,991.337781,135.382442,819.892534,341.755916,255.474535,62.412524,1.249358


In [304]:
combined_df.to_csv("random_sample.csv")