In [1]:
import pandas as pd
import numpy as np

In [2]:
real = pd.read_csv('ex.txt',sep=r'\s+')
real

Unnamed: 0,Synthesis_temperature,Sintering_temperature,Sintering_time,Heating_rate,Particle_size,Standard_deviation,Surface_area,Tab_density
0,800,700,1.0,200,1.7,4.5,1.3,1.026
1,800,900,12.0,200,4.7,1.6,0.5,1.784
2,800,700,12.0,700,1.9,3.9,1.2,0.986
3,800,900,1.0,700,3.4,2.9,0.7,1.393
4,900,700,12.0,200,1.8,4.1,2.8,0.997
5,900,900,1.0,200,3.2,2.2,0.7,1.707
6,900,700,1.0,700,1.5,6.4,4.1,1.133
7,900,900,12.0,700,4.7,1.6,0.5,2.09
8,850,800,6.5,450,2.4,3.6,1.8,1.232
9,850,800,6.5,450,2.4,3.0,1.9,1.245


In [3]:
from data import generate_mc_data
from sklearn.model_selection import train_test_split

# Check if the required columns are present
features = ['Synthesis_temperature', 'Sintering_temperature', 'Sintering_time', 'Heating_rate']
targets = ['Particle_size', 'Standard_deviation', 'Surface_area', 'Tab_density']

# Extract features
X = real[features].copy()

# # Create interaction terms
# X['Sintering_temperature_&_Heating_rate'] = (
#     X['Sintering_temperature'] * X['Heating_rate']
# )
X['Sintering_temperature_&_Sintering_time'] = (
	X['Sintering_temperature'] * X['Sintering_time']
)
y = real[targets]

X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=1
        )


In [4]:
X_train_fold, X_val_fold, y_train_fold, y_val_fold = train_test_split(
               X_train, y_train, test_size=0.1, random_state=42
            )
X_train_mc, y_train_mc = generate_mc_data(
                X_train_fold, y_train_fold, 2, 0.01, 0.01
            )
X_train_fold
y_train_fold

Unnamed: 0,Particle_size,Standard_deviation,Surface_area,Tab_density
1,4.7,1.6,0.5,1.784
11,2.6,3.2,1.9,1.529
10,2.7,3.4,1.8,1.399
5,3.2,2.2,0.7,1.707
6,1.5,6.4,4.1,1.133
7,4.7,1.6,0.5,2.09
0,1.7,4.5,1.3,1.026
9,2.4,3.0,1.9,1.245


In [5]:
X_train_mc

Unnamed: 0,Synthesis_temperature,Sintering_temperature,Sintering_time,Heating_rate,Sintering_temperature_&_Sintering_time
0,802.437737,899.84879,12.04425,199.143344,10838.003869
1,841.160135,793.175649,6.437673,448.415399,5106.205164
2,856.378835,807.035184,6.557099,452.395391,5291.809814
3,908.465082,907.000127,0.999501,200.730888,906.547299
4,882.440683,700.462215,0.998151,702.889128,699.167324
5,888.280384,910.145171,11.918288,703.015747,10847.372682
6,801.022723,703.272565,1.012225,204.283295,711.870363
7,847.311938,793.12566,6.489956,448.171132,5147.350308
8,805.689813,891.724929,12.054813,200.961493,10749.577297
9,856.743451,803.977286,6.456975,452.00939,5191.261086


In [6]:
y_train_fold

Unnamed: 0,Particle_size,Standard_deviation,Surface_area,Tab_density
1,4.7,1.6,0.5,1.784
11,2.6,3.2,1.9,1.529
10,2.7,3.4,1.8,1.399
5,3.2,2.2,0.7,1.707
6,1.5,6.4,4.1,1.133
7,4.7,1.6,0.5,2.09
0,1.7,4.5,1.3,1.026
9,2.4,3.0,1.9,1.245


In [7]:
y_train_mc

Unnamed: 0,Particle_size,Standard_deviation,Surface_area,Tab_density
0,4.675925,1.611892,0.503395,1.779091
1,2.578842,3.217381,1.901284,1.551858
2,2.716631,3.377373,1.805204,1.386887
3,3.236127,2.205108,0.704419,1.723529
4,1.498291,6.407468,4.040257,1.113933
5,4.660513,1.603499,0.498402,2.083001
6,1.685984,4.539214,1.293885,1.02767
7,2.415614,3.006708,1.887861,1.252298
8,4.637833,1.585062,0.497029,1.765741
9,2.574072,3.193426,1.872525,1.531741


In [8]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train_mc)
X_train_scale = scaler.transform(X_train_mc)

X_val_fold = scaler.transform(X_val_fold)

# Convert back to DataFrame to retain feature names
X_train_scale = pd.DataFrame(X_train_scale, columns=X_train.columns)
X_train_scale['Sintering_temperature_&_Sintering_time'] = (
	X_train_scale['Sintering_temperature'] * X_train_scale['Sintering_time']
)
X_val_fold = pd.DataFrame(X_val_fold, columns=X_train.columns)

In [9]:
X_val_fold

Unnamed: 0,Synthesis_temperature,Sintering_temperature,Sintering_time,Heating_rate,Sintering_temperature_&_Sintering_time
0,-0.11147,-0.172127,0.160101,0.159596,0.066902
