In [1]:
import pandas as pd
import json
import numpy as np

import joblib

pd.set_option('display.max_colwidth',  None)

with open('result.json') as f:
    data = json.load(f)

df = pd.DataFrame(data)

df.fillna(0, inplace=True)
df = df.apply(lambda series: pd.to_numeric(series, errors='coerce'))
df = df.dropna()
df.head()

Unnamed: 0,Epidian 6D,Araldite GY260,Araldite GY250CH,CHS-520 (*CHS-530),CHS-525,CHS-590,Epidian 5,Epidian 6,Epilox AF 18-50,AH-24/Grilonit epoxide 8,...,trisDMP,DCH-99,MXDA,Ethacure 100,nonilfenol,etanol,AHEW,Fazékidő (min),Szakítószilárdság [MPa],Szakadási nyúlás [%]
0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,20.0,...,0.0,0.0,0.0,0.0,0,0,24.4,46.0,82.57,5.58
1,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,20.0,...,0.0,0.0,0.0,0.0,0,0,27.685325,53.0,41.72,3.35
2,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,20.0,...,0.0,0.0,0.0,0.0,0,0,31.228669,34.0,56.6,4.76
3,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,20.0,...,0.0,0.0,0.0,0.0,0,0,35.812133,28.0,61.23,5.59
4,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,20.0,...,0.0,0.0,0.0,0.0,0,0,27.685325,61.0,41.13,3.67


In [2]:
del df["EEW"]
# del df["nonilfenol"]
# del df["etanol"]
del df["AHEW"]
df.head()

Unnamed: 0,Epidian 6D,Araldite GY260,Araldite GY250CH,CHS-520 (*CHS-530),CHS-525,CHS-590,Epidian 5,Epidian 6,Epilox AF 18-50,AH-24/Grilonit epoxide 8,...,APU-4,trisDMP,DCH-99,MXDA,Ethacure 100,nonilfenol,etanol,Fazékidő (min),Szakítószilárdság [MPa],Szakadási nyúlás [%]
0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,20.0,...,0.0,0.0,0.0,0.0,0.0,0,0,46.0,82.57,5.58
1,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,20.0,...,0.0,0.0,0.0,0.0,0.0,0,0,53.0,41.72,3.35
2,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,20.0,...,0.0,0.0,0.0,0.0,0.0,0,0,34.0,56.6,4.76
3,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,20.0,...,0.0,0.0,0.0,0.0,0.0,0,0,28.0,61.23,5.59
4,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,20.0,...,0.0,0.0,0.0,0.0,0.0,0,0,61.0,41.13,3.67


In [3]:


from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader


dataset = torch.FloatTensor(df.iloc[:, -3:].values)
ss = StandardScaler().fit(dataset)
dataset = ss.transform(dataset)

loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [43]:


class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(3, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 6)  # Output size needs to be twice the size of your latent space (for mean and variance)
        )

        self.decoder = nn.Sequential(
            nn.Linear(3, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 3),
            nn.Sigmoid()  # Use sigmoid if your data is normalized between 0 and 1
        )

    def reparameterize(self, mu, log_var):
        std = torch.exp(log_var/2)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        h = self.encoder(x)
        mu, log_var = h.chunk(2, dim=1)  # Split your hidden state into mean and variance components
        z = self.reparameterize(mu, log_var)
        x_reconstructed = self.decoder(z)
        return x_reconstructed, mu, log_var


# Instantiate the model
model = VAE()

# Create your optimizer
optimizer = Adam(model.parameters(), lr=1e-4)

# Loss function
def vae_loss(x_reconstructed, x, mu, log_var):
    reconstruction_loss = nn.functional.l1_loss(x_reconstructed, x, reduction='sum')
    kl_divergence = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
    return reconstruction_loss + kl_divergence


# Training loop
def train(model, data_loader, epochs=50):
    model.train()
    for epoch in range(epochs):
        for batch in data_loader:
            batch = batch.float() # Ensuring your data is of the correct dtype
            optimizer.zero_grad()
            x_reconstructed, mu, log_var = model(batch)
            loss = vae_loss(x_reconstructed, batch, mu, log_var)
            loss.backward()
            optimizer.step()
        print(f"Epoch: {epoch+1}, Loss: {loss.item()}")

# Assuming your data is a PyTorch DataLoader
train(model, loader, epochs=1000)


Epoch: 1, Loss: 36.445194244384766
Epoch: 2, Loss: 19.123098373413086
Epoch: 3, Loss: 27.429000854492188
Epoch: 4, Loss: 17.854982376098633
Epoch: 5, Loss: 25.77815055847168
Epoch: 6, Loss: 17.520448684692383
Epoch: 7, Loss: 19.306396484375
Epoch: 8, Loss: 30.690139770507812
Epoch: 9, Loss: 15.989909172058105
Epoch: 10, Loss: 20.901887893676758
Epoch: 11, Loss: 24.823360443115234
Epoch: 12, Loss: 19.841588973999023
Epoch: 13, Loss: 15.126029014587402
Epoch: 14, Loss: 17.507604598999023
Epoch: 15, Loss: 13.709592819213867
Epoch: 16, Loss: 15.726800918579102
Epoch: 17, Loss: 19.573524475097656
Epoch: 18, Loss: 15.9361572265625
Epoch: 19, Loss: 11.2841796875
Epoch: 20, Loss: 20.508216857910156
Epoch: 21, Loss: 14.140878677368164
Epoch: 22, Loss: 15.234188079833984
Epoch: 23, Loss: 14.71925163269043
Epoch: 24, Loss: 11.461821556091309
Epoch: 25, Loss: 12.219651222229004
Epoch: 26, Loss: 18.175691604614258
Epoch: 27, Loss: 20.780494689941406
Epoch: 28, Loss: 14.114869117736816
Epoch: 29, Lo

In [56]:
model.eval()
with torch.no_grad():
    z = torch.randn(1, 3)  # Generate a random latent vector
    synthetic_data = model.decoder(z)  # Decode the latent vector into a synthetic data point
print(ss.inverse_transform(synthetic_data))

[[33.64820068 65.58874538  4.8379227 ]]
