In [1]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split

# df_X = pd.DataFrame(X, columns = ['p', 'T'])
# df_y = pd.DataFrame(y, columns = ['E'])

# Load the two tables used for training.
# NOTE(review): the file names and variable names are crossed -- `df_X` (model
# input) is read from the *energy* file and `df_y` (targets) from the
# *macro-parameter* file. This looks intentional, because the network defined
# further down has 1 input and 2 outputs, i.e. it maps E -> (p, T); confirm.
df_X = pd.read_csv("df_y_energy.csv")
df_y = pd.read_csv("df_X_macroparams.csv")

# Report the raw (unscaled) value range of every column.
# Series.min()/.max() are vectorised and skip NaN, whereas the builtin
# min()/max() loop over each element in Python and give order-dependent
# results if a NaN is present.
min_T, max_T = df_y['T'].min(), df_y['T'].max()
print(f"Temperature changes from {min_T} to {max_T}")

min_p, max_p = df_y['p'].min(), df_y['p'].max()
print(f"Pressure changes from {min_p} to {max_p}")

min_E, max_E = df_X['E'].min(), df_X['E'].max()
print(f"Energy changes from {min_E} to {max_E}")

# df_X.to_csv('df_X_macroparams.csv', index=False)  
# df_y.to_csv('df_y_energy.csv', index=False)  



Temperature changes from 250 to 1999
Pressure changes from 5 to 499
Energy changes from 0.0700428745158648 to 29.88182219811612


In [3]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every column to [0, 1]. Each frame gets its own, separately named
# scaler so that both fitted transforms stay available afterwards:
#   scaler_X -- needed to scale new energy values the same way at inference,
#   scaler_y -- needed to map predicted (p, T) back to physical units.
# Previously both were assigned to `scaler`, so the energy scaler was lost.
# NOTE(review): the scalers are fitted on the full data set before the
# train/validation/test split, so the held-out rows influence the min/max.
# NOTE(review): the frames are overwritten in place; re-running this cell
# re-fits the scalers on already scaled data and loses the original ranges.
scaler_X = MinMaxScaler()
df_X[['E']] = scaler_X.fit_transform(df_X[['E']])

scaler_y = MinMaxScaler()
df_y[['p', 'T']] = scaler_y.fit_transform(df_y[['p', 'T']])

# Backward-compatible alias: the original cell left `scaler` bound to the
# (p, T) scaler.
scaler = scaler_y

In [4]:
# Stage 1: set aside 1 % of all rows as a hold-out; the rest is training data.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    df_X,
    df_y,
    test_size=0.01,
    random_state=42,
)

# Stage 2: split the hold-out again, 99 % validation / 1 % test.
# NOTE(review): this leaves only ~0.01 % of the data for testing -- confirm
# that such a small test set is intended.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_holdout,
    y_holdout,
    test_size=0.01,
    random_state=42,
)

print(f'Train, validation, test lengths = {X_train.shape[0]}, {X_valid.shape[0]}, {X_test.shape[0]}')

Train, validation, test lengths = 857587, 8576, 87


In [5]:
from torch.utils.data import Dataset, DataLoader

class Energy(Dataset):
    """Map-style dataset that serves rows of a feature frame as float32 tensors.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; one row per sample.
    y : pandas.DataFrame or pandas.Series, optional
        Target table aligned row-by-row (positionally) with ``X``. When
        omitted, the dataset yields features only.

    Notes
    -----
    Both tables are converted to float32 tensors once, at construction time,
    instead of doing a pandas ``.iloc`` lookup and a tensor conversion for
    every single sample. The tensors are copies: changes made to ``X`` / ``y``
    after the dataset has been created are not reflected in the served items.
    The original objects remain accessible as ``self.X`` and ``self.y``.
    """

    def __init__(self, X, y=None):
        # Keep the pandas objects under their historical attribute names.
        self.X = X
        self.y = y

        # One-off conversion; torch.tensor always copies the NumPy buffer.
        self._features = torch.tensor(X.to_numpy(), dtype=torch.float32)
        self._targets = (
            None if y is None else torch.tensor(y.to_numpy(), dtype=torch.float32)
        )

    def __len__(self):
        """Return the number of samples (rows of the feature tensor)."""
        return len(self._features)

    def __getitem__(self, index):
        """Return ``x`` or, when targets were supplied, the pair ``(x, y)``.

        Indexing is positional, like the ``.iloc`` access it replaces.
        """
        x = self._features[index]
        if self._targets is None:
            return x
        return x, self._targets[index]

# Mini-batch size shared by all three loaders.
BATCH_SIZE = 128

# Training and validation datasets carry targets; the test dataset is built
# from features only.
train_dataset = Energy(X_train, y_train)
valid_dataset = Energy(X_valid, y_valid)
test_dataset = Energy(X_test)

# Only the training loader shuffles; validation and test keep row order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [10]:
import torch
import torch.nn as nn

class NN(nn.Module):
    """Fully connected network with one hidden layer: 1 -> 50 (tanh) -> 2."""

    def __init__(self):
        super().__init__()
        hidden = nn.Linear(in_features=1, out_features=50)
        readout = nn.Linear(in_features=50, out_features=2)
        # nn.Dropout(0.2) could be inserted after the activation; it is
        # currently left out.
        self.layers = nn.Sequential(hidden, nn.Tanh(), readout)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the raw outputs."""
        return self.layers(x)

# Fix the global PyTorch RNG before any weights are created. Without a seed
# the initial weights -- and the shuffle order of the training loader, which
# is derived from the same global generator -- differ on every run, so the
# reported losses cannot be reproduced. 42 matches the `random_state` used
# for the train/validation/test split.
torch.manual_seed(42)

model = NN()
print(model)

# Adam over all model parameters; mean-squared error between the predicted
# and the target vectors.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

NN(
  (layers): Sequential(
    (0): Linear(in_features=1, out_features=50, bias=True)
    (1): Tanh()
    (2): Linear(in_features=50, out_features=2, bias=True)
  )
)


In [None]:
# Per-epoch history of the mean batch loss on the training and validation sets.
mean_train_losses = []
mean_valid_losses = []
valid_acc_list = []  # declared here but not filled in by this cell

epochs = 20

for epoch in range(epochs):
    # ---- optimisation pass over the training batches ----
    model.train()
    train_losses = []
    valid_losses = []
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        loss = loss_fn(model(batch_x), batch_y)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    # ---- evaluation pass over the validation batches (no gradients) ----
    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in valid_loader:
            loss = loss_fn(model(batch_x), batch_y)
            valid_losses.append(loss.item())

    # Unweighted mean over the batch losses of this epoch.
    epoch_train_loss = np.mean(train_losses)
    epoch_valid_loss = np.mean(valid_losses)
    mean_train_losses.append(epoch_train_loss)
    mean_valid_losses.append(epoch_valid_loss)

    print('epoch : {}, train loss : {:.6f}, valid loss : {:.6f}'.format(epoch+1, epoch_train_loss, epoch_valid_loss))