In [27]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

torch.manual_seed(1)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os


def pred_dataset(file_names, feature_set = ['BET','Vt'] ):
    source_path = 'C:/Kai_Zhang/MachineLearning/Unified gas Adsorption/CO2_adsorption/new_data'
    train_df = pd.DataFrame()
    test_df = pd.DataFrame()
    for file_name in file_names:
        temp_data = pd.read_excel(os.path.join(source_path,file_name+'-02-02-2022.xlsx'),skiprows= 1 )
        temp_data = temp_data.dropna(axis=0,how = 'any',subset = feature_set)
        temp_data = temp_data[temp_data['Pressure']>0.01]
        index = list(set(temp_data['Index'].values))
        test_index= np.random.choice(index,int(0.2*len(index)),replace=False)
        train_x = temp_data.loc[~temp_data['Index'].isin( test_index)]
        test_x = temp_data.loc[temp_data['Index'].isin(test_index)]
        
        train_df = pd.concat([train_df,train_x],axis=0)
        test_df = pd.concat([test_df,test_x],axis =0)
    return train_df,test_df

In [29]:
import matplotlib.pyplot as plt
from scipy import interpolate
import numpy as np
import pandas as pd

def bi_find(data,k):
        l,r = 0,len(data)-1
        while l<r:
            mid = (l+r)>>1
            if data[mid]>=k:
                r = mid
            else:
                l = mid+1
        return l

def inter_data(data:pd.DataFrame):
    indexs = list(set(data["Index"].values))
    x_mat = np.empty((2,100))
    y_mat = []
    for index in indexs:
        temp_df = data[data["Index"]==index]
        x_old = temp_df['Pressure'].values
        y_old = temp_df['Adsorp(mmol/g)'].values
        res = list(set(temp_df['BET'].values.tolist()))
        f = interpolate.interp1d(x_old, y_old,'slinear')
        
        x_new = np.linspace(min(x_old), max(x_old),num=100)
        #x_new
        """
        for ele in x_old:
            t = bi_find(x_new,ele)

            x_new[t] = ele
        
        if (len(x_new)>150):
            x_new = x_new[len(x_new)-150:]
         = np.arange(min(x_old), max(x_old), (max(x_old)-min(x_old))/150)
        """
        y_new = f(x_new)
        X_feature = np.append(x_new.reshape(-1,1),y_new.reshape(-1,1),axis=1).reshape(2,100).tolist()
        x_mat = np.append(x_mat,X_feature,axis=1)
        y_mat.append(res)
        
    return x_mat,np.array(y_mat).reshape(-1,1)

In [30]:
from torch.utils.data import Dataset, DataLoader
class ChemDataset(Dataset):   
    '''
    Custom Dataset subclass. 
    Serves as input to DataLoader to transform X 
      into sequence data using rolling window. 
    DataLoader using this dataset will output batches 
      of `(batch_size, seq_len, n_features)` shape.
    Suitable as an input to RNNs. 
    '''
    def __init__(self, X: np.ndarray, y: np.ndarray, seq_len: int = 1):
        self.X = torch.tensor(X).float().reshape(-1,2,100)
        self.y = torch.tensor(y).float()
        self.seq_len = seq_len

    def __len__(self):
        return self.X.__len__() - (self.seq_len-1)

    def __getitem__(self, index):
        return (self.X[index-1], self.y[index-1])

In [None]:
np.argwhere(np.isnan(np.array(y_train)))

In [31]:
from torch.utils.data import DataLoader
train_df,test_df = pred_dataset(['CO2'])
x_train,y_train = inter_data(train_df)
x_test,y_test = inter_data(test_df)
data_train = ChemDataset(x_train,y_train)
train_loader = DataLoader(data_train, batch_size=128, shuffle=True, sampler=None,
           batch_sampler=None, num_workers=0, collate_fn=None,
           pin_memory=False, drop_last=False, timeout=0,
           worker_init_fn=-1)
data_test = ChemDataset(x_test,y_test)
test_loader = DataLoader(data_test, batch_size=128, shuffle=True, sampler=None,
           batch_sampler=None, num_workers=0, collate_fn=None,
           pin_memory=False, drop_last=False, timeout=0,
           worker_init_fn=-1)

In [32]:
#%%
class simpleLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(simpleLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # x shape (batch, time_step, input_size)
        # out shape (batch, time_step, output_size)
        # h_n shape (n_layers, batch, hidden_size)
        # h_c shape (n_layers, batch, hidden_size)
        # 初始化hidden和memory cell参数
        h0 = torch.randn(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.randn(self.num_layers, x.size(0), self.hidden_size).to(device)

        # forward propagate lstm
        out, (h_n, h_c) = self.lstm(x, (h0, c0))

        # 选取最后一个时刻的输出
        #print(out[:, -1, :].shape)
        out = self.fc(out[:, -1, :])
        return out

In [33]:
#Hyper Parameters
epochs = 100         # 训练整批数据多少次, 为了节约时间, 我们只训练一次
batch_size = 64
time_step = 2      # rnn 时间步数 / 图片高度
input_size = 100     # rnn 每步输入值 / 图片每行像素
hidden_size = 200
num_layers = 3
num_classes = 1
lr = 0.0001

model = simpleLSTM(input_size, hidden_size, num_layers, num_classes).to(device)

# loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr)

In [34]:
# learning rate

total_step = len(train_loader)
for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, time_step, input_size).to(device)
        labels = labels.to(device)

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if epoch % 5 == 0:
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, i+1, total_step, loss.item()))


Epoch [1/100], Step [14/14], Loss: 2014425.7500
Epoch [6/100], Step [14/14], Loss: 2046254.1250
Epoch [11/100], Step [14/14], Loss: 2285053.2500
Epoch [16/100], Step [14/14], Loss: 2442652.7500
Epoch [21/100], Step [14/14], Loss: 2349966.7500
Epoch [26/100], Step [14/14], Loss: 2545992.7500
Epoch [31/100], Step [14/14], Loss: 2644849.0000
Epoch [36/100], Step [14/14], Loss: 2579762.7500
Epoch [41/100], Step [14/14], Loss: 2646106.2500
Epoch [46/100], Step [14/14], Loss: 2439876.7500
Epoch [51/100], Step [14/14], Loss: 2452932.2500
Epoch [56/100], Step [14/14], Loss: 2528634.7500
Epoch [61/100], Step [14/14], Loss: 1991328.8750
Epoch [66/100], Step [14/14], Loss: 2868645.7500
Epoch [71/100], Step [14/14], Loss: 2326652.5000
Epoch [76/100], Step [14/14], Loss: 2575630.0000
Epoch [81/100], Step [14/14], Loss: 2389509.5000
Epoch [86/100], Step [14/14], Loss: 2976437.7500
Epoch [91/100], Step [14/14], Loss: 2349288.5000
Epoch [96/100], Step [14/14], Loss: 2159757.5000


In [377]:
model.eval()
with torch.no_grad():
    test_error =0
    count = 0
    for images, labels in test_loader:
        images = images.reshape(-1, time_step, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        test_error += criterion(outputs,labels)*len(labels)
        count +=len(labels)
        
    print('Test error of the model on test images: {} '.format(test_error/count))

Test error of the model on test images: 2451821.5 


# adding a few more ff layers

In [35]:
class simpleLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(simpleLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)


    def forward(self, x):
        # x shape (batch, time_step, input_size)
        # out shape (batch, time_step, output_size)
        # h_n shape (n_layers, batch, hidden_size)
        # h_c shape (n_layers, batch, hidden_size)
        # 初始化hidden和memory cell参数
        h0 = torch.randn(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.randn(self.num_layers, x.size(0), self.hidden_size).to(device)

        # forward propagate lstm
        out, (h_n, h_c) = self.lstm(x, (h0, c0))

        # 选取最后一个时刻的输出
        #print(out[:, -1, :].shape)
        out = self.fc1(out[:, -1, :])
        out = self.fc2(out)
        return out

In [39]:
#Hyper Parameters
epochs = 150         # 训练整批数据多少次, 为了节约时间, 我们只训练一次
batch_size = 64
time_step = 2      # rnn 时间步数 / 图片高度
input_size = 100     # rnn 每步输入值 / 图片每行像素
hidden_size = 100
num_layers = 3
num_classes = 1
lr = 0.0001

model = simpleLSTM(input_size, hidden_size, num_layers, num_classes).to(device)

# loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr)

In [40]:
# learning rate

total_step = len(train_loader)
for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, time_step, input_size).to(device)
        labels = labels.to(device)

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if epoch % 5 == 0:
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, i+1, total_step, loss.item()))


Epoch [1/150], Step [14/14], Loss: 2026825.3750
Epoch [6/150], Step [14/14], Loss: 2973216.2500
Epoch [11/150], Step [14/14], Loss: 2283426.0000
Epoch [16/150], Step [14/14], Loss: 2192192.7500
Epoch [21/150], Step [14/14], Loss: 2496417.2500
Epoch [26/150], Step [14/14], Loss: 2889245.0000
Epoch [31/150], Step [14/14], Loss: 2093203.7500
Epoch [36/150], Step [14/14], Loss: 1782558.8750
Epoch [41/150], Step [14/14], Loss: 2426504.0000
Epoch [46/150], Step [14/14], Loss: 2417032.0000
Epoch [51/150], Step [14/14], Loss: 2117300.7500
Epoch [56/150], Step [14/14], Loss: 1438259.0000
Epoch [61/150], Step [14/14], Loss: 1998714.5000
Epoch [66/150], Step [14/14], Loss: 2564795.2500
Epoch [71/150], Step [14/14], Loss: 1538279.2500
Epoch [76/150], Step [14/14], Loss: 1070405.6250
Epoch [81/150], Step [14/14], Loss: 1884080.3750
Epoch [86/150], Step [14/14], Loss: 1012900.1875
Epoch [91/150], Step [14/14], Loss: 2018664.1250
Epoch [96/150], Step [14/14], Loss: 1599124.5000
Epoch [101/150], Step 

In [None]:
import matplotlib.pyplot as plt
from scipy import interpolate
import numpy as np
import pandas as pd



def bi_find(data,k):
        l,r = 0,len(data)-1
        while l<r:
            mid = (l+r)>>1
            if data[mid]>=k:
                r = mid
            else:
                l = mid+1
        return l

x = data.iloc[:,0].values
y = data.iloc[:,1].values
f = interpolate.interp1d(x, y,'slinear')
xnew = np.arange(min(x), max(x), (max(x)-min(x))/200)
for ele in x:
    t = bi_find(xnew,ele)
    xnew[t] =ele

ynew = f(xnew)   # use interpolation function returned by `interp1d`
plt.plot(x, y, 'o', xnew, ynew, '-')
plt.show()

# One D CNN model

In [41]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(2, 2, 1)
        self.pool = nn.MaxPool1d(2)
        self.conv2 = nn.Conv1d(2, 16, 1)
        self.fc1 = nn.Linear(16 * 5 * 5, 50)
        self.fc2 = nn.Linear(50, 50)
        self.fc3 = nn.Linear(50, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)

        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr)

In [42]:
epochs =200
total_step = len(train_loader)
for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, time_step, input_size).to(device)
        labels = labels.to(device)
        # forward pass
        outputs = net(images)
        loss = criterion(outputs, labels)
        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 5 == 0:
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, i+1, total_step, loss.item()))

Epoch [1/200], Step [14/14], Loss: 2323749.5000
Epoch [6/200], Step [14/14], Loss: 2977628.7500
Epoch [11/200], Step [14/14], Loss: 3308356.2500
Epoch [16/200], Step [14/14], Loss: 1747488.6250
Epoch [21/200], Step [14/14], Loss: 2086414.2500
Epoch [26/200], Step [14/14], Loss: 2391811.0000
Epoch [31/200], Step [14/14], Loss: 2457477.0000
Epoch [36/200], Step [14/14], Loss: 2221360.0000
Epoch [41/200], Step [14/14], Loss: 2137192.0000
Epoch [46/200], Step [14/14], Loss: 1261712.5000
Epoch [51/200], Step [14/14], Loss: 1726338.5000
Epoch [56/200], Step [14/14], Loss: 1654463.8750
Epoch [61/200], Step [14/14], Loss: 2027749.8750
Epoch [66/200], Step [14/14], Loss: 1225112.5000
Epoch [71/200], Step [14/14], Loss: 1294942.0000
Epoch [76/200], Step [14/14], Loss: 1269464.5000
Epoch [81/200], Step [14/14], Loss: 1740168.6250
Epoch [86/200], Step [14/14], Loss: 1520866.1250
Epoch [91/200], Step [14/14], Loss: 964939.1875
Epoch [96/200], Step [14/14], Loss: 979641.0625
Epoch [101/200], Step [1

# 2D CNN

In [43]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 2,2)
        self.pool = nn.MaxPool2d(1)
        self.conv2 = nn.Conv2d(2, 8, 1)
        self.fc1 = nn.Linear(792, 50)
        self.fc2 = nn.Linear(50, 50)
        self.fc3 = nn.Linear(50, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)

        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr)

In [44]:
epochs =200
total_step = len(train_loader)
for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, 1,time_step, input_size).to(device)
        labels = labels.to(device)

        # forward pass
        outputs = net(images)
        loss = criterion(outputs, labels)

        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()

    if epoch % 5 == 0:
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, i+1, total_step, loss.item()))

Epoch [1/200], Step [14/14], Loss: 2170740.5000
Epoch [6/200], Step [14/14], Loss: 2870496.0000
Epoch [11/200], Step [14/14], Loss: 2324477.5000
Epoch [16/200], Step [14/14], Loss: 3012561.2500
Epoch [21/200], Step [14/14], Loss: 2442342.7500
Epoch [26/200], Step [14/14], Loss: 2131799.5000
Epoch [31/200], Step [14/14], Loss: 2154735.2500
Epoch [36/200], Step [14/14], Loss: 2075319.1250
Epoch [41/200], Step [14/14], Loss: 2068151.6250
Epoch [46/200], Step [14/14], Loss: 1889858.8750
Epoch [51/200], Step [14/14], Loss: 1256015.7500
Epoch [56/200], Step [14/14], Loss: 1784232.5000
Epoch [61/200], Step [14/14], Loss: 1612620.5000
Epoch [66/200], Step [14/14], Loss: 1478216.0000
Epoch [71/200], Step [14/14], Loss: 1263379.0000
Epoch [76/200], Step [14/14], Loss: 1451782.2500
Epoch [81/200], Step [14/14], Loss: 1618556.5000
Epoch [86/200], Step [14/14], Loss: 1281440.7500
Epoch [91/200], Step [14/14], Loss: 852093.3125
Epoch [96/200], Step [14/14], Loss: 1359693.1250
Epoch [101/200], Step [

 # Using keras to build a cnn network

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os


def pred_dataset(file_names, feature_set = ['BET','Vt'] ):
    source_path = 'C:/Kai_Zhang/MachineLearning/Unified gas Adsorption/CO2_adsorption/new_data'
    train_df = pd.DataFrame()
    test_df = pd.DataFrame()
    for file_name in file_names:
        temp_data = pd.read_excel(os.path.join(source_path,file_name+'-02-02-2022.xlsx'),skiprows= 1 )
        temp_data = temp_data.dropna(axis=0,how = 'any',subset = feature_set)
        temp_data = temp_data[temp_data['Pressure']>0.01]
        index = list(set(temp_data['Index'].values))
        test_index= np.random.choice(index,int(0.2*len(index)),replace=False)
        train_x = temp_data.loc[~temp_data['Index'].isin( test_index)]
        test_x = temp_data.loc[temp_data['Index'].isin(test_index)]
        
        train_df = pd.concat([train_df,train_x],axis=0)
        test_df = pd.concat([test_df,test_x],axis =0)
    return train_df,test_df

In [2]:
import matplotlib.pyplot as plt
from scipy import interpolate
import numpy as np
import pandas as pd

def bi_find(data,k):
        l,r = 0,len(data)-1
        while l<r:
            mid = (l+r)>>1
            if data[mid]>=k:
                r = mid
            else:
                l = mid+1
        return l

def inter_data(data:pd.DataFrame):
    indexs = list(set(data["Index"].values))
    
    x_mat = []
    y_mat = []
    for index in indexs:
        temp_df = data[data["Index"]==index]
        x_old = temp_df['Pressure'].values
        y_old = temp_df['Adsorp(mmol/g)'].values
        res = list(set(temp_df['BET'].values.tolist()))
        f = interpolate.interp1d(x_old, y_old,'slinear')
        
        x_new = np.linspace(min(x_old), max(x_old),num=100)
        #x_new
        """
        for ele in x_old:
            t = bi_find(x_new,ele)

            x_new[t] = ele
        
        if (len(x_new)>150):
            x_new = x_new[len(x_new)-150:]
         = np.arange(min(x_old), max(x_old), (max(x_old)-min(x_old))/150)
        """
        y_new = f(x_new)
        X_feature = np.append(x_new.reshape(-1,1),y_new.reshape(-1,1),axis=1).reshape(2,100).tolist()
        x_mat.append(X_feature)
        y_mat.append(res)
        
    return x_mat,np.array(y_mat).reshape(-1,1)
    


In [3]:
import  os
import  tensorflow as tf
import  numpy as np
from   tensorflow import keras
from   tensorflow.keras import layers,optimizers,losses
from   tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Activation,Dropout
#os.environ["CUDA_VISIBLE_DEVICES"] = "0"
df_train,df_test = pred_dataset(['CO2'])
#df.head()

images,labels = inter_data(df_train)
images1,labels1 = inter_data(df_test)
print(len(images),len(labels))
#images,images1,labels,labels1 = train_test_split(img,label,test_size=0.2)

#from my_load_data import load_data,preprocess

batchsz = 128
# 创建训练集Datset对象

db_train = tf.data.Dataset.from_tensor_slices((tf.cast(images, dtype=tf.float32), tf.convert_to_tensor(labels,dtype=tf.float32)))
db_train = db_train.shuffle(1000).batch(batchsz)
# 创建验证集Datset对象

db_val = tf.data.Dataset.from_tensor_slices((tf.cast(images1, dtype=tf.float32), tf.convert_to_tensor(labels1,dtype=tf.float32)))
db_val = db_val.batch(batchsz)

1716 1716


In [None]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten,Conv1D
#create model
model = Sequential()
#add model layers
model.add(Conv2D(64, kernel_size=1, activation="relu", input_shape=(2,100,1)))
model.add(Conv2D(32, kernel_size=1, activation="relu"))
model.add(Flatten())
model.add(Dense(1, activation="relu"))
model.compile(optimizer='adam', loss='mse', metrics=['mse'])
model.fit(db_train, validation_data = db_val, validation_freq = 1, epochs = 100,
           )

Error: Session cannot generate requests