In [1]:
import requests
import pandas as pd
from funcoes import *

In [2]:
r = requests.get("https://mof.tech.northwestern.edu/mofs.json")

print(r)

<Response [200]>


In [3]:
data = r.json()['results']

In [4]:
df = pd.DataFrame.from_dict(data)

In [5]:
df = funcao_extrair_features_isotherms(df)

In [6]:
def importar_dados(dataframe):
    'importar os dados provenientes do site das MOFs'
    dados_hmof = pd.read_excel(dataframe)
    return dados_hmof

In [7]:
dataframe = ('Pasta1.xlsx')
df_import = importar_dados(dataframe)

## Importações



In [8]:
import torch
import torch.nn as nn
import torch.optim as optim

import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

## Código e discussão



### Divisão treino-teste

In [9]:
TAMANHO_TESTE = 0.1
SEMENTE_ALEATORIA = 61455
FEATURES = ['lcd', 'pld', 'void_fraction', 'surface_area_m2g', 'surface_area_m2cm3', 'CO2_0.01', 'CO2_0.1', 'CO2_2.5', 'CO2_0.05', 'CO2_0.5',
            'N2_0.09', 'N2_0.9', 'CH4_2.5', 'CH4_4.5', 'CH4_0.05', 'CH4_0.5', 'CH4_0.9', 'CH4_35', 'H2_2', 'H2_100']
TARGET = ['Xe_1']

indices = df.index
indices_treino, indices_teste = train_test_split(
    indices, test_size=TAMANHO_TESTE, random_state=SEMENTE_ALEATORIA
)

df_treino = df.loc[indices_treino]
df_teste = df.loc[indices_teste]

X_treino = df_treino.reindex(FEATURES, axis=1)
y_treino = df_treino.reindex(TARGET, axis=1)
X_teste = df_teste.reindex(FEATURES, axis=1)
y_teste = df_teste.reindex(TARGET, axis=1)

In [10]:
normalizador_x = MinMaxScaler()
normalizador_y = MinMaxScaler()

normalizador_x.fit(X_treino)
normalizador_y.fit(y_treino)

X_treino = normalizador_x.transform(X_treino)
y_treino = normalizador_y.transform(y_treino)
X_teste = normalizador_x.transform(X_teste)
y_teste = normalizador_y.transform(y_teste)

In [11]:
X_treino = torch.tensor(X_treino, dtype=torch.float32)
y_treino = torch.tensor(y_treino, dtype=torch.float32)
X_teste = torch.tensor(X_teste, dtype=torch.float32)
y_teste = torch.tensor(y_teste, dtype=torch.float32)

In [12]:
print(X_treino)
print()
print(y_treino)

tensor([[0.0000, 0.0870, 0.0682,  ..., 0.2837, 0.2522, 0.0217],
        [0.6818, 0.6957, 0.9635,  ..., 0.8317, 0.3059, 0.9875],
        [0.5909, 0.6087, 0.8832,  ..., 0.6870, 0.2084, 0.8964],
        ...,
        [0.5000, 0.4783, 0.8936,  ..., 0.9445, 0.6573, 0.9470],
        [0.6364, 0.6522, 0.9116,  ..., 0.6851, 0.1529, 0.9118],
        [0.3636, 0.3913, 0.7463,  ..., 0.8434, 0.7069, 0.8041]])

tensor([[0.0791],
        [0.2145],
        [0.0996],
        [0.3141],
        [0.4189],
        [0.1077],
        [0.3269],
        [0.3659],
        [0.2356],
        [0.1323],
        [0.1049],
        [0.1044],
        [0.5871],
        [0.2605],
        [0.3615],
        [0.2972],
        [0.0874],
        [0.5335],
        [0.3386],
        [0.0551],
        [0.2336],
        [0.1077],
        [0.1703],
        [0.1293],
        [0.2368],
        [0.1284],
        [0.1413],
        [0.3576],
        [0.0902],
        [0.1426],
        [0.1285],
        [0.4786],
        [0.4571],
       

In [13]:
class MLP(nn.Module):
    def __init__(
        self, num_dados_entrada, neuronios_c1, neuronios_c2, num_targets
    ):
        # Temos que inicializar a classe mãe
        super().__init__()

        # Definindo as camadas da rede
        self.camadas = nn.Sequential(
                    nn.Linear(num_dados_entrada, neuronios_c1),
                    nn.ReLU(),
                    nn.Linear(neuronios_c1, neuronios_c2),
                    nn.ReLU(),
                    nn.Linear(neuronios_c2, num_targets),
                )

    def forward(self, x):
        """Esse é o método que executa a rede do pytorch."""
        x = self.camadas(x)
        return x

In [14]:
NUM_DADOS_DE_ENTRADA = X_treino.shape[1]
NUM_DADOS_DE_SAIDA = y_treino.shape[1]
NEURONIOS_C1 = 50
NEURONIOS_C2 = 20

minha_MLP = MLP(NUM_DADOS_DE_ENTRADA, NEURONIOS_C1, NEURONIOS_C2, NUM_DADOS_DE_SAIDA)

In [15]:
for p in minha_MLP.parameters():
    print(p)

Parameter containing:
tensor([[ 0.0528,  0.2180, -0.1329,  0.1710,  0.0145, -0.1705,  0.0623,  0.1666,
         -0.0655, -0.0796, -0.0549,  0.1480,  0.0542,  0.1950, -0.0861,  0.1242,
          0.1890,  0.0146, -0.1981,  0.1882],
        [-0.1006, -0.2026, -0.0694, -0.1707, -0.0181, -0.1810,  0.0134, -0.1948,
          0.1032,  0.1622,  0.0434,  0.0454,  0.1060, -0.0473,  0.2044,  0.0022,
         -0.0103,  0.1243, -0.1244, -0.1515],
        [-0.1130,  0.1484,  0.1490, -0.1771, -0.1816, -0.1791,  0.0328, -0.1487,
         -0.1029, -0.1199, -0.0907,  0.0935,  0.0523, -0.0795, -0.2056, -0.0411,
          0.0099, -0.1047, -0.0133, -0.1157],
        [ 0.1313, -0.0747,  0.1336, -0.1626, -0.0998,  0.0038, -0.1047,  0.0103,
          0.1650,  0.1427, -0.0331, -0.1751,  0.0997,  0.0599,  0.1232,  0.0693,
         -0.0388, -0.0648,  0.2147, -0.1625],
        [-0.2052,  0.1272,  0.1599, -0.0289, -0.0987,  0.1041,  0.0465, -0.0763,
         -0.1966,  0.1085,  0.1365,  0.2177,  0.1780, -0.0045, -0

In [16]:
y_prev = minha_MLP(X_treino)
y_prev

tensor([[-0.1400],
        [-0.1937],
        [-0.1803],
        [-0.1611],
        [-0.1646],
        [-0.1848],
        [-0.1653],
        [-0.1898],
        [-0.1949],
        [-0.1877],
        [-0.1791],
        [-0.1867],
        [-0.1796],
        [-0.1610],
        [-0.1700],
        [-0.1817],
        [-0.1411],
        [-0.1777],
        [-0.1804],
        [-0.1447],
        [-0.1854],
        [-0.1828],
        [-0.1574],
        [-0.1521],
        [-0.1839],
        [-0.1515],
        [-0.1467],
        [-0.1616],
        [-0.1830],
        [-0.1845],
        [-0.1890],
        [-0.1718],
        [-0.1812],
        [-0.1385],
        [-0.1829],
        [-0.1852],
        [-0.1428],
        [-0.1851],
        [-0.1617],
        [-0.1788],
        [-0.1852],
        [-0.1851],
        [-0.1788],
        [-0.1848],
        [-0.1839],
        [-0.1793],
        [-0.1622],
        [-0.1759],
        [-0.1844],
        [-0.1901],
        [-0.1681],
        [-0.1576],
        [-0.

In [17]:
TAXA_DE_APRENDIZADO = 0.001

# função perda será o erro quadrático médio
fn_perda = nn.MSELoss()

# otimizador será o Adam, um tipo de descida do gradiente
otimizador = optim.Adam(minha_MLP.parameters(), lr=TAXA_DE_APRENDIZADO)

In [18]:
minha_MLP.train()

MLP(
  (camadas): Sequential(
    (0): Linear(in_features=20, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=1, bias=True)
  )
)

In [19]:
NUM_EPOCAS = 1000

y_true = y_treino

for epoca in range(NUM_EPOCAS):
    # forward pass
    y_pred = minha_MLP(X_treino)

    # zero grad
    otimizador.zero_grad()

    # loss
    loss = fn_perda(y_pred, y_true)

    # backpropagation
    loss.backward()

    # atualiza parâmetros
    otimizador.step()

    # mostra resultado
    print(epoca, loss.data)

0 tensor(0.2063)
1 tensor(0.1932)
2 tensor(0.1805)
3 tensor(0.1681)
4 tensor(0.1561)
5 tensor(0.1445)
6 tensor(0.1333)
7 tensor(0.1225)
8 tensor(0.1121)
9 tensor(0.1019)
10 tensor(0.0920)
11 tensor(0.0825)
12 tensor(0.0735)
13 tensor(0.0649)
14 tensor(0.0569)
15 tensor(0.0494)
16 tensor(0.0424)
17 tensor(0.0361)
18 tensor(0.0306)
19 tensor(0.0257)
20 tensor(0.0213)
21 tensor(0.0177)
22 tensor(0.0148)
23 tensor(0.0128)
24 tensor(0.0115)
25 tensor(0.0111)
26 tensor(0.0114)
27 tensor(0.0121)
28 tensor(0.0132)
29 tensor(0.0142)
30 tensor(0.0151)
31 tensor(0.0156)
32 tensor(0.0157)
33 tensor(0.0155)
34 tensor(0.0149)
35 tensor(0.0141)
36 tensor(0.0132)
37 tensor(0.0123)
38 tensor(0.0115)
39 tensor(0.0108)
40 tensor(0.0102)
41 tensor(0.0098)
42 tensor(0.0095)
43 tensor(0.0093)
44 tensor(0.0092)
45 tensor(0.0090)
46 tensor(0.0089)
47 tensor(0.0088)
48 tensor(0.0087)
49 tensor(0.0085)
50 tensor(0.0084)
51 tensor(0.0081)
52 tensor(0.0079)
53 tensor(0.0076)
54 tensor(0.0073)
55 tensor(0.0070)
56

In [20]:
with torch.no_grad():
    y_true = normalizador_y.inverse_transform(y_treino)
    y_pred = minha_MLP(X_treino)
    y_pred = normalizador_y.inverse_transform(y_pred)

for yt, yp in zip(y_true, y_pred):
    print(yt, yp)

[0.57337847] [0.5447131]
[1.553999] [1.55711915]
[0.72190402] [0.74646808]
[2.27605493] [2.34879369]
[3.03556895] [2.82934694]
[0.78030701] [0.78916003]
[2.368495] [2.34849093]
[2.65115801] [2.66850028]
[1.70751903] [1.76820132]
[0.95890204] [1.00178814]
[0.760066] [0.7697684]
[0.75630199] [0.75016622]
[4.25403996] [4.26747376]
[1.8873291] [1.80823277]
[2.61967902] [2.64470954]
[2.15343697] [2.10218909]
[0.63327298] [0.65110297]
[3.86579008] [3.95719035]
[2.45340706] [2.40538913]
[0.3991435] [0.38247712]
[1.692429] [1.7319885]
[0.78046498] [0.89695974]
[1.23384097] [1.20803843]
[0.937126] [0.95344046]
[1.71586797] [1.73859548]
[0.93059903] [0.87903149]
[1.02393604] [0.99882487]
[2.59136392] [2.55984563]
[0.65360399] [0.6080953]
[1.03309998] [1.04586627]
[0.931082] [0.9060896]
[3.46831008] [3.56941902]
[3.31250995] [3.29716866]
[0.60076829] [0.71016057]
[1.99398297] [2.09685989]
[4.56051013] [4.84297792]
[0.49076277] [0.47739521]
[0.64449702] [0.63070221]
[2.51258008] [2.69471614]
[3.26

In [21]:
minha_MLP.eval()

MLP(
  (camadas): Sequential(
    (0): Linear(in_features=20, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=1, bias=True)
  )
)

In [22]:
with torch.no_grad():
    y_true = normalizador_y.inverse_transform(y_teste)
    y_pred = minha_MLP(X_teste)
    y_pred = normalizador_y.inverse_transform(y_pred)

for yt, yp in zip(y_true, y_pred):
    print(yt, yp)

[0.74338086] [0.5979651]
[1.09562605] [1.02518434]
[0.00526056] [0.02477354]
[0.93107099] [0.9542006]
[0.96158997] [0.92993347]
[3.89166992] [3.91171005]
[3.0270899] [2.89007545]
[1.13198603] [1.10160526]
[0.91508726] [0.85217069]
[2.70047164e-13] [-0.04020032]
[0.74763] [0.70713565]
[0.992004] [1.03575616]
[2.39291091] [2.57552055]
[1.82485507] [1.91430229]
[1.93217601] [1.8512262]
[0.0331167] [0.06616969]
[0.70727699] [0.68497112]
[1.27770099] [1.24469546]
[4.21562986] [4.35794674]
[0.77424353] [0.82335307]


In [23]:
RMSE = mean_squared_error(y_true, y_pred, squared=False)
print(f'Loss do teste: {RMSE}')

Loss do teste: 0.08077655239661852
