In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import torchsummary
import torch.optim as optim
import os
import openpyxl
from torch.utils.data import TensorDataset, DataLoader,random_split
#import tensorflow as tf

df = pd.read_excel("Data for headed bars_for DataFrame_220725.xlsx", skiprows = 17, engine = 'openpyxl', sheet_name= 'headed (2)' )
df = pd.DataFrame(df, columns = ["No.", "Author", "Year", "Test type", "Remark", "Specimen", "fy", "Ld", "fcm", "db", "b", "cos,avg",
                                 "cth", "ch", "Nh", "Bottom cover", "Ah/Ab", "Fsu at La, test", "dtr", "Ntr", "st"]) # st 제거시

df = df[df["Test type"] == "Joint type"]
y= df["Fsu at La, test"] 

pd.set_option('display.max_rows',None)

y2=y[~y.isnull()]
X = df[["Test type", "fy", "Ld", "fcm", "db", "b", "cos,avg", "cth", "ch", "Nh", "Bottom cover", "Ah/Ab", "st"]] 

X = pd.get_dummies(data = X, columns = ["Test type"], prefix = "Test_type")
X = X[~y.isnull()]

pd.options.display.max_rows = None
X.dropna(inplace = True)

y2 = y2[X.index]
y2 = y2.loc[(y2 != 0)] # series
X = X.loc[y2.index] # DataFrame

* train, test 분류

In [18]:
#test, train+valid 분류
from sklearn.model_selection import train_test_split
np.random.seed(0)
X_train, X_test, y_train, y_test = train_test_split(X, y2, test_size=0.20, random_state=142)
#print(X_test.shape, y_test.shape)

* 분류한 데이터 스케일링 후 텐서로 변환

In [19]:
# 분류한 데이터 스케일링
from sklearn.preprocessing import MinMaxScaler
scX = MinMaxScaler() #형태는 넘파이
x_train_scaled = scX.fit_transform(X_train)   
x_test_scaled = scX.transform(X_test)

scY = MinMaxScaler()
y_train_scaled = scY.fit_transform(y_train.values.reshape(-1,1)) 
y_test_scaled = scY.transform(y_test.values.reshape(-1,1))

#스케일링->텐서로
x_test_tensor = torch.FloatTensor(x_test_scaled)
y_test_tensor = torch.FloatTensor(y_test_scaled)
x_train_tensor = torch.FloatTensor(x_train_scaled)
y_train_tensor = torch.FloatTensor(y_train_scaled)

* 파이토치 이용한 모델 구현

In [20]:
#데이터 모델 클래스로 구현
class  MyModel(nn.Module):
    def __init__(self):
        super( MyModel, self).__init__()
        self.layer = nn.Linear(13, 1)

    def forward(self, x):
        return self.layer(x)
    
#커스텀 데이터 셋
class CustomDataset(TensorDataset): 
    def __init__(self):
        self.x = x_train_tensor
        self.y = y_train_tensor
    
    def __getitem__(self, idx):
        x = torch.FloatTensor(self.x[idx])
        y = torch.FloatTensor(self.y[idx]) 
        return x, y
    
    def __len__(self): 
        return len(self.x)

#데이터 셋에서 train, validation 나누기 
dataset = CustomDataset()
dataset_size = len(dataset)
train_size = int(dataset_size * 0.8)
validation_size = dataset_size - train_size

train_dataset, validation_dataset = random_split(dataset, [train_size, validation_size])
print(f"Training Data Size : {len(train_dataset)}")
print(f"Validation Data Size : {len(validation_dataset)}")

# 데이터 로더
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=4, shuffle=True)
#test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=True, drop_last=True)

Training Data Size : 172
Validation Data Size : 44


학습

In [22]:
train_losses = []
val_losses = []
train_acc = []
val_acc = []

device = "cuda" if torch.cuda.is_available() else "cpu"
model = MyModel().to(device)
criterion = nn.MSELoss().to(device) #
optimizer = torch.optim.Adam(model.parameters(), lr = 0.03) #
nb_epochs = 10000

for epoch in range(nb_epochs+1):
    train_loss = 0       
    train_accuracy = 0 
    for _,data in enumerate(train_dataloader):
        x,y =data
        x = x.to(device)
        y = y.to(device)
        prediction = model(x)
        cost =criterion(prediction, y)
            
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        train_loss += cost.item()
        #train_accuracy += torch.sum(prediction == y.data).detach().cpu().numpy()/x.size(0)
        
    model.eval()
    val_loss = 0
    #val_accuracy = 0
    with torch.no_grad():
        for _,data in enumerate(validation_dataloader):
            x,y = data
            x = x.to(device)
            y = y.to(device)
            prediction = model(x)
            cost = criterion(prediction, y)
            
            val_loss += cost.item()
            #val_accuracy += torch.sum(prediction == y.data).detach().cpu().numpy()/x.size(0) #분류에 사용

    # calculate mean for each batch
    train_losses.append(train_loss / len(train_dataloader))
    val_losses.append(val_loss / len(validation_dataloader))
    train_acc.append(train_accuracy / len(train_dataloader))
    val_acc.append(val_accuracy / len(validation_dataloader))
    
    if epoch % 1000 == 0:
        print("Epoch:{:4d}/{}".format(epoch, nb_epochs),
              "Train Loss: {:.6f}".format(train_loss / len(train_dataloader)),
              "Val Loss: {:.6f}".format(val_loss / len(validation_dataloader)))
              

history = {'train_loss': train_losses, 'val_loss': val_losses,
            'train_acc': train_acc, 'val_acc': val_acc}


NameError: name 'val_accuracy' is not defined

In [None]:
plt.figure(figsize = (8,4))
plt.plot(history['train_loss'],label = "Train loss")
plt.plot(history['val_loss'],label = "Valid loss")
plt.title(f'Loss', color='white', fontweight = 'bold')
plt.ylabel('Loss', color='white')
plt.xlabel('epoch', color='white')

plt.legend(), plt.grid()

: 

평가

In [None]:
y_p = model(x_test_tensor)
y_p[:10], y_test_scaled[:10]

: 

In [None]:
y_test_unscaled = scY.inverse_transform(y_test_scaled)  
print(len(y_test_unscaled),y_test_unscaled, type(y_test_unscaled))

y_p_unscaled = scY.inverse_transform(y_p.detach().numpy())
print(len(y_p_unscaled),y_p_unscaled)

: 

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(6,6))

ax.plot(y_test_unscaled, y_p_unscaled, 'r.')
ax.set_xlabel("Tested tensile stress, ft_test (MPa)", fontsize = 14,color = 'white')
ax.set_ylabel("Predicted tensile stress, ft_pred (MPa)", fontsize = 14, color = 'white')
x = np.linspace(0, 1000, 100)
y = x
ax.plot(x, y, 'b')
fig.show()


: 

In [None]:
x_test_unscaled = scX.inverse_transform(x_test_scaled)  
print(x_test_unscaled)

: 

모델 평가

In [None]:
test_loss = 0
#test_accuracy = 0
model.eval()
with torch.no_grad():
    for i in range(len(y_test_unscaled)):
        pred = model(x_test_tensor)
        cost = criterion(pred, y_test_tensor)
        test_loss += cost.item()
        #test_accuracy += torch.sum(pred == y_test_tensor).detach().cpu().numpy()/x_test_tensor.size(0)
        
    print('test_loss:',test_loss)
    #print(f'test_acc:{test_accuracy:.3f}')


: 

In [None]:
division = y_p_unscaled / y_test_unscaled
cov = np.std(division) / np.mean(division)
print(cov)

: 

In [None]:
from sklearn.metrics import r2_score
r2_score(y_test_unscaled, y_p_unscaled)

: 

: 

: 