In [30]:
from ucimlrepo import fetch_ucirepo
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, GRU

# 데이터셋 가져오기
data = fetch_ucirepo(id=270)
X = data.data.features
y = data.data.targets

# 데이터프레임 확인
print(X.head())
print(X.dtypes)

# 문자열 데이터를 수치 데이터로 변환
# 예시로 모든 열에 대해 LabelEncoder를 적용합니다.
# 만약 특정 열이 이미 숫자 형식이라면 변환할 필요가 없습니다.

for col in X.columns:
    if X[col].dtype == 'object':
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col])

# 데이터셋 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 데이터 표준화
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# RNN, LSTM, GRU 모델을 위해 3D 형태로 데이터 변경 (samples, timesteps, features)
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))


               Feature1     Feature2     Feature3     Feature4      Feature5  \
1;10.000000  2:1.868245   3:2.371604   4:2.803678   5:7.512213   6:-2.739388   
1;20.000000  2:2.532401   3:5.411209   4:6.509906   5:7.658469   6:-4.722217   
1;30.000000  2:3.454189   3:8.198175  4:10.508439  5:11.611003   6:-7.668313   
1;40.000000  2:3.451192  3:12.113940  4:16.266853  5:39.910056   6:-7.849409   
1;50.000000  2:4.194839  3:11.455096  4:15.715298  5:17.654915  6:-11.083364   

                 Feature6      Feature7        Feature8     Feature9  \
1;10.000000   7:-3.344671   8:-4.847512  9:15326.691400  10:1.768526   
1;20.000000   7:-5.817651   8:-7.518333  9:23855.781200  10:2.164706   
1;30.000000   7:-9.478675  8:-12.230939  9:37562.300800  10:2.840403   
1;40.000000   7:-9.689894  8:-11.921704  9:38379.066400  10:2.851173   
1;50.000000  7:-13.580692  8:-16.407848  9:51975.589900  10:3.480866   

                Feature10  ...     Feature119       Feature120    Feature121  \
1;10.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_index

In [7]:
# 문자열 처리 개선 (예시: LabelEncoder)
from sklearn.preprocessing import LabelEncoder

for column in X.columns:
    if X[column].dtype == 'object':
        le = LabelEncoder()
        X[column] = le.fit_transform(X[column])


In [12]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

nan_cols = X.columns[X.isna().sum() == len(X)]
X = X.drop(nan_cols, axis=1)

# 데이터 스케일링
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


# 시퀀스 생성 함수 정의
sequence_length = 10

def create_sequences(X, sequence_length):
    sequences = []
    for i in range(X.shape[0] - sequence_length):
        sequence = X[i:i + sequence_length]
        sequences.append(sequence)
    return np.array(sequences)

# 시퀀스 생성
X_seq = create_sequences(X_scaled, sequence_length)
y_seq = y[sequence_length:]

# 훈련, 검증, 테스트 세트로 분할
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42, shuffle=False)

if y_train.ndim == 1:
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)


In [31]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, GRU, SimpleRNN, Dropout
from keras.optimizers import RMSprop
from keras.metrics import MeanSquaredError
from keras.callbacks import EarlyStopping


# LSTM 모델 정의
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(optimizer=RMSprop(), loss='mse', metrics=[MeanSquaredError()])
    return model

# GRU 모델 정의
def build_gru_model(input_shape):
    model = Sequential()
    model.add(GRU(50, activation='relu', input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(optimizer=RMSprop(), loss='mse', metrics=[MeanSquaredError()])
    return model

# SimpleRNN(tanh) 모델 정의
def build_rnn_model(input_shape):
    model = Sequential()
    model.add(SimpleRNN(50, activation='relu', input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(optimizer=RMSprop(), loss='mse', metrics=[MeanSquaredError()])
    return model

# 조기 종료 설정
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# 모델 훈련
lstm_model = build_lstm_model((X_train.shape[1], X_train.shape[2]))
gru_model = build_gru_model((X_train.shape[1], X_train.shape[2]))
rnn_model = build_rnn_model((X_train.shape[1], X_train.shape[2]))

lstm_history = lstm_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[early_stopping])
gru_history = gru_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[early_stopping])
rnn_history = rnn_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[early_stopping])


Epoch 1/100


UnimplementedError: Graph execution error:

Detected at node 'mean_squared_error/Cast' defined at (most recent call last):
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\traitlets\config\application.py", line 1077, in launch_instance
      app.start()
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\asyncio\base_events.py", line 570, in run_forever
      self._run_once()
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\asyncio\base_events.py", line 1859, in _run_once
      handle._run()
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\asyncio\events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\ipykernel\kernelbase.py", line 529, in dispatch_queue
      await self.process_one()
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\ipykernel\kernelbase.py", line 518, in process_one
      await dispatch(*args)
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\ipykernel\kernelbase.py", line 424, in dispatch_shell
      await result
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\ipykernel\kernelbase.py", line 766, in execute_request
      reply_content = await reply_content
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\ipykernel\ipkernel.py", line 429, in do_execute
      res = shell.run_cell(
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\IPython\core\interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\IPython\core\interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\IPython\core\interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\IPython\core\interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\asb74\AppData\Roaming\Python\Python38\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\asb74\AppData\Local\Temp\ipykernel_20816\1750109219.py", line 43, in <module>
      lstm_history = lstm_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose=1, callbacks=[early_stopping])
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1685, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1284, in train_function
      return step_function(self, iterator)
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1268, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1249, in run_step
      outputs = model.train_step(data)
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1051, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\engine\training.py", line 1109, in compute_loss
      return self.compiled_loss(
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\losses.py", line 142, in __call__
      losses = call_fn(y_true, y_pred)
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\losses.py", line 268, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\asb74\anaconda3\envs\myenv\lib\site-packages\keras\losses.py", line 1469, in mean_squared_error
      y_true = tf.cast(y_true, y_pred.dtype)
Node: 'mean_squared_error/Cast'
Cast string to float is not supported
	 [[{{node mean_squared_error/Cast}}]] [Op:__inference_train_function_4891]

In [None]:
# 4. 학습 및 검증 손실 시각화
plt.figure(figsize=(12, 8))

# Epochs 기준 그래프
plt.subplot(2, 2, 1)
plt.plot(rnn_history.history['loss'], label='tanh train', color='blue')
plt.plot(rnn_history.history['val_loss'], label='tanh valid', color='blue', linestyle='--')
plt.plot(gru_history.history['loss'], label='GRU train', color='green')
plt.plot(gru_history.history['val_loss'], label='GRU valid', color='green', linestyle='--')
plt.plot(lstm_history.history['loss'], label='LSTM train', color='purple')
plt.plot(lstm_history.history['val_loss'], label='LSTM valid', color='purple', linestyle='--')
plt.yscale('log')
plt.title('Per epoch')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# 시간 기준 그래프
plt.subplot(2, 2, 2)
plt.plot(np.cumsum(rnn_history.history['loss']), label='tanh train', color='blue')
plt.plot(np.cumsum(rnn_history.history['val_loss']), label='tanh valid', color='blue', linestyle='--')
plt.plot(np.cumsum(gru_history.history['loss']), label='GRU train', color='green')
plt.plot(np.cumsum(gru_history.history['val_loss']), label='GRU valid', color='green', linestyle='--')
plt.plot(np.cumsum(lstm_history.history['loss']), label='LSTM train', color='purple')
plt.plot(np.cumsum(lstm_history.history['val_loss']), label='LSTM valid', color='purple', linestyle='--')
plt.yscale('log')
plt.title('Wall Clock Time (seconds)')
plt.xlabel('Epochs')
plt.ylabel('Cumulative Loss')
plt.legend()

# 두 그래프를 합친 제목 추가
plt.suptitle('Gas Sensor Array Drift at Different Concentrations', fontsize=16)

plt.tight_layout(rect=[0, 0, 1, 0.95])  # suptitle과 그래프가 겹치지 않도록 조정
plt.show()