In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import math
from tensorflow.keras.preprocessing import sequence
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [2]:
data = pd.read_csv('./datasets/data.csv', encoding='CP949')
target = pd.read_csv('./datasets/OECD신용등급.csv', encoding='CP949')
target.drop('신용등급_1', axis=1, inplace=True)

In [3]:
datasets = pd.merge(data, target, on=('국가별', '시점'), how='left')
datasets.columns = ['국가별', '시점', '인당_국민총소득', 'GDP_성장률', '디플레이터', '수출', '수입',
       '무역의존도_수출', '무역의존도_수입', '외환보유액', '부채비율', '국민부담률', '평균근로자세금', '경제활동참가율',
       '고용률', '실업률', '신용등급_1']
datasets['수입'] = datasets['수입'].astype(float)
datasets['무역의존도_수입'] = datasets['무역의존도_수입'].astype(float)
datasets.dropna(inplace=True)
datasets.reset_index(inplace=True)
datasets.drop(['국가별', 'index'], axis=1, inplace=True)
datasets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116 entries, 0 to 115
Data columns (total 16 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   시점        116 non-null    int64  
 1   인당_국민총소득  116 non-null    float64
 2   GDP_성장률   116 non-null    float64
 3   디플레이터     116 non-null    float64
 4   수출        116 non-null    float64
 5   수입        116 non-null    float64
 6   무역의존도_수출  116 non-null    float64
 7   무역의존도_수입  116 non-null    float64
 8   외환보유액     116 non-null    int64  
 9   부채비율      116 non-null    float64
 10  국민부담률     116 non-null    float64
 11  평균근로자세금   116 non-null    float64
 12  경제활동참가율   116 non-null    float64
 13  고용률       116 non-null    float64
 14  실업률       116 non-null    float64
 15  신용등급_1    116 non-null    float64
dtypes: float64(14), int64(2)
memory usage: 14.6 KB


In [4]:
lb = LabelEncoder()
datasets['시점'] = lb.fit_transform(datasets['시점'])
datasets.describe()

Unnamed: 0,시점,인당_국민총소득,GDP_성장률,디플레이터,수출,수입,무역의존도_수출,무역의존도_수입,외환보유액,부채비율,국민부담률,평균근로자세금,경제활동참가율,고용률,실업률,신용등급_1
count,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0
mean,1.689655,33475.815517,3.32931,95.59569,221782.2,222941.6,37.006552,37.020345,81676.53,73.539026,34.313793,38.507759,60.153448,65.972414,8.257759,16.97994
std,1.058414,21025.028449,3.148085,13.460676,280534.0,326823.5,22.166097,21.632006,182344.0,41.213343,6.950307,8.705592,5.940245,7.047338,4.170646,3.344374
min,0.0,4070.6,-5.5,58.2,3092.0,3919.0,7.44,2.24,279.0,6.788,11.4,14.7,48.1,46.3,2.6,3.9
25%,1.0,16716.9,1.8,87.7,48423.5,46202.75,20.2925,22.9025,9268.75,45.8295,30.5,32.825,57.025,60.65,5.1,14.5
50%,2.0,30761.75,2.95,100.0,120463.0,89101.5,31.785,29.41,30735.0,63.252,33.1,38.8,59.65,66.1,7.45,18.0
75%,3.0,44388.9,4.2,103.7,307357.5,313317.8,51.1075,50.7875,71707.75,101.0675,39.25,43.775,63.125,71.575,9.925,20.0
max,3.0,110886.0,25.2,143.1,1503400.0,2248800.0,146.18,137.7,1233153.0,234.073,48.0,57.1,81.4,83.8,24.9,20.0


In [5]:
datasets.iloc[:,15] = (round(datasets.iloc[:,15]).astype(int)).astype(str)
# datasets.iloc[:,15] = datasets.iloc[:,15].apply(lambda x: math.trunc(x))
# datasets.iloc[:,15] = datasets.iloc[:,15].astype(str)

In [6]:
x_train, x_test, y_train, y_test = train_test_split(datasets.iloc[:,:15], datasets.iloc[:,15], test_size=0.2, random_state=42)

In [7]:
ss = StandardScaler()
x_train = ss.fit_transform(x_train)
x_test = ss.fit_transform(x_test)

In [8]:
lb = LabelEncoder()
y_train = lb.fit_transform(y_train)
y_train = pd.get_dummies(y_train).values
y_train = sequence.pad_sequences(y_train, maxlen=15)
y_test = lb.fit_transform(y_test)
y_test = pd.get_dummies(y_test).values
y_test = sequence.pad_sequences(y_test, maxlen=15)

In [9]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((92, 15), (24, 15), (92, 15), (24, 15))

In [10]:
model = Sequential()

model.add(Dense(64,input_shape=(15,),activation='relu'))
model.add(Dense(64,activation='relu'))
model.add(Dense(15,activation='softmax'))

model.compile(loss='categorical_crossentropy', 
              optimizer='Adam', 
              metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                1024      
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
 dense_2 (Dense)             (None, 15)                975       
                                                                 
Total params: 6,159
Trainable params: 6,159
Non-trainable params: 0
_________________________________________________________________


In [20]:
import os
from keras.callbacks import ModelCheckpoint, EarlyStopping

MODEL_DIR = './model/'
if not os.path.exists(MODEL_DIR):
    os.mkdir(MODEL_DIR)

modelpath = './model/{epoch:02d}-{val_loss:4f}.hdf5'
checkpointer = ModelCheckpoint(filepath=modelpath, monitor='val_loss', verbose=1, save_best_only=True)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=50)

history = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=100,
batch_size=200, verbose=0, callbacks=[early_stopping_callback, checkpointer])

print('\n Test Accuracy: %.4f' % (model.evaluate(x_test, y_test)[1]))


Epoch 1: val_loss improved from inf to 5.66858, saving model to ./model\01-5.668581.hdf5

Epoch 2: val_loss did not improve from 5.66858

Epoch 3: val_loss did not improve from 5.66858

Epoch 4: val_loss did not improve from 5.66858

Epoch 5: val_loss did not improve from 5.66858

Epoch 6: val_loss did not improve from 5.66858

Epoch 7: val_loss did not improve from 5.66858

Epoch 8: val_loss did not improve from 5.66858

Epoch 9: val_loss did not improve from 5.66858

Epoch 10: val_loss did not improve from 5.66858

Epoch 11: val_loss did not improve from 5.66858

Epoch 12: val_loss did not improve from 5.66858

Epoch 13: val_loss did not improve from 5.66858

Epoch 14: val_loss did not improve from 5.66858

Epoch 15: val_loss did not improve from 5.66858

Epoch 16: val_loss did not improve from 5.66858

Epoch 17: val_loss did not improve from 5.66858

Epoch 18: val_loss did not improve from 5.66858

Epoch 19: val_loss did not improve from 5.66858

Epoch 20: val_loss did not improve 

In [22]:
model.evaluate(x_test, y_test)



5.714257717132568

In [23]:
history.history

{'loss': [0.0021620129700750113,
  0.002157058333978057,
  0.0021524224430322647,
  0.0021478901617228985,
  0.00214352342300117,
  0.0021392146591097116,
  0.0021351182367652655,
  0.0021312106400728226,
  0.0021272997837513685,
  0.0021233672741800547,
  0.002119538141414523,
  0.002115792827680707,
  0.002112132031470537,
  0.002108535263687372,
  0.0021049589850008488,
  0.002101388992741704,
  0.002097815042361617,
  0.002094302559271455,
  0.0020907975267618895,
  0.002087303902953863,
  0.002083827508613467,
  0.002080449601635337,
  0.0020771254785358906,
  0.002073821844533086,
  0.002070533810183406,
  0.0020672387909144163,
  0.0020639440044760704,
  0.0020606089383363724,
  0.002057330682873726,
  0.002054184675216675,
  0.0020510258618742228,
  0.002047860762104392,
  0.0020447089336812496,
  0.002041563158854842,
  0.0020384383387863636,
  0.0020354227162897587,
  0.002032381249591708,
  0.0020293460693210363,
  0.002026328584179282,
  0.0020233034156262875,
  0.002020283