# 모델링

In [1]:
# 필요 라이브러리 import
import numpy as np
import pandas as pd
from pyarrow import csv
import matplotlib.pyplot as plt
import seaborn as sns
import math

import tensorflow as tf
import cx_Oracle
from tensorflow import keras
from tensorflow.keras.layers import Dense,Flatten,Conv1D,Dropout,MaxPool1D,BatchNormalization,Dropout
from tensorflow.keras.models import Sequential ,Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import mse,mae
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
from tensorflow.keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error ,mean_absolute_error,confusion_matrix ,r2_score
import json
import os

In [2]:
# Oracle connection parameters.
# NOTE(review): the credentials are hard-coded in the notebook source.
CONN_INFO = {
    'NAME': 'XEPDB1',
    'USER': 'iitp',
    'PASSWORD': 'iitp',
    'HOST': '172.16.5.231',
    'PORT': '11521',
}
# Connection string in the form user/password@host:port/name.
CONN_STR = '{USER}/{PASSWORD}@{HOST}:{PORT}/{NAME}'.format_map(CONN_INFO)
conn = cx_Oracle.connect(CONN_STR)
cursor = conn.cursor()


def df_round(df, cols=None):
    """Round every column of *df* except *cols*, then cast the key columns to int.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame that contains at least the columns '시도' and '년도'.
        cols: names of the columns that must keep their fractional values.
            Defaults to the rate/ratio columns listed below. Names that are
            not present in *df* are ignored.

    Returns:
        The same DataFrame object, with the remaining columns rounded and
        '시도' / '년도' converted to int.
    """
    if cols is None:
        # Built per call so callers never share (or mutate) a default list.
        cols = ['이혼율', '고용률', '실업률', '한국인_남녀비율', '한국인_인구밀도', '경찰청_인원_명당_담당_인구']
    keep_fractional = set(cols)
    for col_name in df.columns.to_list():
        if col_name not in keep_fractional:
            df[col_name] = df[col_name].round()
    df['시도'] = df['시도'].astype(int)
    df['년도'] = df['년도'].astype(int)
    return df

In [3]:
# Region-code lookup table; CODE is cast to int so it can be compared with the 시도 columns.
sido_code = pd.read_sql("select * from sido_code", con=conn)
sido_code['CODE'] = sido_code['CODE'].astype(int)
# CODE of the first row whose KOR_NAME is '세종'; rows with this code are dropped from every table below.
sejong_code = sido_code[sido_code['KOR_NAME'] == '세종'].iloc[0]['CODE']

# Crime-type lookup table, CODE as int.
crime_code = pd.read_sql("select * from crime_Specification", con=conn)
crime_code['CODE'] = crime_code['CODE'].astype(int)

# Independent-variable tables: cast to float, pass through df_round, then drop the Sejong rows.
indi_val = df_round(pd.read_sql("select * from independent_val", con=conn).astype(float))
indi_val = indi_val[indi_val['시도'] != sejong_code]

indi_zero = df_round(pd.read_sql("select * from independent_zero", con=conn).astype(float))
indi_zero = indi_zero[indi_zero['시도'] != sejong_code]

indi_mean = df_round(pd.read_sql("select * from independent_mean", con=conn).astype(float))
indi_mean = indi_mean[indi_mean['시도'] != sejong_code]

# Crime table: all values as float, key columns as int, Sejong rows removed.
crime_val = pd.read_sql("select * from crime", con=conn).astype(float)
crime_val = crime_val.astype({'시도': int, '년도': int})
crime_val = crime_val[crime_val['시도'] != sejong_code]

1. 년도
    - crime_val : 2002 ~ 2019
    - indi_val  : 2000 ~ 2020
    - indi_zero : 2002 ~ 2030
    - indi_mean : 2002 ~ 2030


In [40]:
# Model construction

# Plain fully connected (Dense) regression model
def create_Dense_model(x, y, loss_=keras.losses.mse, optimizer_="adam"):
    """Build and compile the Dense regression network.

    Args:
        x: feature matrix; only ``x.shape[1]`` is read, as the input width.
        y: accepted for signature symmetry; not used.
        loss_: loss passed to ``model.compile``.
        optimizer_: optimizer passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model, with 'mae' as its metric.
    """
    # (units, dropout rate) of the hidden Dense+Dropout pairs that follow the input pair.
    hidden_pairs = (
        (256, 0.5),
        (128, 0.3), (128, 0.3), (128, 0.3),
        (64, 0.3), (64, 0.3), (64, 0.3),
        (32, 0.3),
    )

    stack = [
        Dense(256, input_dim=x.shape[1], activation='relu', kernel_initializer='he_normal'),
        Dropout(0.5),
    ]
    for units, rate in hidden_pairs:
        stack.append(Dense(units, activation='relu', kernel_initializer='he_normal'))
        stack.append(Dropout(rate))
    # Final narrow layer has no dropout; the output is a single linear unit.
    stack.append(Dense(16, activation='relu', kernel_initializer='he_normal'))
    stack.append(Dense(1, activation='linear'))

    model = Sequential(stack)
    model.compile(loss=loss_, optimizer=optimizer_, metrics=['mae'])
    # model.summary()
    return model


# Build and compile the CNN (Conv1D) model
def create_conv1d_model(x, y, loss_=keras.losses.mse, optimizer_='adam'):
    """Build and compile the Conv1D regression network.

    Args:
        x: input array; only ``x.shape[1]`` is read, giving an input shape
            of ``(x.shape[1], 1)``.
        y: accepted for signature symmetry; not used.
        loss_: loss passed to ``model.compile``.
        optimizer_: optimizer passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model, with 'mae' as its metric.
    """
    def same_conv_pair(filters):
        # One 'same'-padded Conv1D (kernel size 2) followed by Dropout(0.5).
        return [
            Conv1D(filters, 2, padding='same', activation='relu', kernel_initializer='he_normal'),
            Dropout(0.5),
        ]

    stack = [
        Conv1D(256, 2, padding='valid', activation='relu',
               input_shape=(x.shape[1], 1), kernel_initializer='he_normal'),
        Dropout(0.5),
        MaxPool1D(),
        BatchNormalization(),
    ]
    for _ in range(3):
        stack.extend(same_conv_pair(256))
    stack.append(BatchNormalization())
    for _ in range(3):
        stack.extend(same_conv_pair(128))
    stack.append(MaxPool1D())
    stack.append(BatchNormalization())
    stack.append(Flatten())
    # Dense regression head.
    for units in (512, 256, 128):
        stack.append(Dense(units, activation='relu', kernel_initializer='he_normal'))
    stack.append(Dense(1, activation='linear'))

    model = Sequential(stack)
    model.compile(loss=loss_, optimizer=optimizer_, metrics=['mae'])
    # model.summary()
    return model


In [41]:
# Plot the training curves and save the figure
def save_train_history(history,title):
    """Plot loss and MAE curves from a training history and save them as an image.

    Args:
        history: object whose ``history`` attribute is a dict containing the
            keys 'loss', 'val_loss', 'mae' and 'val_mae'.
        title: used as the title of both subplots and as the file name under
            ``../img/train_history/``.

    Raises:
        KeyError: if one of the four expected keys is missing.
    """
    print(history.history.keys())

    img_path = '../img'
    path = img_path+'/train_history/'
    # Creates ../img and ../img/train_history/ as needed; no error if they already exist.
    os.makedirs(path, exist_ok=True)

    hist_dict = history.history
    loss = hist_dict['loss']
    val_loss = hist_dict['val_loss']
    mae_curve = hist_dict['mae']
    val_mae_curve = hist_dict['val_mae']

    # Top panel: training vs. validation loss.
    plt.subplot(2,1,1)
    plt.title(title)
    plt.plot(loss, 'b--', label = 'train loss')
    plt.plot(val_loss, 'r:' , label = 'val loss')
    plt.legend()
    plt.grid()

    # Bottom panel: training vs. validation MAE.
    plt.subplot(2,1,2)
    plt.title(title)
    plt.plot(mae_curve, 'b--', label = 'train mae')
    plt.plot(val_mae_curve, 'r:' , label = 'val mae')
    plt.legend()
    plt.grid()

    plt.tight_layout()
    plt.savefig(path+title)
    # Clear the current figure so the next call starts from an empty canvas.
    plt.clf()


# Train the Dense model, then evaluate and save its best checkpoint
def dense_fit(x_train,y_train,compare_,losses_,optimizers_,lName,oName):
    """Train a Dense model and persist the best checkpoint, the final model and its plots.

    Reads the module-level names ``x_test`` and ``y_test`` for the final
    evaluation; they must be defined before this function is called.

    Args:
        x_train: training features, passed to ``create_Dense_model`` and ``fit``.
        y_train: training targets.
        compare_: label used as sub-directory name and file-name prefix.
        losses_: loss passed to ``create_Dense_model``.
        optimizers_: optimizer passed to ``create_Dense_model``.
        lName: loss name, used in file names only.
        oName: optimizer name, used in file names only.

    Returns:
        The ``history`` object returned by ``model.fit``.
    """
    modelsPath = '../data/models/'
    model_checkPoint = modelsPath + 'checkPoint/'
    checkPoint_Name = f'{compare_}_Dense_{lName}_{oName}'
    # makedirs also creates missing parents (e.g. ../data/models/) and
    # tolerates directories that already exist.
    os.makedirs(model_checkPoint, exist_ok=True)
    os.makedirs(modelsPath + f'{compare_}/dense/', exist_ok=True)

    model = create_Dense_model(
        x = x_train,
        y = y_train,
        loss_ = losses_,
        optimizer_ = optimizers_
    )
    print(f'start {checkPoint_Name}')
    # Early stopping watches val_loss, while the checkpoint keeps the epoch with the lowest val_mae.
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=1000)
    mc = ModelCheckpoint(model_checkPoint+ checkPoint_Name+'.h5', monitor='val_mae', mode='min', save_best_only=True)
    with tf.device('/device:GPU:0'):
        history = model.fit(
            x = x_train,
            y = y_train,
            epochs= 50000,
            batch_size= 1024,
            # An earlier note said training output was disabled because RAM usage
            # grew over long runs, but the value in use is 1.
            verbose= 1,
            callbacks=[es,mc],
            validation_split=0.2
        )

    # Build a fresh network of the same architecture to receive the best weights.
    model = create_Dense_model(
        x = x_train,
        y = y_train,
        loss_ = losses_,
        optimizer_ = optimizers_
    )
    # Load the best checkpoint saved during training
    model.load_weights(model_checkPoint+ checkPoint_Name+'.h5')
    # Evaluate on the module-level hold-out split (loss, MAE)
    loss,acc = model.evaluate(x_test,y_test, verbose=0)
    # Save the model; the file name carries the test loss and MAE
    model.save(modelsPath + f'{compare_}/dense/'+ checkPoint_Name+ f'_loss({loss:.2f})_mae({acc:.2f}).h5')
    save_train_history(
        history,
        title = checkPoint_Name+ f'_loss({loss:.2f})_mae({acc:5.2f}).png'
        )
    return history

# Train the Conv1D model, then evaluate and save its best checkpoint
def conv_fit(x_train,y_train,compare_,losses_,optimizers_,lName,oName):
    """Train a Conv1D model and persist the best checkpoint, the final model and its plots.

    Reads the module-level names ``x_test`` and ``y_test`` for the final
    evaluation; they must be defined before this function is called.

    Args:
        x_train: training inputs, passed to ``create_conv1d_model`` and ``fit``.
        y_train: training targets.
        compare_: label used as sub-directory name and file-name prefix.
        losses_: loss passed to ``create_conv1d_model``.
        optimizers_: optimizer passed to ``create_conv1d_model``.
        lName: loss name, used in file names only.
        oName: optimizer name, used in file names only.

    Returns:
        The ``history`` object returned by ``model.fit``.
    """
    modelsPath = '../data/models/'
    model_checkPoint = modelsPath + 'checkPoint/'
    checkPoint_Name = f'{compare_}_conv_{lName}_{oName}'
    # makedirs also creates missing parents (e.g. ../data/models/) and
    # tolerates directories that already exist.
    os.makedirs(model_checkPoint, exist_ok=True)
    os.makedirs(modelsPath + f'{compare_}/conv/', exist_ok=True)

    model = create_conv1d_model(
        x = x_train,
        y = y_train,
        loss_ = losses_,
        optimizer_ = optimizers_
    )

    # Early stopping watches val_loss with patience 1000 (an earlier note
    # mentioned reducing it from 500 to 50, but the value in use is 1000);
    # the checkpoint keeps the epoch with the lowest val_mae.
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=1000)
    mc = ModelCheckpoint(model_checkPoint+ checkPoint_Name+'.h5', monitor='val_mae', mode='min', save_best_only=True)
    with tf.device('/device:GPU:0'):
        history = model.fit(
            x = x_train, y = y_train,
            epochs= 50000,
            batch_size= 1024,
            # An earlier note said training output was disabled because RAM usage
            # grew over long runs, but the value in use is 1.
            verbose= 1,
            callbacks=[es,mc],
            validation_split=0.2)

    # Build a fresh network of the same architecture to receive the best weights.
    model = create_conv1d_model(
        x = x_train,
        y = y_train,
        loss_ = losses_,
        optimizer_ = optimizers_)
    # Load the best checkpoint saved during training
    model.load_weights(model_checkPoint+ checkPoint_Name+'.h5')
    # Evaluate on the module-level hold-out split (loss, MAE)
    loss,acc = model.evaluate(x_test,y_test, verbose=0)

    # Save the model; the file name carries the test loss and MAE
    model.save(modelsPath + f'{compare_}/conv/'+ checkPoint_Name+ f'_loss({loss:.2f})_mae({acc:.2f}).h5')
    save_train_history(
        history,
        title = checkPoint_Name+ f'_loss({loss:.2f})_mae({acc:5.2f}).png')
    return history

In [69]:

# Candidate loss functions and optimizers, declared as lists so that a for
# loop can try every combination. Each *_name list is index-aligned with its
# value list and supplies the label used in log lines and file names.

# Loss candidates. Currently disabled: keras.losses.mse ('mse'), rmse ('rmse')
# and keras.losses.mae ('mae') - MAE is the validation metric, so it is
# excluded as a loss.
losses_ = [keras.losses.logcosh]
losses_name = ['logcosh']

# Optimizer candidates, all with learning_rate=1e-5. Currently disabled:
# RMSprop, SGD, Nadam, Adadelta, Adagrad. Adadelta was excluded because it
# performed worse than the others while taking abnormally long to run.
optimizers_ = [keras.optimizers.Adam(learning_rate=1e-5)]
optimizers_name = ['Adam']

def denseModelCheck(compare_):
    """Run ``dense_fit`` for every (loss, optimizer) combination.

    Reads the module-level ``losses_``, ``losses_name``, ``optimizers_``,
    ``optimizers_name``, ``x_train`` and ``y_train``.

    Args:
        compare_: label forwarded to ``dense_fit``.
    """
    # An earlier variant wrapped the call in try/except and printed
    # '<loss> <optimizer> Fail' on error; errors now propagate.
    for loss_pos, loss_fn in enumerate(losses_):
        for opt_pos, optimizer in enumerate(optimizers_):
            loss_label = losses_name[loss_pos]
            opt_label = optimizers_name[opt_pos]
            print(loss_label, opt_label)
            dense_fit(
                x_train=x_train,
                y_train=y_train,
                compare_=compare_,
                losses_=loss_fn,
                optimizers_=optimizer,
                lName=loss_label,
                oName=opt_label,
            )
def conv1DModelCheck(compare_):
    """Run ``conv_fit`` for every (loss, optimizer) combination, continuing past failures.

    Reads the module-level ``losses_``, ``losses_name``, ``optimizers_``,
    ``optimizers_name``, ``x_train`` and ``y_train``. A combination that
    raises is reported with a '<loss> <optimizer> Fail' line plus its
    traceback, and the sweep moves on to the next combination.

    Args:
        compare_: label forwarded to ``conv_fit``.
    """
    import traceback  # local import: only needed for failure reporting

    for loss_idx in range(len(losses_)):
        for optimizers_idx in range(len(optimizers_)):
            try:
                conv_fit(
                    x_train = x_train,
                    y_train = y_train,
                    compare_ = compare_,
                    losses_ = losses_[loss_idx],
                    optimizers_ = optimizers_[optimizers_idx],
                    lName = losses_name[loss_idx],
                    oName=  optimizers_name[optimizers_idx]
                    )
            except Exception:
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit stop the sweep; the traceback shows why it failed.
                print(losses_name[loss_idx],optimizers_name[optimizers_idx],'Fail')
                traceback.print_exc()

In [7]:
# Base directory of the project data files.
data_path = '../data/'


def read_json(jsonPath, mod='r', encoding='utf-8'):
    """Open *jsonPath* and return its parsed JSON content.

    Args:
        jsonPath: path of the JSON file.
        mod: file mode passed to ``open``.
        encoding: text encoding passed to ``open``.

    Returns:
        The object produced by ``json.load``.
    """
    with open(jsonPath, mod, encoding=encoding) as json_file:
        return json.load(json_file)
# Load the notebook configuration; config['data_path'] is the prefix of the CSV paths below.
config = read_json(data_path + "json/config.json")

# NOTE(review): all three reads are bound to the same name, so only the last
# one (zero_predict.csv) remains in indi_val; the first two results are
# discarded. Presumably three distinct variables were intended - confirm
# before relying on indi_val here.
indi_val = csv.read_csv(config['data_path']+ 'csv/cleaned/independent/regression_predict.csv').to_pandas()
indi_val = csv.read_csv(config['data_path']+ 'csv/cleaned/independent/mean_predict.csv').to_pandas()
indi_val = csv.read_csv(config['data_path']+ 'csv/cleaned/independent/zero_predict.csv').to_pandas()
# Bare expression: displays crime_val as the cell output.
crime_val

Unnamed: 0,시도,년도,범죄종류,범죄수
0,48,2009,3.0,231.0
1,31,2009,3.0,143.0
2,41,2009,3.0,1397.0
3,28,2009,3.0,367.0
4,30,2009,3.0,311.0
...,...,...,...,...
4139,42,2011,10.0,177.0
4140,47,2011,10.0,183.0
4141,46,2011,10.0,120.0
4142,43,2011,10.0,132.0


In [54]:
# Read mean_predict.csv into indi_mean, rebinding the name if it was set earlier in the session.
indi_mean = csv.read_csv(config['data_path']+ 'csv/cleaned/independent/mean_predict.csv').to_pandas()



In [68]:
# ast is only referenced by the commented-out column selection inside the loop.
import ast

# One row per crime type; CODE is iterated below.
colList = pd.read_sql("select * from topFiveCols",con=conn)

raw_x = csv.read_csv(config['data_path']+ 'csv/cleaned/independent/mean_predict.csv').to_pandas()




# Conv1D sweep: one training run per crime code.
# NOTE(review): x_train / x_test / y_train / y_test are module-level names that
# conv1DModelCheck and conv_fit read as globals, so the assignment order matters.
for crime_N in colList.CODE:
    # x = raw_x[ast.literal_eval(colList.COLLIST[crime_N])]
    # NOTE(review): this filter does not depend on crime_N, and it keeps every
    # column of raw_x (the per-crime column selection above is disabled);
    # confirm that all remaining columns are numeric and that the rows line
    # up with y, which is sorted by 시도/년도.
    x = raw_x[(raw_x.년도 >= 2002)&(raw_x.년도 <= 2019)&(raw_x.시도 != '세종')]   
    # Target: counts of this crime type, ordered by 시도 then 년도, missing values set to 0.
    y = crime_val[crime_val.범죄종류 == crime_N].sort_values(['시도' ,'년도']).범죄수.reset_index(drop = True).fillna(0)
    # Shape the inputs for Conv1D: (samples, features, 1)
    x_train, x_test,y_train, y_test = train_test_split(x,y,test_size=0.2)
    x_train = x_train.to_numpy().reshape(x_train.shape[0],x_train.shape[1],1)
    x_test = x_test.to_numpy().reshape(x_test.shape[0],x_test.shape[1],1)
    # The crime's name (looked up by code) labels the output files.
    conv1DModelCheck(crime_code[crime_code.CODE == crime_N].CRIME_NAME.values[0]) 
    


logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail
logcosh Adam Fail


In [50]:
import ast

# One row per crime type: CODE plus COLLIST, which is parsed as a Python
# literal and used as the column selection for that crime.
colList = pd.read_sql("select * from topFiveCols",con=conn)
# Alternative source kept for reference:
# raw_x = indi_zero[(indi_zero.년도 <= 2019)].sort_values(['시도' ,'년도']).iloc[:,2:].reset_index(drop = True)
raw_x = csv.read_csv(config['data_path']+ 'csv/cleaned/independent/mean_predict.csv').to_pandas()

# Conv1D sweep: one training run per crime code.
# CODE and COLLIST are paired row by row. Indexing colList.COLLIST[crime_N]
# would treat the crime code as an index label and only works when the codes
# happen to equal the row labels.
# x_train / x_test / y_train / y_test are module-level names read by
# conv1DModelCheck and conv_fit.
# NOTE(review): a recorded run of this cell raised KeyError because some
# COLLIST names were not columns of raw_x - confirm the CSV header names.
for crime_N, col_list_repr in zip(colList.CODE, colList.COLLIST):
    x = raw_x[ast.literal_eval(col_list_repr)]

    # Target: counts of this crime type, ordered by 시도 then 년도, missing values set to 0.
    y = crime_val[crime_val.범죄종류 == crime_N].sort_values(['시도' ,'년도']).범죄수.reset_index(drop = True).fillna(0)
    # Shape the inputs for Conv1D: (samples, features, 1)
    x_train, x_test,y_train, y_test = train_test_split(x,y,test_size=0.2)
    x_train = x_train.to_numpy().reshape(x_train.shape[0],x_train.shape[1],1)
    x_test = x_test.to_numpy().reshape(x_test.shape[0],x_test.shape[1],1)
    # The crime's name (looked up by code) labels the output files.
    conv1DModelCheck(crime_code[crime_code.CODE == crime_N].CRIME_NAME.values[0])
    


KeyError: "['경찰청_인원_명당_담당_인구', '대학교_수'] not in index"

In [7]:
import ast

# One row per crime type: CODE plus COLLIST, which is parsed as a Python
# literal and used as the column selection for that crime.
colList = pd.read_sql("select * from topFiveCols",con=conn)

# Rows up to 2019, ordered by 시도 then 년도, with the first two columns dropped.
raw_x = indi_zero[(indi_zero.년도 <= 2019)].sort_values(['시도' ,'년도']).iloc[:,2:].reset_index(drop = True)
# raw_y = crime_val[crime_val.범죄종류 == 0].sort_values(['시도' ,'년도']).범죄수.reset_index(drop = True)
cnt= 0
# Dense sweep: one training run per crime code.
# CODE and COLLIST are paired row by row. Indexing colList.COLLIST[crime_N]
# would treat the crime code as an index label and only works when the codes
# happen to equal the row labels.
# x_train / x_test / y_train / y_test are module-level names read by
# denseModelCheck and dense_fit.
for crime_N, col_list_repr in zip(colList.CODE, colList.COLLIST):
    x = raw_x[ast.literal_eval(col_list_repr)]
    # Target: counts of this crime type, ordered by 시도 then 년도, missing values set to 0.
    y = crime_val[crime_val.범죄종류 == crime_N].sort_values(['시도' ,'년도']).범죄수.reset_index(drop = True).fillna(0)

    x_train, x_test,y_train, y_test = train_test_split(x,y,test_size=0.2)
    # The crime's name (looked up by code) labels the output files.
    denseModelCheck(crime_code[crime_code.CODE == crime_N].CRIME_NAME.values[0])
    cnt+=1
# Bare expression: displays the number of crime types processed.
cnt


mse Adam
start 절도_Dense_mse_Adam
Epoch 1/50000
Epoch 2/50000
Epoch 3/50000
Epoch 4/50000
Epoch 5/50000
Epoch 6/50000
Epoch 7/50000
Epoch 8/50000
Epoch 9/50000
Epoch 10/50000
Epoch 11/50000
Epoch 12/50000
Epoch 13/50000
Epoch 14/50000
Epoch 15/50000
Epoch 16/50000
Epoch 17/50000
Epoch 18/50000
Epoch 19/50000
Epoch 20/50000
Epoch 21/50000
Epoch 22/50000
Epoch 23/50000
Epoch 24/50000
Epoch 25/50000
Epoch 26/50000
Epoch 27/50000
Epoch 28/50000
Epoch 29/50000
Epoch 30/50000
Epoch 31/50000
Epoch 32/50000
Epoch 33/50000
Epoch 34/50000
Epoch 35/50000
Epoch 36/50000
Epoch 37/50000
Epoch 38/50000
Epoch 39/50000
Epoch 40/50000
Epoch 41/50000
Epoch 42/50000
Epoch 43/50000
Epoch 44/50000
Epoch 45/50000
Epoch 46/50000
Epoch 47/50000
Epoch 48/50000
Epoch 49/50000
Epoch 50/50000
Epoch 51/50000
Epoch 52/50000
Epoch 53/50000
Epoch 54/50000
Epoch 55/50000
Epoch 56/50000
Epoch 57/50000
Epoch 58/50000
Epoch 59/50000
Epoch 60/50000
Epoch 61/50000
Epoch 62/50000
Epoch 63/50000
Epoch 64/50000
Epoch 65/50000
E

  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0, flags=flags)
  font.set_text(s, 0, flags=flags)


mse RMSprop
start 절도_Dense_mse_RMSprop
Epoch 1/50000
Epoch 2/50000
Epoch 3/50000
Epoch 4/50000
Epoch 5/50000
Epoch 6/50000
Epoch 7/50000
Epoch 8/50000
Epoch 9/50000
Epoch 10/50000
Epoch 11/50000
Epoch 12/50000
Epoch 13/50000
Epoch 14/50000
Epoch 15/50000
Epoch 16/50000
Epoch 17/50000
Epoch 18/50000
Epoch 19/50000
Epoch 20/50000
Epoch 21/50000
Epoch 22/50000
Epoch 23/50000
Epoch 24/50000
Epoch 25/50000
Epoch 26/50000
Epoch 27/50000
Epoch 28/50000
Epoch 29/50000
Epoch 30/50000
Epoch 31/50000
Epoch 32/50000
Epoch 33/50000
Epoch 34/50000
Epoch 35/50000
Epoch 36/50000
Epoch 37/50000
Epoch 38/50000
Epoch 39/50000
Epoch 40/50000
Epoch 41/50000
Epoch 42/50000
Epoch 43/50000
Epoch 44/50000
Epoch 45/50000
Epoch 46/50000
Epoch 47/50000
Epoch 48/50000
Epoch 49/50000
Epoch 50/50000
Epoch 51/50000
Epoch 52/50000
Epoch 53/50000
Epoch 54/50000
Epoch 55/50000
Epoch 56/50000
Epoch 57/50000
Epoch 58/50000
Epoch 59/50000
Epoch 60/50000
Epoch 61/50000
Epoch 62/50000
Epoch 63/50000
Epoch 64/50000
Epoch 65/5

In [94]:

def transColName(cols):
    """Return the column label *cols* with unit suffixes, brackets, spaces and '1' characters rewritten.

    The substitutions run strictly in the order listed, each one on the
    result of the previous one.
    """
    substitutions = (
        (' (%)', ''),
        (' (명)', ''),
        ('[명]', ''),
        ('[백분율]', ''),
        (')', ''),
        ('(', '_'),
        (' ', '_'),
        ('1', ''),  # removes every '1' character, wherever it occurs
    )
    renamed = cols
    for old, new in substitutions:
        renamed = renamed.replace(old, new)
    return renamed

# Build `predict`: crime columns and independent variables joined on (시도, 년도).
indi_mean = csv.read_csv(config['data_path']+ 'csv/cleaned/independent/mean_predict.csv').to_pandas()
df = csv.read_csv(config['data_path']+ 'csv/cleaned/crime_merged.csv').to_pandas()
# Keep the years 2002-2019 and drop the rows whose 시도 is '세종'.
crime_V = df[(df.년도 >= 2002)&(df.년도 <= 2019)&(df.시도 != '세종')]
# Labels of columns 2..14, captured before the labels are normalised below.
crimeName= crime_V.columns[2:15]
# Keep only the first 15 columns.
crime_V = crime_V.iloc[:,:15]
indi_mean = indi_mean[(indi_mean.년도 >= 2002)&(indi_mean.년도 <= 2019)&(indi_mean.시도 != '세종')]
# Normalise the column labels of both frames with transColName.
indi_mean.columns = list(map(transColName,indi_mean.columns))
crime_V.columns = list(map(transColName,crime_V.columns))

# Right join: every row of indi_mean is kept, matched to crime_V on (시도, 년도).
# NOTE(review): later cells slice `predict` by column position, so the column order produced here matters.
predict = pd.merge(crime_V, indi_mean, left_on=['시도','년도'], right_on=['시도','년도'], how='right')


Unnamed: 0,이혼율,경제활동인구,비경제활동인구,취업자,고용률,실업자,실업률,인당_지역내총생산,인당_지역총소득,인당_개인소득,...,경찰청_인원_명당_담당_인구,한국인_총인구수,한국인_남녀비율,한국인_인구밀도,총전입,총전출,대학교_수,종교단체수,외국인수,총인구수
0,3.0,700000.0,473000.0,687000.0,58.6,13000.0,1.8,13501.0,12592.0,8491.0,...,483.461078,1538720.0,100.674253,91.183407,91981.0,103717.0,21.520833,2786.0,57402.493056,1538720.0
1,3.4,684000.0,488000.0,670000.0,57.2,14000.0,2.0,14935.0,13433.0,8960.0,...,483.461078,1527034.0,100.994287,90.490904,98684.0,105886.0,20.000000,2890.0,6297.000000,1527034.0
2,2.8,695000.0,479000.0,681000.0,58.0,15000.0,2.1,15806.0,14058.0,9589.0,...,483.461078,1521375.0,101.087934,90.155556,101854.0,107425.0,20.000000,2945.0,7265.000000,1521375.0
3,2.7,689000.0,486000.0,676000.0,57.6,12000.0,1.8,16379.0,15049.0,10036.0,...,483.461078,1513110.0,101.086292,89.665778,101090.0,108602.0,20.000000,3035.0,7989.000000,1513110.0
4,2.6,696000.0,482000.0,687000.0,58.3,9000.0,1.3,17498.0,16156.0,10530.0,...,483.461078,1505420.0,101.117930,89.210074,106180.0,113563.0,19.000000,3057.0,10252.000000,1505420.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,1.8,5391000.0,3220000.0,5165000.0,60.0,226000.0,4.2,37236.0,41857.0,20857.0,...,393.576348,10022181.0,96.851552,16560.114010,670285.0,681407.0,48.000000,9832.0,274957.000000,10022181.0
284,1.8,5372000.0,3211000.0,5146000.0,60.0,226000.0,4.2,39356.0,44111.0,21295.0,...,373.269086,9930616.0,96.496952,16408.816920,674241.0,678366.0,48.000000,9753.0,273441.000000,9930616.0
285,1.8,5396000.0,3161000.0,5152000.0,60.2,244000.0,4.5,41374.0,46715.0,22237.0,...,366.633315,9857426.0,96.081055,16287.881690,679853.0,671536.0,48.000000,9660.0,267153.000000,9857426.0
286,1.8,5335000.0,3186000.0,5080000.0,59.6,254000.0,4.8,43664.0,48362.0,23323.0,...,483.461078,9765623.0,95.636277,16136.191340,716868.0,695181.0,48.000000,9842.0,283984.000000,9765623.0


In [99]:
# Columns at positions 2..14 of predict; presumably the crime-count targets - confirm against the merge's column order.
y = predict.iloc[:,2:15]


0       2246.0
1       1969.0
2       2316.0
3       2979.0
4       2576.0
        ...   
283    55463.0
284    47005.0
285    41293.0
286    39306.0
287    42362.0
Name: 절도, Length: 288, dtype: float64

Index(['절도', '살인', '강도', '방화', '성폭력', '폭행및상해', '공갈및협박', '약취와_유인', '체포와_감금',
       '주거침입', '유기', '공무방해', '도주와범인은닉'],
      dtype='object')

In [112]:
# Features: columns from position 15 onward; targets: columns 2..14 (one model per target column).
x = predict.iloc[:,15:]
y = predict.iloc[:,2:15]

for colName in y.columns:
    # Build the Dense regressor with the shared factory (same layer stack as
    # the one previously written inline here), compiled with MSE loss and
    # Adam at learning rate 1e-5.
    model = create_Dense_model(
        x,
        y[colName],
        loss_ = keras.losses.mse,
        optimizer_ = keras.optimizers.Adam(learning_rate=1e-5)
    )
    x_train, x_test,y_train, y_test = train_test_split(x,y[colName],test_size=0.2)

    # Early stopping watches val_loss with patience 1000 (an earlier note
    # mentioned reducing it from 500 to 50, but the value in use is 1000);
    # the checkpoint keeps the epoch with the lowest val_mae.
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=1000)
    mc = ModelCheckpoint('temp.h5', monitor='val_mae', mode='min', save_best_only=True)

    with tf.device('/device:GPU:0'):
        history = model.fit(
            x = x_train, y = y_train,
            epochs= 50000,
            batch_size= 1024,
            # An earlier note said training output was disabled because RAM
            # usage grew over long runs, but the value in use is 1.
            verbose= 1,
            callbacks=[es,mc],
            validation_split=0.2)

    # Rebuild the SAME Dense architecture to receive the checkpoint weights.
    # The previous code built create_conv1d_model here and passed the whole
    # losses_/optimizers_ lists, which cannot match the Dense checkpoint.
    model = create_Dense_model(
        x = x_train,
        y = y_train,
        loss_ = keras.losses.mse,
        optimizer_ = keras.optimizers.Adam(learning_rate=1e-5)
    )
    # Load the best checkpoint saved during training
    model.load_weights('temp.h5')
    # Evaluate on the hold-out split (loss, MAE)
    loss,acc = model.evaluate(x_test,y_test, verbose=0)
    # Save the model
    # NOTE(review): every target column writes to the same 'temp.h5', so only
    # the last model survives the loop - confirm whether per-column file names are wanted.
    model.save('temp.h5')

Epoch 1/50000
Epoch 2/50000
Epoch 3/50000
Epoch 4/50000

KeyboardInterrupt: 