# 라이브러리 및 데이터

In [1]:
# Mount Google Drive at /content/drive (Colab only); the CSV paths used later
# in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
pip install tensorflow



In [3]:
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization, Activation
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

In [4]:
# Folder on the mounted shared drive that holds the input tables; defined once
# so both reads resolve against a single, easily changed location.
DATA_DIR = '/content/drive/Shareddrives/서울시 빅데이터(2022-2)/논문/2. 상권 활성화 지수 예측 모델/0) 데이터'

# Feature table and target table.
x_data = pd.read_csv(f'{DATA_DIR}/data_x.csv')
y_data = pd.read_csv(f'{DATA_DIR}/data_y.csv')

# The modelling cells slice both tables with the same positional split, so
# they must contain the same number of rows.
assert len(x_data) == len(y_data), "data_x.csv and data_y.csv must have the same number of rows"

In [5]:
# Columns that are deliberately left unscaled (year, quarter and the two code
# columns); every other column is min-max scaled.
UNSCALED_COLUMNS = ['년도', '분기', '행정동코드', '표준산업분류코드']
x_columns_to_scale = x_data.columns.difference(UNSCALED_COLUMNS)
y_columns_to_scale = y_data.columns.difference(UNSCALED_COLUMNS)

# Use one scaler per table: a single shared scaler would be refit on the
# targets and lose the feature ranges, making it impossible to map either
# table (or model predictions) back to original units later.
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

# NOTE(review): both scalers are fit on the full tables, i.e. before the 80/20
# split performed in the modelling cells, so test-row ranges influence the
# scaling of the training rows — confirm this is acceptable.
x_data[x_columns_to_scale] = x_scaler.fit_transform(x_data[x_columns_to_scale])
y_data[y_columns_to_scale] = y_scaler.fit_transform(y_data[y_columns_to_scale])

# Backward-compatible alias: the original cell left `scaler` fitted on the
# target columns.
scaler = y_scaler

# 모델링

## Linear Regression

In [None]:
# Fit one ordinary-least-squares model per target, report its MSE on the
# held-out tail of the data, and collect its predictions for every row.
target_columns = ['grade', 'result', 'sales', 'infra', 'franch', 'pop', 'fin']

# Positional split: the first 80% of rows train, the remaining rows test.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train = x_data.iloc[:train_samples]
x_test = x_data.iloc[train_samples:]

lr_outputs = {}
for predict_column in target_columns:
    # Target column for this model, as a flat array aligned with x_data rows.
    y_temp = y_data[f'{predict_column}_y'].to_numpy()
    y_train = y_temp[:train_samples]
    y_test = y_temp[train_samples:]

    model = LinearRegression().fit(x_train, y_train)

    y_pred = model.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f'{predict_column} 예측 MSE: {mse}')

    # Predictions for all rows (train and test) become one output column.
    lr_outputs[f'{predict_column}_1_pre'] = model.predict(x_data)

predicted_values = pd.DataFrame(lr_outputs, index=x_data.index)
lr_pred = pd.concat([x_data, predicted_values], axis=1)

grade 예측 MSE: 0.0002493775547587891
result 예측 MSE: 0.001868943566274145
sales 예측 MSE: 0.0005951505915599874
infra 예측 MSE: 0.0003160802922126036
franch 예측 MSE: 0.003541150432148625
pop 예측 MSE: 0.00045607666531934424
fin 예측 MSE: 0.00018206938226202868


## XGBoost

In [None]:
# Tune n_estimators for one XGBoost regressor per target with 5-fold CV on the
# training rows, report the test MSE, and collect predictions for every row.
target_columns = ['grade', 'result', 'sales', 'infra', 'franch', 'pop', 'fin']

predicted_values = pd.DataFrame(index=x_data.index)

# Positional 80/20 split; it is identical for every target, so compute it once.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train, x_test = x_data[:train_samples], x_data[train_samples:]

# Candidate tree counts searched for every target.
param_grid = {'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]}

for predict_column in target_columns:

    y_column = f'{predict_column}_y'
    y_temp = y_data[[y_column]].values.ravel()
    y_train, y_test = y_temp[:train_samples], y_temp[train_samples:]

    model = XGBRegressor(random_state=42)

    grid_search = GridSearchCV(model, param_grid, scoring='neg_mean_squared_error', cv=5)
    grid_search.fit(x_train, y_train)

    best_n_estimators = grid_search.best_params_['n_estimators']
    print(f'Best n_estimators for {predict_column}: {best_n_estimators}')

    # GridSearchCV refits the winning configuration on all of x_train by
    # default (refit=True), so reuse that model instead of training the same
    # configuration a second time.
    final_model = grid_search.best_estimator_

    y_pred = final_model.predict(x_test)

    mse = mean_squared_error(y_test, y_pred)
    print(f'{predict_column} 예측 MSE: {mse}')

    predicted_values[f'{predict_column}_1_pre'] = final_model.predict(x_data)

xgb_pred = pd.concat([x_data, predicted_values], axis=1)

Best n_estimators for grade: 600
grade 예측 MSE: 0.00013817806531693437
Best n_estimators for result: 200
result 예측 MSE: 0.0020548436539918977
Best n_estimators for sales: 200
sales 예측 MSE: 0.00043229000024283846
Best n_estimators for infra: 200
infra 예측 MSE: 0.00024023010634554903
Best n_estimators for franch: 100
franch 예측 MSE: 0.00095284021618859
Best n_estimators for pop: 200
pop 예측 MSE: 0.0006650815354796777
Best n_estimators for fin: 400
fin 예측 MSE: 9.079994387582427e-05


## RandomForest

In [None]:
# Search n_estimators (100-500) for a random forest on the first four targets
# using 3-fold CV on the training rows, then report the test MSE.
target_columns = ['grade', 'result', 'sales', 'infra']

# Positional 80/20 split; it is identical for every target, so compute it once.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train, x_test = x_data[:train_samples], x_data[train_samples:]

param_grid = {'n_estimators': [100, 200, 300, 400, 500]}

for predict_column in target_columns:

    y_column = f'{predict_column}_y'
    y_temp = y_data[[y_column]].values.ravel()
    y_train, y_test = y_temp[:train_samples], y_temp[train_samples:]

    model = RandomForestRegressor(random_state=42)

    grid_search = GridSearchCV(model, param_grid, scoring='neg_mean_squared_error', cv=3)
    grid_search.fit(x_train, y_train)

    best_n_estimators = grid_search.best_params_['n_estimators']
    print(f'Best n_estimators for {predict_column}: {best_n_estimators}')

    # GridSearchCV (refit=True by default) has already refit the best forest
    # on all of x_train; reuse it rather than fitting the same forest again.
    final_model = grid_search.best_estimator_

    y_pred = final_model.predict(x_test)

    mse = mean_squared_error(y_test, y_pred)
    print(f'{predict_column} 예측 MSE: {mse}')

Best n_estimators for grade: 500
grade 예측 MSE: 0.00016896106524057298
Best n_estimators for result: 400
result 예측 MSE: 0.0024163131826768548
Best n_estimators for sales: 300
sales 예측 MSE: 0.00035926752020933977
Best n_estimators for infra: 400
infra 예측 MSE: 0.00016086873406102927


In [None]:
# Search n_estimators (100-500) for a random forest on the remaining three
# targets using 3-fold CV on the training rows, then report the test MSE.
target_columns = ['franch', 'pop', 'fin']

# Positional 80/20 split; it is identical for every target, so compute it once.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train, x_test = x_data[:train_samples], x_data[train_samples:]

param_grid = {'n_estimators': [100, 200, 300, 400, 500]}

for predict_column in target_columns:

    y_column = f'{predict_column}_y'
    y_temp = y_data[[y_column]].values.ravel()
    y_train, y_test = y_temp[:train_samples], y_temp[train_samples:]

    model = RandomForestRegressor(random_state=42)

    grid_search = GridSearchCV(model, param_grid, scoring='neg_mean_squared_error', cv=3)
    grid_search.fit(x_train, y_train)

    best_n_estimators = grid_search.best_params_['n_estimators']
    print(f'Best n_estimators for {predict_column}: {best_n_estimators}')

    # GridSearchCV (refit=True by default) has already refit the best forest
    # on all of x_train; reuse it rather than fitting the same forest again.
    final_model = grid_search.best_estimator_

    y_pred = final_model.predict(x_test)

    mse = mean_squared_error(y_test, y_pred)
    print(f'{predict_column} 예측 MSE: {mse}')

Best n_estimators for franch: 100
franch 예측 MSE: 0.0010449255301962792
Best n_estimators for pop: 500
pop 예측 MSE: 0.0009865179465192544
Best n_estimators for fin: 400
fin 예측 MSE: 0.0001111237123514923


In [None]:
# Extend the n_estimators search to 600-1000 for the two targets listed here,
# again with 3-fold CV on the training rows, and report the test MSE.
target_columns = ['grade', 'pop']

# Positional 80/20 split; it is identical for every target, so compute it once.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train, x_test = x_data[:train_samples], x_data[train_samples:]

param_grid = {'n_estimators': [600, 700, 800, 900, 1000]}

for predict_column in target_columns:

    y_column = f'{predict_column}_y'
    y_temp = y_data[[y_column]].values.ravel()
    y_train, y_test = y_temp[:train_samples], y_temp[train_samples:]

    model = RandomForestRegressor(random_state=42)

    grid_search = GridSearchCV(model, param_grid, scoring='neg_mean_squared_error', cv=3)
    grid_search.fit(x_train, y_train)

    best_n_estimators = grid_search.best_params_['n_estimators']
    print(f'Best n_estimators for {predict_column}: {best_n_estimators}')

    # GridSearchCV (refit=True by default) has already refit the best forest
    # on all of x_train; reuse it rather than fitting the same forest again.
    final_model = grid_search.best_estimator_

    y_pred = final_model.predict(x_test)

    mse = mean_squared_error(y_test, y_pred)
    print(f'{predict_column} 예측 MSE: {mse}')

Best n_estimators for grade: 1000
grade 예측 MSE: 0.0001711757363007149
Best n_estimators for pop: 600
pop 예측 MSE: 0.0009907963275322116


In [None]:
# Train the final random forest for each target with a fixed tree count,
# report the test MSE, and collect predictions for every row.
target_columns = ['grade', 'result', 'sales', 'infra', 'franch', 'pop', 'fin']
# Tree count per target, in the same order as target_columns.
n_estimators_list = [1000, 400, 300, 400, 100, 600, 400]

# Positional split: the first 80% of rows train, the remaining rows test.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train = x_data.iloc[:train_samples]
x_test = x_data.iloc[train_samples:]

rf_outputs = {}
for predict_column, n_estimators in zip(target_columns, n_estimators_list):
    y_temp = y_data[f'{predict_column}_y'].to_numpy()
    y_train = y_temp[:train_samples]
    y_test = y_temp[train_samples:]

    model = RandomForestRegressor(n_estimators=n_estimators, random_state=42)
    model.fit(x_train, y_train)

    y_pred = model.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f'{predict_column} 예측 MSE (n_estimators={n_estimators}): {mse}')

    # Predictions for all rows (train and test) become one output column.
    rf_outputs[f'{predict_column}_1_pre'] = model.predict(x_data)

predicted_values = pd.DataFrame(rf_outputs, index=x_data.index)
rf_pred = pd.concat([x_data, predicted_values], axis=1)

grade 예측 MSE (n_estimators=1000): 0.0001711757363007149
result 예측 MSE (n_estimators=400): 0.0024163131826768548
sales 예측 MSE (n_estimators=300): 0.00035926752020933977
infra 예측 MSE (n_estimators=400): 0.00016086873406102927
franch 예측 MSE (n_estimators=100): 0.0010449255301962792
pop 예측 MSE (n_estimators=600): 0.0009907963275322116
fin 예측 MSE (n_estimators=400): 0.0001111237123514923


## NeuralNetwork

In [None]:
# Dense network, variant 1 (two hidden layers of 50 units): train one model per
# target, report the test MSE, and collect predictions for every row.
target_columns = ['grade', 'result', 'sales', 'infra', 'franch', 'pop', 'fin']

predicted_values_1 = pd.DataFrame(index=x_data.index)
mse_dict = {}

# Positional 80/20 split; it is identical for every target, so compute it once.
# NOTE(review): any of the columns excluded from min-max scaling that are
# present in x_data reach the network unscaled — confirm this is intended.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train, x_test = x_data[:train_samples], x_data[train_samples:]

for predict_column in target_columns:

    y_column = f'{predict_column}_y'
    y_temp = y_data[[y_column]].values.ravel()
    y_train, y_test = y_temp[:train_samples], y_temp[train_samples:]

    # Seed Python, NumPy and TensorFlow so weight initialisation and batch
    # shuffling are repeatable, matching the random_state=42 used for the
    # tree-based models in this notebook.
    tf.keras.utils.set_random_seed(42)

    model = Sequential([
        Dense(50, input_shape=(x_train.shape[1],)),
        BatchNormalization(),
        Activation('relu'),
        Dense(50),
        BatchNormalization(),
        Activation('relu'),
        Dense(1),
        Activation('sigmoid'),
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Stop after 10 epochs without val_loss improvement; the checkpoint keeps
    # the best model seen so far on disk.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    checkpoint_filepath = f"best_model_checkpoint1_{predict_column}_1.h5"

    model_checkpoint = ModelCheckpoint(checkpoint_filepath, save_best_only=True)

    model.fit(x_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping, model_checkpoint], verbose=1)

    # Reload the checkpointed best weights before evaluating.
    model.load_weights(checkpoint_filepath)

    y_pred = model.predict(x_test).reshape(-1)

    mse = mean_squared_error(y_test, y_pred)
    mse_dict[predict_column] = mse
    print(f'{predict_column} 예측 MSE: {mse}')

    predicted_values_1[f'{predict_column}_1_pre'] = model.predict(x_data).reshape(-1)

# Recap of all test MSEs after the (verbose) training logs.
for column, mse_value in mse_dict.items():
    print(f'{column} 예측 MSE: {mse_value}')

nn_pred_1 = pd.concat([x_data, predicted_values_1], axis=1)

Epoch 1/100
Epoch 2/100
 40/776 [>.............................] - ETA: 2s - loss: 0.0169

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
grade 예측 MSE: 0.021163224598246603
Epoch 1/100
Epoch 2/100
 41/776 [>.............................] - ETA: 2s - loss: 0.0888

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
result 예측 MSE: 0.09323138560051666
Epoch 1/100
Epoch 2/100
 40/776 [>.............................] - ETA: 2s - loss: 0.0229

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
sales 예측 MSE: 0.04495108997969566
Epoch 1/100
Epoch 2/100
 37/776 [>.............................] - ETA: 3s - loss: 0.0204

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
infra 예측 MSE: 0.026335235676823478
Epoch 1/100
Epoch 2/100
 37/776 [>.............................] - ETA: 3s - loss: 0.0234

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
franch 예측 MSE: 0.01230000985949758
Epoch 1/100
Epoch 2/100
 40/776 [>.............................] - ETA: 2s - loss: 0.0248

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
pop 예측 MSE: 0.025186626635748142
Epoch 1/100
Epoch 2/100
 40/776 [>.............................] - ETA: 2s - loss: 0.0157

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
fin 예측 MSE: 0.021006623848326817
grade 예측 MSE: 0.021163224598246603
result 예측 MSE: 0.09323138560051666
sales 예측 MSE: 0.04495108997969566
infra 예측 MSE: 0.026335235676823478
franch 예측 MSE: 0.01230000985949758
pop 예측 MSE: 0.025186626635748142
fin 예측 MSE: 0.021006623848326817


In [None]:
# Dense network, variant 2 (hidden layers 64-32-16): train one model per
# target, report the test MSE, and collect predictions for every row.
target_columns = ['grade', 'result', 'sales', 'infra', 'franch', 'pop', 'fin']

predicted_values_2 = pd.DataFrame(index=x_data.index)
mse_dict = {}

# Positional 80/20 split; it is identical for every target, so compute it once.
# NOTE(review): any of the columns excluded from min-max scaling that are
# present in x_data reach the network unscaled — confirm this is intended.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train, x_test = x_data[:train_samples], x_data[train_samples:]

for predict_column in target_columns:

    y_column = f'{predict_column}_y'
    y_temp = y_data[[y_column]].values.ravel()
    y_train, y_test = y_temp[:train_samples], y_temp[train_samples:]

    # Seed Python, NumPy and TensorFlow so weight initialisation and batch
    # shuffling are repeatable, matching the random_state=42 used for the
    # tree-based models in this notebook.
    tf.keras.utils.set_random_seed(42)

    model = Sequential([
        Dense(64, input_shape=(x_train.shape[1],)),
        BatchNormalization(),
        Activation('relu'),
        Dense(32),
        BatchNormalization(),
        Activation('relu'),
        Dense(16),
        BatchNormalization(),
        Activation('relu'),
        Dense(1),
        Activation('sigmoid'),
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Stop after 10 epochs without val_loss improvement; the checkpoint keeps
    # the best model seen so far on disk.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    checkpoint_filepath = f"best_model_checkpoint2_{predict_column}_1.h5"

    model_checkpoint = ModelCheckpoint(checkpoint_filepath, save_best_only=True)
    model.fit(x_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping, model_checkpoint], verbose=1)

    # Reload the checkpointed best weights before evaluating.
    model.load_weights(checkpoint_filepath)

    y_pred = model.predict(x_test).reshape(-1)

    mse = mean_squared_error(y_test, y_pred)
    mse_dict[predict_column] = mse
    print(f'{predict_column} 예측 MSE: {mse}')

    predicted_values_2[f'{predict_column}_1_pre'] = model.predict(x_data).reshape(-1)

# Recap of all test MSEs after the (verbose) training logs.
for column, mse_value in mse_dict.items():
    print(f'{column} 예측 MSE: {mse_value}')

nn_pred_2 = pd.concat([x_data, predicted_values_2], axis=1)

Epoch 1/100
Epoch 2/100
 17/776 [..............................] - ETA: 5s - loss: 0.0144

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
grade 예측 MSE: 0.0240899876268526
Epoch 1/100
Epoch 2/100
 23/776 [..............................] - ETA: 3s - loss: 0.0794

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
result 예측 MSE: 0.12270411705842342
Epoch 1/100
Epoch 2/100
 17/776 [..............................] - ETA: 5s - loss: 0.0198

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
sales 예측 MSE: 0.030981267253893316
Epoch 1/100
Epoch 2/100
 22/776 [..............................] - ETA: 3s - loss: 0.0204

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
infra 예측 MSE: 0.019418104483457405
Epoch 1/100
Epoch 2/100
 20/776 [..............................] - ETA: 4s - loss: 0.0197

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
franch 예측 MSE: 0.017339435196257205
Epoch 1/100
Epoch 2/100
 22/776 [..............................] - ETA: 3s - loss: 0.0237

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
pop 예측 MSE: 0.023319922540561384
Epoch 1/100
Epoch 2/100
 15/776 [..............................] - ETA: 5s - loss: 0.0154

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
fin 예측 MSE: 0.020466890978576634
grade 예측 MSE: 0.0240899876268526
result 예측 MSE: 0.12270411705842342
sales 예측 MSE: 0.030981267253893316
infra 예측 MSE: 0.019418104483457405
franch 예측 MSE: 0.017339435196257205
pop 예측 MSE: 0.023319922540561384
fin 예측 MSE: 0.020466890978576634


In [None]:
# Dense network, variant 3 (hidden layers 64-32-16-8): train one model per
# target, report the test MSE, and collect predictions for every row.
target_columns = ['grade', 'result', 'sales', 'infra', 'franch', 'pop', 'fin']

predicted_values_3 = pd.DataFrame(index=x_data.index)
mse_dict = {}

# Positional 80/20 split; it is identical for every target, so compute it once.
# NOTE(review): any of the columns excluded from min-max scaling that are
# present in x_data reach the network unscaled — confirm this is intended.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train, x_test = x_data[:train_samples], x_data[train_samples:]

for predict_column in target_columns:

    y_column = f'{predict_column}_y'
    y_temp = y_data[[y_column]].values.ravel()
    y_train, y_test = y_temp[:train_samples], y_temp[train_samples:]

    # Seed Python, NumPy and TensorFlow so weight initialisation and batch
    # shuffling are repeatable, matching the random_state=42 used for the
    # tree-based models in this notebook.
    tf.keras.utils.set_random_seed(42)

    model = Sequential([
        Dense(64, input_shape=(x_train.shape[1],)),
        BatchNormalization(),
        Activation('relu'),
        Dense(32),
        BatchNormalization(),
        Activation('relu'),
        Dense(16),
        BatchNormalization(),
        Activation('relu'),
        Dense(8),
        BatchNormalization(),
        Activation('relu'),
        Dense(1),
        Activation('sigmoid'),
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Stop after 10 epochs without val_loss improvement; the checkpoint keeps
    # the best model seen so far on disk.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    checkpoint_filepath = f"best_model_checkpoint3_{predict_column}_1.h5"

    model_checkpoint = ModelCheckpoint(checkpoint_filepath, save_best_only=True)
    model.fit(x_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping, model_checkpoint], verbose=1)

    # Reload the checkpointed best weights before evaluating.
    model.load_weights(checkpoint_filepath)

    y_pred = model.predict(x_test).reshape(-1)

    mse = mean_squared_error(y_test, y_pred)
    mse_dict[predict_column] = mse
    print(f'{predict_column} 예측 MSE: {mse}')

    predicted_values_3[f'{predict_column}_1_pre'] = model.predict(x_data).reshape(-1)

# Recap of all test MSEs after the (verbose) training logs.
for column, mse_value in mse_dict.items():
    print(f'{column} 예측 MSE: {mse_value}')

nn_pred_3 = pd.concat([x_data, predicted_values_3], axis=1)

Epoch 1/100
Epoch 2/100
 19/776 [..............................] - ETA: 4s - loss: 0.0166

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
grade 예측 MSE: 0.018414485696600674
Epoch 1/100
Epoch 2/100
 19/776 [..............................] - ETA: 4s - loss: 0.0846

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
result 예측 MSE: 0.0864523872705236
Epoch 1/100
Epoch 2/100
 20/776 [..............................] - ETA: 4s - loss: 0.0216

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
sales 예측 MSE: 0.030580823126104606
Epoch 1/100
Epoch 2/100
 19/776 [..............................] - ETA: 4s - loss: 0.0218

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
infra 예측 MSE: 0.018651095840169763
Epoch 1/100
Epoch 2/100
 19/776 [..............................] - ETA: 4s - loss: 0.0282

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
franch 예측 MSE: 0.01720154276278815
Epoch 1/100
Epoch 2/100
 17/776 [..............................] - ETA: 4s - loss: 0.0245

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
pop 예측 MSE: 0.022692737410296655
Epoch 1/100
Epoch 2/100
 20/776 [..............................] - ETA: 4s - loss: 0.0154

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
fin 예측 MSE: 0.018416229486029254
grade 예측 MSE: 0.018414485696600674
result 예측 MSE: 0.0864523872705236
sales 예측 MSE: 0.030580823126104606
infra 예측 MSE: 0.018651095840169763
franch 예측 MSE: 0.01720154276278815
pop 예측 MSE: 0.022692737410296655
fin 예측 MSE: 0.018416229486029254


## LSTM

In [12]:
# LSTM, variant 1 (one LSTM layer + one dense layer): train one model per
# target, report the test MSE, and collect predictions for every row.
target_columns = ['grade', 'result', 'sales', 'infra', 'franch', 'pop', 'fin']

predicted_values_1 = pd.DataFrame(index=x_data.index)
mse_dict = {}

# Positional 80/20 split; it is identical for every target, so compute it once.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train_current, x_test_current = x_data[:train_samples], x_data[train_samples:]

# The LSTM expects 3-D input; each row is presented as a sequence with one
# step per feature column and a single value per step.
x_train_lstm_current = np.reshape(x_train_current.values, (x_train_current.shape[0], x_train_current.shape[1], 1))
x_test_lstm_current = np.reshape(x_test_current.values, (x_test_current.shape[0], x_test_current.shape[1], 1))
# Full-data input built from the SAME columns the model is trained on. The
# original predicted on a 2-column slice ('<target>_2', '<target>_3'), which
# does not match the input shape the model is built and trained with.
# NOTE(review): if the intent was to train on those two columns only, change
# the training inputs instead — confirm with the author.
x_data_lstm_current = np.reshape(x_data.values, (x_data.shape[0], x_data.shape[1], 1))

for predict_column in target_columns:

    y_column = f'{predict_column}_y'
    y_temp = y_data[[y_column]].values.ravel()
    y_train, y_test = y_temp[:train_samples], y_temp[train_samples:]

    # Seed Python, NumPy and TensorFlow for repeatable runs, matching the
    # random_state=42 used for the tree-based models in this notebook.
    tf.keras.utils.set_random_seed(42)

    model = Sequential([
        LSTM(64, input_shape=(x_train_current.shape[1], 1), activation='relu'),
        BatchNormalization(),
        Dense(32),
        BatchNormalization(),
        Activation('relu'),
        Dense(1),
        Activation('linear'),
    ])

    print("형태 변경 전 - x_train_current 형태:", x_train_current.shape)
    print("형태 변경 후 - x_train_lstm_current 형태:", x_train_lstm_current.shape)

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Stop after 10 epochs without val_loss improvement and roll back to the
    # best weights.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(x_train_lstm_current, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=1)

    y_pred = model.predict(x_test_lstm_current).reshape(-1)

    mse = mean_squared_error(y_test, y_pred)
    mse_dict[predict_column] = mse
    print(f'{predict_column} 예측 MSE: {mse}')

    predicted_values_1[f'{predict_column}_1_pre'] = model.predict(x_data_lstm_current).reshape(-1)

# Recap of all test MSEs after the (verbose) training logs.
for column, mse_value in mse_dict.items():
    print(f'{column} 예측 MSE: {mse_value}')

lstm_pred_1 = pd.concat([x_data, predicted_values_1], axis=1)

형태 변경 전 - x_train_current 형태: (31026, 88)
형태 변경 후 - x_train_lstm_current 형태: (31026, 88, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
grade 예측 MSE: 0.019448015823330028
형태 변경 전 - x_train_current 형태: (31026, 88)
형태 변경 후 - x_train_lstm_current 형태: (31026, 88, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
result 예측 MSE: 0.08624726641328663
형태 변경 전 - x_train_current 형태: (31026, 88)
형태 변경 후 - x_train_lstm_current 형태: (31026, 88, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/

In [13]:
# LSTM, variant 2 (two stacked LSTM layers): train one model per target,
# report the test MSE, and collect predictions for every row.
target_columns = ['grade', 'result', 'sales', 'infra', 'franch', 'pop', 'fin']

predicted_values_2 = pd.DataFrame(index=x_data.index)
mse_dict = {}

# Positional 80/20 split; it is identical for every target, so compute it once.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train_current, x_test_current = x_data[:train_samples], x_data[train_samples:]

# The LSTM expects 3-D input; each row is presented as a sequence with one
# step per feature column and a single value per step.
x_train_lstm_current = np.reshape(x_train_current.values, (x_train_current.shape[0], x_train_current.shape[1], 1))
x_test_lstm_current = np.reshape(x_test_current.values, (x_test_current.shape[0], x_test_current.shape[1], 1))
# Full-data input built from the SAME columns the model is trained on. The
# original predicted on a 2-column slice ('<target>_2', '<target>_3'), which
# does not match the input shape the model is built and trained with.
# NOTE(review): if the intent was to train on those two columns only, change
# the training inputs instead — confirm with the author.
x_data_lstm_current = np.reshape(x_data.values, (x_data.shape[0], x_data.shape[1], 1))

for predict_column in target_columns:

    y_column = f'{predict_column}_y'
    y_temp = y_data[[y_column]].values.ravel()
    y_train, y_test = y_temp[:train_samples], y_temp[train_samples:]

    # Seed Python, NumPy and TensorFlow for repeatable runs, matching the
    # random_state=42 used for the tree-based models in this notebook.
    tf.keras.utils.set_random_seed(42)

    model = Sequential([
        # return_sequences=True so the second LSTM receives the full sequence.
        LSTM(64, input_shape=(x_train_current.shape[1], 1), activation='relu', return_sequences=True),
        BatchNormalization(),
        LSTM(32, activation='relu'),
        BatchNormalization(),
        Dense(1),
        Activation('linear'),
    ])

    print("형태 변경 전 - x_train_current 형태:", x_train_current.shape)
    print("형태 변경 후 - x_train_lstm_current 형태:", x_train_lstm_current.shape)

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Stop after 10 epochs without val_loss improvement and roll back to the
    # best weights.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(x_train_lstm_current, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=1)

    y_pred = model.predict(x_test_lstm_current).reshape(-1)

    mse = mean_squared_error(y_test, y_pred)
    mse_dict[predict_column] = mse
    print(f'{predict_column} 예측 MSE: {mse}')

    predicted_values_2[f'{predict_column}_1_pre'] = model.predict(x_data_lstm_current).reshape(-1)

# Recap of all test MSEs after the (verbose) training logs.
for column, mse_value in mse_dict.items():
    print(f'{column} 예측 MSE: {mse_value}')

lstm_pred_2 = pd.concat([x_data, predicted_values_2], axis=1)

형태 변경 전 - x_train_current 형태: (31026, 88)
형태 변경 후 - x_train_lstm_current 형태: (31026, 88, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
grade 예측 MSE: 0.023054731501998563
형태 변경 전 - x_train_current 형태: (31026, 88)
형태 변경 후 - x_train_lstm_current 형태: (31026, 88, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
result 예측 MSE: 0.10047990032827382
형태 변경 전 - x_train_current 형태: (31026, 88)
형태 변경 후 - x_train_lstm_current 형태: (31026, 88, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/

In [14]:
# LSTM, variant 3 (three stacked LSTM layers): train one model per target,
# report the test MSE, and collect predictions for every row.
target_columns = ['grade', 'result', 'sales', 'infra', 'franch', 'pop', 'fin']

predicted_values_3 = pd.DataFrame(index=x_data.index)
mse_dict = {}

# Positional 80/20 split; it is identical for every target, so compute it once.
total_samples = len(x_data)
train_samples = int(0.8 * total_samples)
x_train_current, x_test_current = x_data[:train_samples], x_data[train_samples:]

# The LSTM expects 3-D input; each row is presented as a sequence with one
# step per feature column and a single value per step.
x_train_lstm_current = np.reshape(x_train_current.values, (x_train_current.shape[0], x_train_current.shape[1], 1))
x_test_lstm_current = np.reshape(x_test_current.values, (x_test_current.shape[0], x_test_current.shape[1], 1))
# Full-data input built from the SAME columns the model is trained on. The
# original predicted on a 2-column slice ('<target>_2', '<target>_3'), which
# does not match the input shape the model is built and trained with.
# NOTE(review): if the intent was to train on those two columns only, change
# the training inputs instead — confirm with the author.
x_data_lstm_current = np.reshape(x_data.values, (x_data.shape[0], x_data.shape[1], 1))

for predict_column in target_columns:

    y_column = f'{predict_column}_y'
    y_temp = y_data[[y_column]].values.ravel()
    y_train, y_test = y_temp[:train_samples], y_temp[train_samples:]

    # Seed Python, NumPy and TensorFlow for repeatable runs, matching the
    # random_state=42 used for the tree-based models in this notebook.
    tf.keras.utils.set_random_seed(42)

    model = Sequential([
        # return_sequences=True on all but the last LSTM so each following
        # LSTM layer receives the full sequence.
        LSTM(64, input_shape=(x_train_current.shape[1], 1), activation='relu', return_sequences=True),
        BatchNormalization(),
        LSTM(32, activation='relu', return_sequences=True),
        BatchNormalization(),
        LSTM(32, activation='relu'),
        BatchNormalization(),
        Dense(1),
        Activation('linear'),
    ])

    print("형태 변경 전 - x_train_current 형태:", x_train_current.shape)
    print("형태 변경 후 - x_train_lstm_current 형태:", x_train_lstm_current.shape)

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Stop after 10 epochs without val_loss improvement and roll back to the
    # best weights.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(x_train_lstm_current, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=1)

    y_pred = model.predict(x_test_lstm_current).reshape(-1)

    mse = mean_squared_error(y_test, y_pred)
    mse_dict[predict_column] = mse
    print(f'{predict_column} 예측 MSE: {mse}')

    predicted_values_3[f'{predict_column}_1_pre'] = model.predict(x_data_lstm_current).reshape(-1)

# Recap of all test MSEs after the (verbose) training logs.
for column, mse_value in mse_dict.items():
    print(f'{column} 예측 MSE: {mse_value}')

lstm_pred_3 = pd.concat([x_data, predicted_values_3], axis=1)

형태 변경 전 - x_train_current 형태: (31026, 88)
형태 변경 후 - x_train_lstm_current 형태: (31026, 88, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
grade 예측 MSE: 0.023096673910020335
형태 변경 전 - x_train_current 형태: (31026, 88)
형태 변경 후 - x_train_lstm_current 형태: (31026, 88, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
result 예측 MSE: 0.09550370138175693
형태 변경 전 - x_train_current 형태: (31026, 88)
형태 변경 후 - x_train_lstm_current 형태: (31026, 88, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch

# 최종 데이터 저장

In [None]:
# Persist the XGBoost result table (features plus predicted columns) to the
# shared drive, without the DataFrame index.
forecast_path = '/content/drive/Shareddrives/서울시 빅데이터(2022-2)/논문/2. 상권 활성화 지수 예측 모델/3) 예측 데이터/x_forecast_xg.csv'
xgb_pred.to_csv(forecast_path, index=False)