<a href="https://colab.research.google.com/github/donghui-0126/mini-project/blob/main/shoes-project/resell_regressor/bn_dnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.linear_model import LinearRegression
import warnings
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences



warnings.filterwarnings('ignore')

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
df = pd.read_csv('/content/drive/MyDrive/kream_data/product_data_dpp.csv')

In [4]:
df.shape

(6103, 15)

In [5]:
df_nike = df[df['brand']=='Nike'][['brand', 'name', 'price_og', 'price_resell']]
df_jordan = df[df['brand']=='Jordan'][['brand', 'name', 'price_og', 'price_resell']]
df_adidas = df[df['brand']=='Adidas'][['brand', 'name', 'price_og', 'price_resell']]

In [6]:
print(df_nike.shape)
df_nike.head()

(1455, 4)


Unnamed: 0,brand,name,price_og,price_resell
0,Nike,Nike Dunk Low Retro Black,129000,152800
1,Nike,Nike Air Force 1 '07 Low White,139000,136200
2,Nike,Nike Air Force 1 '07 WB Flax,169000,173200
3,Nike,Nike x Supreme Air Force 1 Low Flax,184600,281400
4,Nike,Nike Zoom Vomero 5 PRM Light Iron Ore and Flat...,209000,258000


In [7]:
print(df_jordan.shape)
df_jordan.head()

(1376, 4)


Unnamed: 0,brand,name,price_og,price_resell
1455,Jordan,Jordan 1 Retro High OG Chicago 2022,209000,480800
1456,Jordan,Jordan 1 Low Concord,139000,163600
1457,Jordan,Jordan 1 High Golf White Black,249000,254400
1458,Jordan,Jordan 1 x Travis Scott Retro Low OG SP Black ...,189000,698000
1459,Jordan,Jordan 1 x Travis Scott Retro Low OG SP Sail a...,189000,1417800


In [8]:
print(df_adidas.shape)
df_adidas.head()

(1168, 4)


Unnamed: 0,brand,name,price_og,price_resell
2831,Adidas,Adidas Samba ADV Black,109000,197600
2832,Adidas,Adidas Samba Vegan White Black,109000,232800
2833,Adidas,Adidas Superstar 82 Black White,152000,158600
2834,Adidas,Adidas Yeezy Boost 350 V2 Slate Core Black,319000,354800
2835,Adidas,Adidas x BAPE Superstar 80s Core Black Off White,199000,249400


In [10]:
df_nike['price_resell'].describe()

count    1.455000e+03
mean     2.563218e+05
std      2.945740e+05
min      4.560000e+04
25%      1.389000e+05
50%      1.900000e+05
75%      2.816000e+05
max      8.018000e+06
Name: price_resell, dtype: float64

In [11]:
df_jordan['price_resell'].describe()

count    1.376000e+03
mean     3.379037e+05
std      5.207507e+05
min      5.000000e+04
25%      1.740500e+05
50%      2.399000e+05
75%      3.340500e+05
max      9.676000e+06
Name: price_resell, dtype: float64

In [12]:
(df_nike['price_resell']//100000).value_counts()

1     668
2     367
3     137
0     111
4      62
5      27
6      25
9      15
8      12
7      11
11      5
10      4
14      2
13      2
22      2
17      1
12      1
20      1
25      1
80      1
Name: price_resell, dtype: int64

In [13]:
df_nike = df_nike.drop(df_nike[df_nike['price_resell']>8000000].index)

In [14]:
df_nike['price_class'] = np.where(df_nike['price_resell'] <1000000,\
                                  df_nike['price_resell']//100000,\
                                  10)

-----

In [15]:
df_jordan = df_jordan.drop(df_nike[df_nike['price_resell']>5000000].index)

In [16]:
df_jordan['price_class'] = np.where(df_jordan['price_resell'] <1100000,\
                                  df_jordan['price_resell']//100000,\
                                  13)

---------

In [17]:
def make_input_col(df_list):
    for df in df_list:
        df['input'] = df['brand'] + " | " + df['name'] + " | " + (df['price_og']/10000.0).astype(str)
        df['target'] = df['price_resell']/10000.0

make_input_col([df_nike, df_jordan, df_adidas])

In [18]:
def tokenize_input(df_list, pd_list):
    return_list = []

    for df, pd in zip(df_list, pd_list):
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(df['input'])
        word_index = tokenizer.word_index

        print(f"word_index의 크기: {len(word_index)}")

        # 문장을 토큰화 한다.
        tokenized_name = tokenizer.texts_to_sequences(df['input'])
        padded_name = pad_sequences(tokenized_name)

        return_list.append((padded_name))
    return return_list

df_nike_input_ = pd.DataFrame()
df_jordan_input_ = pd.DataFrame()
df_adidas_input_ = pd.DataFrame()

[df_nike_input, df_jordan_input, df_adidas_input] = tokenize_input([df_nike, df_jordan, df_adidas], [df_nike_input_, df_jordan_input_, df_adidas_input_])

word_index의 크기: 1215
word_index의 크기: 967
word_index의 크기: 1046


In [19]:
from sklearn.model_selection import train_test_split

In [20]:
nike_train_X, nike_test_X, nike_train_y, nike_test_y = train_test_split(df_nike_input, df_nike['target'], test_size=0.3, shuffle=True, stratify=df_nike['price_class'])
nike_valid_X, nike_test_X, nike_valid_y, nike_test_y = train_test_split(nike_test_X, nike_test_y, test_size=0.5)

jordan_train_X, jordan_test_X, jordan_train_y, jordan_test_y = train_test_split(df_jordan_input, df_jordan['target'], test_size=0.3, shuffle=True, stratify=df_jordan['price_class'])
jordan_valid_X, jordan_test_X, jordan_valid_y, jordan_test_y = train_test_split(jordan_test_X, jordan_test_y, test_size=0.5)

In [21]:
%cd /content/drive/MyDrive/kream_data/model

/content/drive/MyDrive/kream_data/model


In [22]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

es=EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=30)
mc_nike=ModelCheckpoint('nike_bn_best_model.h5', monitor='val_loss', verbose=1, save_best_only=True)
mc_jordan=ModelCheckpoint('jordan_bn_best_model.h5', monitor='val_loss', verbose=1, save_best_only=True)

In [23]:
# 임베딩 층에 입력될 단어의 수를 지정한다.
word_size = 1216 + 1


nike_bn_model = keras.models.Sequential([
    # 임베딩층
    keras.layers.Embedding(word_size, word_size, input_length=18),
    # 회귀를 위해 임베딩층의 output값을 1차원의 array로 바꾸어준다.
    keras.layers.Flatten(),
    keras.layers.Dense(64,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(32,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(16,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(8,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4,activation='relu'),
    keras.layers.Dropout(0.5),

    keras.layers.Dense(2,activation='relu'),
    keras.layers.Dense(1,activation='linear')
 ])

nike_bn_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.005), loss='mse', metrics= ["mean_absolute_percentage_error"])

nike_bn_model.fit(nike_train_X, nike_train_y, batch_size=16, epochs=150, validation_data = (nike_valid_X,nike_valid_y),  callbacks=[es, mc_nike])

Epoch 1/150
Epoch 1: val_loss improved from inf to 1113.98987, saving model to nike_bn_best_model.h5
Epoch 2/150
Epoch 2: val_loss improved from 1113.98987 to 1032.34277, saving model to nike_bn_best_model.h5
Epoch 3/150
Epoch 3: val_loss improved from 1032.34277 to 951.32513, saving model to nike_bn_best_model.h5
Epoch 4/150
Epoch 4: val_loss did not improve from 951.32513
Epoch 5/150
Epoch 5: val_loss improved from 951.32513 to 926.30591, saving model to nike_bn_best_model.h5
Epoch 6/150
Epoch 6: val_loss did not improve from 926.30591
Epoch 7/150
Epoch 7: val_loss improved from 926.30591 to 918.34564, saving model to nike_bn_best_model.h5
Epoch 8/150
Epoch 8: val_loss improved from 918.34564 to 895.94452, saving model to nike_bn_best_model.h5
Epoch 9/150
Epoch 9: val_loss improved from 895.94452 to 832.64441, saving model to nike_bn_best_model.h5
Epoch 10/150
Epoch 10: val_loss did not improve from 832.64441
Epoch 11/150
Epoch 11: val_loss improved from 832.64441 to 820.58270, savin

<keras.callbacks.History at 0x7fd287b8a9b0>

In [24]:
model = keras.models.load_model('nike_bn_best_model.h5', custom_objects=None, compile=True)

model.evaluate(nike_test_X, nike_test_y)



[299.5014343261719, 57.988487243652344]

In [25]:
# 임베딩 층에 입력될 단어의 수를 지정한다.
word_size = 967 + 1


jordan_bn_model = keras.models.Sequential([
    # 임베딩층
    keras.layers.Embedding(word_size, word_size, input_length=16),
    # 회귀를 위해 임베딩층의 output값을 1차원의 array로 바꾸어준다.
    keras.layers.Flatten(),
    keras.layers.Dense(64,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(32,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(16,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(8,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4,activation='relu'),
    keras.layers.Dropout(0.5),

    keras.layers.Dense(2,activation='relu'),
    keras.layers.Dense(1,activation='linear')
 ])

jordan_bn_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.005), loss='mse', metrics= ["mean_absolute_percentage_error"])

jordan_bn_model.fit(jordan_train_X, jordan_train_y, batch_size=16, epochs=100, validation_data = (jordan_valid_X,jordan_valid_y),  callbacks=[es, mc_jordan])

Epoch 1/100
Epoch 1: val_loss improved from inf to 2242.78223, saving model to jordan_bn_best_model.h5
Epoch 2/100
Epoch 2: val_loss improved from 2242.78223 to 2225.12915, saving model to jordan_bn_best_model.h5
Epoch 3/100
Epoch 3: val_loss improved from 2225.12915 to 2207.58203, saving model to jordan_bn_best_model.h5
Epoch 4/100
Epoch 4: val_loss improved from 2207.58203 to 2190.40088, saving model to jordan_bn_best_model.h5
Epoch 5/100
Epoch 5: val_loss improved from 2190.40088 to 2173.62793, saving model to jordan_bn_best_model.h5
Epoch 6/100
Epoch 6: val_loss improved from 2173.62793 to 2156.79736, saving model to jordan_bn_best_model.h5
Epoch 7/100
Epoch 7: val_loss improved from 2156.79736 to 2140.52515, saving model to jordan_bn_best_model.h5
Epoch 8/100
Epoch 8: val_loss improved from 2140.52515 to 2124.25098, saving model to jordan_bn_best_model.h5
Epoch 9/100
Epoch 9: val_loss improved from 2124.25098 to 2108.29224, saving model to jordan_bn_best_model.h5
Epoch 10/100
Epoc

<keras.callbacks.History at 0x7fd271970b50>

In [26]:
model = keras.models.load_model('jordan_bn_best_model.h5', custom_objects=None, compile=True)

model.evaluate(jordan_test_X, jordan_test_y)



[3329.54052734375, 39.443790435791016]

In [None]:
model.evaluate()