<a href="https://colab.research.google.com/github/donghui-0126/mini-project/blob/main/shoes-project/resell_regressor/bn_dnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pickle
import warnings

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer



# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides pandas/Keras warnings that could point at real problems.
warnings.filterwarnings('ignore')

In [None]:
# Mount Google Drive at /content/drive (Colab only) so the data files below can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the product table from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/kream_data/product_data_dpp.csv')

In [None]:
# (rows, columns) of the full product table.
df.shape

(6103, 15)

In [None]:
# Columns kept for modelling: brand, product name, retail price and resell price.
MODEL_COLUMNS = ['brand', 'name', 'price_og', 'price_resell']

# One independent frame per brand. `.loc` selects rows and columns in a single
# step and `.copy()` detaches the result from `df`, so the columns added to
# these frames later are written to real frames rather than to chained slices.
df_nike = df.loc[df['brand'] == 'Nike', MODEL_COLUMNS].copy()
df_jordan = df.loc[df['brand'] == 'Jordan', MODEL_COLUMNS].copy()
df_adidas = df.loc[df['brand'] == 'Adidas', MODEL_COLUMNS].copy()

In [None]:
# Size and first rows of the Nike subset.
print(df_nike.shape)
df_nike.head()

(1455, 4)


Unnamed: 0,brand,name,price_og,price_resell
0,Nike,Nike Dunk Low Retro Black,129000,152800
1,Nike,Nike Air Force 1 '07 Low White,139000,136200
2,Nike,Nike Air Force 1 '07 WB Flax,169000,173200
3,Nike,Nike x Supreme Air Force 1 Low Flax,184600,281400
4,Nike,Nike Zoom Vomero 5 PRM Light Iron Ore and Flat...,209000,258000


In [None]:
# Size and first rows of the Jordan subset.
print(df_jordan.shape)
df_jordan.head()

(1376, 4)


Unnamed: 0,brand,name,price_og,price_resell
1455,Jordan,Jordan 1 Retro High OG Chicago 2022,209000,480800
1456,Jordan,Jordan 1 Low Concord,139000,163600
1457,Jordan,Jordan 1 High Golf White Black,249000,254400
1458,Jordan,Jordan 1 x Travis Scott Retro Low OG SP Black ...,189000,698000
1459,Jordan,Jordan 1 x Travis Scott Retro Low OG SP Sail a...,189000,1417800


In [None]:
# Size and first rows of the Adidas subset.
print(df_adidas.shape)
df_adidas.head()

(1168, 4)


Unnamed: 0,brand,name,price_og,price_resell
2831,Adidas,Adidas Samba ADV Black,109000,197600
2832,Adidas,Adidas Samba Vegan White Black,109000,232800
2833,Adidas,Adidas Superstar 82 Black White,152000,158600
2834,Adidas,Adidas Yeezy Boost 350 V2 Slate Core Black,319000,354800
2835,Adidas,Adidas x BAPE Superstar 80s Core Black Off White,199000,249400


In [None]:
# Summary statistics of Nike resell prices (used to spot extreme values).
df_nike['price_resell'].describe()

count    1.455000e+03
mean     2.563218e+05
std      2.945740e+05
min      4.560000e+04
25%      1.389000e+05
50%      1.900000e+05
75%      2.816000e+05
max      8.018000e+06
Name: price_resell, dtype: float64

In [None]:
# Summary statistics of Jordan resell prices (used to spot extreme values).
df_jordan['price_resell'].describe()

count    1.376000e+03
mean     3.379037e+05
std      5.207507e+05
min      5.000000e+04
25%      1.740500e+05
50%      2.399000e+05
75%      3.340500e+05
max      9.676000e+06
Name: price_resell, dtype: float64

In [None]:
# Number of Nike items per 100,000-wide resell-price bucket (floor division by 100000).
(df_nike['price_resell']//100000).value_counts()

1     668
2     367
3     137
0     111
4      62
5      27
6      25
9      15
8      12
7      11
11      5
10      4
14      2
13      2
22      2
17      1
12      1
20      1
25      1
80      1
Name: price_resell, dtype: int64

In [None]:
# Remove Nike rows whose resell price exceeds 8,000,000.
nike_outlier_idx = df_nike.index[df_nike['price_resell'] > 8000000]
df_nike = df_nike.drop(nike_outlier_idx)

In [None]:
# Bucket Nike resell prices into 100,000-wide classes; everything from
# 1,000,000 upwards shares class 10. The class is used to stratify the split.
nike_resell = df_nike['price_resell']
df_nike['price_class'] = np.where(nike_resell < 1000000, nike_resell // 100000, 10)

-----

In [None]:
# Number of Jordan items per price class.
# NOTE(review): `price_class` is only added to df_jordan in a later cell, so on a
# fresh kernel this cell fails unless that cell is run first — confirm cell order.
(df_jordan['price_class']).value_counts()

2     468
1     428
3     199
4      78
5      52
0      51
13     37
6      27
7      13
8      12
10      6
9       5
Name: price_class, dtype: int64

In [None]:
# Remove Jordan rows whose resell price exceeds 5,000,000.
# The mask and the index must both come from df_jordan: building them from
# df_nike either drops nothing or asks for labels that are not in df_jordan.
jordan_outlier_idx = df_jordan.index[df_jordan['price_resell'] > 5000000]
df_jordan = df_jordan.drop(jordan_outlier_idx)

In [None]:
# Bucket Jordan resell prices into 100,000-wide classes; everything from
# 1,100,000 upwards shares class 13. The class is used to stratify the split.
jordan_resell = df_jordan['price_resell']
df_jordan['price_class'] = np.where(jordan_resell < 1100000, jordan_resell // 100000, 13)

---------

In [None]:
def make_input_col(df_list):
    """Add the model's text input and numeric target to every frame, in place.

    For each frame in ``df_list`` two columns are written:

    * ``input``  - ``"<brand> | <name> | <price_og / 10000>"`` as one string.
    * ``target`` - ``price_resell / 10000``.

    Each frame must already have ``brand``, ``name``, ``price_og`` and
    ``price_resell`` columns. Nothing is returned; the frames are mutated.
    """
    separator = " | "
    for frame in df_list:
        scaled_retail_text = (frame['price_og'] / 10000.0).astype(str)
        frame['input'] = frame['brand'] + separator + frame['name'] + separator + scaled_retail_text
        frame['target'] = frame['price_resell'] / 10000.0

# Add the 'input' and 'target' columns to all three brand frames (in place).
make_input_col([df_nike, df_jordan, df_adidas])

In [None]:
def tokenize_input(df_list, pd_list, return_tokenizers=False):
    """Fit one Keras ``Tokenizer`` per frame and return padded token sequences.

    Parameters
    ----------
    df_list : list of DataFrame
        Frames that each contain an ``'input'`` text column.
    pd_list : list
        Paired element-wise with ``df_list``; its elements are never read.
        Kept so existing calls keep working. As before, iteration stops at the
        shorter of the two lists.
    return_tokenizers : bool, default False
        When True, the fitted tokenizers are returned as well so they can be
        saved and reused on new text.

    Returns
    -------
    list of ndarray
        One padded integer matrix per frame (``pad_sequences`` defaults).
    (list of ndarray, list of Tokenizer)
        Only when ``return_tokenizers`` is True.
    """
    padded_inputs = []
    tokenizers = []

    # The second element of each pair is deliberately ignored. It used to be
    # bound to the name `pd`, which shadowed the pandas module in this function.
    for frame, _unused in zip(df_list, pd_list):
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(frame['input'])
        word_index = tokenizer.word_index

        print(f"word_index의 크기: {len(word_index)}")

        # Turn every input string into a sequence of word ids, then pad the
        # sequences of this frame to a common length.
        tokenized_name = tokenizer.texts_to_sequences(frame['input'])
        padded_name = pad_sequences(tokenized_name)

        padded_inputs.append(padded_name)
        tokenizers.append(tokenizer)

    if return_tokenizers:
        return padded_inputs, tokenizers
    return padded_inputs

# Empty frames handed to tokenize_input as its second argument.
df_nike_input_, df_jordan_input_, df_adidas_input_ = (pd.DataFrame() for _ in range(3))

# One padded token matrix per brand, in the same order as the frames passed in.
df_nike_input, df_jordan_input, df_adidas_input = tokenize_input(
    [df_nike, df_jordan, df_adidas],
    [df_nike_input_, df_jordan_input_, df_adidas_input_],
)

word_index의 크기: 1215
word_index의 크기: 967
word_index의 크기: 1046


In [None]:
# Save the tokenizer: write the Python object to a pickle file.
# NOTE(review): in the visible code `tokenizer` is only assigned inside
# tokenize_input(), so no module-level `tokenizer` appears to exist at this
# point — confirm which brand's tokenizer this cell is meant to persist.
with open('bn_tokenizer.pickle', 'wb') as f:
    pickle.dump(tokenizer, f)

# Load the tokenizer back from the pickle file.
with open('bn_tokenizer.pickle', 'rb') as f:
    tokenizer = pickle.load(f)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Fixed seed so train/validation/test membership is identical on every run;
# without it each run produces different splits and the metrics are not comparable.
SPLIT_SEED = 42

# Nike: 70% train (stratified on price_class), then the remaining 30% is
# halved into validation and test.
nike_train_X, nike_test_X, nike_train_y, nike_test_y = train_test_split(
    df_nike_input, df_nike['target'], test_size=0.3, shuffle=True,
    stratify=df_nike['price_class'], random_state=SPLIT_SEED)
nike_valid_X, nike_test_X, nike_valid_y, nike_test_y = train_test_split(
    nike_test_X, nike_test_y, test_size=0.5, random_state=SPLIT_SEED)

# Jordan: same scheme.
jordan_train_X, jordan_test_X, jordan_train_y, jordan_test_y = train_test_split(
    df_jordan_input, df_jordan['target'], test_size=0.3, shuffle=True,
    stratify=df_jordan['price_class'], random_state=SPLIT_SEED)
jordan_valid_X, jordan_test_X, jordan_valid_y, jordan_test_y = train_test_split(
    jordan_test_X, jordan_test_y, test_size=0.5, random_state=SPLIT_SEED)

In [None]:
%cd /content/drive/MyDrive/Kream_data/dataset

In [None]:
# python 변수(객체)를 pickle 파일로 저장
# File name -> split to persist, written in this order.
# The validation entries reference nike_valid_X / jordan_valid_X, the names
# actually produced by the split cell.
# 'nike_vaild_X.pickle' keeps its historical spelling because the loading cell
# reads exactly that name; the Jordan file uses 'jordan_valid_X.pickle', which
# is the name the loading cell opens.
splits_to_save = {
    'nike_train_X.pickle': nike_train_X,
    'nike_test_X.pickle': nike_test_X,
    'nike_vaild_X.pickle': nike_valid_X,
    'nike_train_y.pickle': nike_train_y,
    'nike_test_y.pickle': nike_test_y,
    'nike_valid_y.pickle': nike_valid_y,
    'jordan_train_X.pickle': jordan_train_X,
    'jordan_test_X.pickle': jordan_test_X,
    'jordan_valid_X.pickle': jordan_valid_X,
    'jordan_train_y.pickle': jordan_train_y,
    'jordan_test_y.pickle': jordan_test_y,
    'jordan_valid_y.pickle': jordan_valid_y,
}

for file_name, split in splits_to_save.items():
    with open(file_name, 'wb') as f:
        pickle.dump(split, f)


In [None]:
def _read_pickle(file_name):
    """Return the object stored in the pickle file ``file_name``."""
    with open(file_name, 'rb') as f:
        return pickle.load(f)


# Reload every saved split from the current working directory.
nike_train_X_var = _read_pickle('nike_train_X.pickle')
nike_test_X_var = _read_pickle('nike_test_X.pickle')
nike_valid_X_var = _read_pickle('nike_vaild_X.pickle')
nike_train_y_var = _read_pickle('nike_train_y.pickle')
nike_test_y_var = _read_pickle('nike_test_y.pickle')
nike_valid_y_var = _read_pickle('nike_valid_y.pickle')

jordan_train_X_var = _read_pickle('jordan_train_X.pickle')
jordan_test_X_var = _read_pickle('jordan_test_X.pickle')
jordan_valid_X_var = _read_pickle('jordan_valid_X.pickle')
jordan_train_y_var = _read_pickle('jordan_train_y.pickle')
jordan_test_y_var = _read_pickle('jordan_test_y.pickle')
jordan_valid_y_var = _read_pickle('jordan_valid_y.pickle')


In [None]:
# Switch to the model folder so the checkpoint files below are written and read there.
%cd /content/drive/MyDrive/kream_data/model

/content/drive/MyDrive/kream_data/model


In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop training once validation loss has not improved for 30 epochs.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=30,
    verbose=1,
)

# Keep only the best (lowest val_loss) weights of each brand model on disk.
mc_nike = ModelCheckpoint(
    'nike_bn_best_model.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)
mc_jordan = ModelCheckpoint(
    'jordan_bn_best_model.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Number of distinct token ids fed to the embedding layer, taken from the
# tokenized data instead of a hand-typed constant: ids start at 1 and 0 is the
# padding value, so the embedding needs (largest id + 1) rows.
word_size = int(df_nike_input.max()) + 1
# Every padded row has the same length; use it as the embedding input length.
nike_seq_len = df_nike_input.shape[1]


nike_bn_model = keras.models.Sequential([
    # Embedding layer (output dimension kept equal to the vocabulary size, as before).
    keras.layers.Embedding(word_size, word_size, input_length=nike_seq_len),
    # Flatten the embedding output to a 1-D vector per sample for regression.
    keras.layers.Flatten(),
    # NOTE(review): no activation is passed to any Dense layer, so the stack is
    # linear apart from dropout — confirm this is intended.
    keras.layers.Dense(64),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(32),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(16),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(8),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4),
    keras.layers.Dropout(0.5),

    keras.layers.Dense(2),
    # Single output unit: the predicted target (price_resell / 10000).
    keras.layers.Dense(1)
 ])

# Huber loss on the scaled target; MAPE is reported as the readable metric.
nike_bn_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.005), loss=tf.keras.losses.Huber(delta=3), metrics= ["mean_absolute_percentage_error"])

# Early stopping and checkpointing are driven by the validation split.
nike_bn_model.fit(nike_train_X, nike_train_y, batch_size=16, epochs=300, validation_data = (nike_valid_X,nike_valid_y),  callbacks=[es, mc_nike])

Epoch 1/300
Epoch 1: val_loss improved from inf to 39.91072, saving model to nike_bn_best_model.h5
Epoch 2/300
Epoch 2: val_loss improved from 39.91072 to 35.61482, saving model to nike_bn_best_model.h5
Epoch 3/300
Epoch 3: val_loss did not improve from 35.61482
Epoch 4/300
Epoch 4: val_loss did not improve from 35.61482
Epoch 5/300
Epoch 5: val_loss improved from 35.61482 to 27.77228, saving model to nike_bn_best_model.h5
Epoch 6/300
Epoch 6: val_loss did not improve from 27.77228
Epoch 7/300
Epoch 7: val_loss did not improve from 27.77228
Epoch 8/300
Epoch 8: val_loss improved from 27.77228 to 26.00365, saving model to nike_bn_best_model.h5
Epoch 9/300
Epoch 9: val_loss did not improve from 26.00365
Epoch 10/300
Epoch 10: val_loss did not improve from 26.00365
Epoch 11/300
Epoch 11: val_loss did not improve from 26.00365
Epoch 12/300
Epoch 12: val_loss improved from 26.00365 to 25.62606, saving model to nike_bn_best_model.h5
Epoch 13/300
Epoch 13: val_loss did not improve from 25.626

<keras.callbacks.History at 0x7f0aeee44c70>

In [None]:
# Reload the best Nike checkpoint (compiled, no custom objects) and report
# [loss, MAPE] on the held-out Nike test split.
model = keras.models.load_model('nike_bn_best_model.h5')

model.evaluate(nike_test_X, nike_test_y)



[23.184724807739258, 33.571495056152344]

In [None]:
# Number of distinct token ids fed to the embedding layer, taken from the
# tokenized data instead of a hand-typed constant: ids start at 1 and 0 is the
# padding value, so the embedding needs (largest id + 1) rows.
word_size = int(df_jordan_input.max()) + 1
# Every padded row has the same length; use it as the embedding input length.
jordan_seq_len = df_jordan_input.shape[1]


# The Jordan network gets its own name so it no longer overwrites the Nike model.
jordan_bn_model = keras.models.Sequential([
    # Embedding layer (output dimension kept equal to the vocabulary size, as before).
    keras.layers.Embedding(word_size, word_size, input_length=jordan_seq_len),
    # Flatten the embedding output to a 1-D vector per sample for regression.
    keras.layers.Flatten(),
    # NOTE(review): no activation is passed to any Dense layer, so the stack is
    # linear apart from dropout — confirm this is intended.
    keras.layers.Dense(64),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(32),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(16),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(8),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4),
    keras.layers.Dropout(0.5),

    keras.layers.Dense(2),
    # Single output unit: the predicted target (price_resell / 10000).
    keras.layers.Dense(1)
 ])

# Huber loss on the scaled target; MAPE is reported as the readable metric.
jordan_bn_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.005), loss=tf.keras.losses.Huber(delta=3), metrics= ["mean_absolute_percentage_error"])

# Early stopping and checkpointing are driven by the validation split.
jordan_bn_model.fit(jordan_train_X, jordan_train_y, batch_size=16, epochs=300, validation_data = (jordan_valid_X,jordan_valid_y),  callbacks=[es, mc_jordan])

Epoch 1/300
Epoch 1: val_loss improved from inf to 68.49728, saving model to jordan_bn_best_model.h5
Epoch 2/300
Epoch 2: val_loss improved from 68.49728 to 57.07859, saving model to jordan_bn_best_model.h5
Epoch 3/300
Epoch 3: val_loss did not improve from 57.07859
Epoch 4/300
Epoch 4: val_loss improved from 57.07859 to 49.95757, saving model to jordan_bn_best_model.h5
Epoch 5/300
Epoch 5: val_loss did not improve from 49.95757
Epoch 6/300
Epoch 6: val_loss did not improve from 49.95757
Epoch 7/300
Epoch 7: val_loss did not improve from 49.95757
Epoch 8/300
Epoch 8: val_loss improved from 49.95757 to 48.55151, saving model to jordan_bn_best_model.h5
Epoch 9/300
Epoch 9: val_loss did not improve from 48.55151
Epoch 10/300
Epoch 10: val_loss did not improve from 48.55151
Epoch 11/300
Epoch 11: val_loss did not improve from 48.55151
Epoch 12/300
Epoch 12: val_loss did not improve from 48.55151
Epoch 13/300
Epoch 13: val_loss improved from 48.55151 to 47.23617, saving model to jordan_bn_b

<keras.callbacks.History at 0x7f0acc7f6110>

In [None]:
# Reload the best Jordan checkpoint (compiled, no custom objects) and report
# [loss, MAPE] on the held-out Jordan test split.
model = keras.models.load_model('jordan_bn_best_model.h5')

model.evaluate(jordan_test_X, jordan_test_y)



[33.664039611816406, 34.764766693115234]