In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from konlpy.tag import Okt

from tensorflow.keras.preprocessing.text import *
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

# Concat

In [None]:
# Load the two review datasets; the first CSV column becomes the row index.
df1, df2 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('./new_real_review_total_500.csv', './new_fake_review_total_500.csv')
)

In [None]:
# Attach the binary target column: 1 for every row of df1, 0 for every row of df2.
for frame, label in ((df1, 1), (df2, 0)):
    frame['answer'] = label

In [None]:
# Stack both labelled frames vertically and renumber the rows from 0.
df = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)

In [None]:
# Drop rows whose cleaned review text is missing (only one column is checked,
# so "all missing" and "any missing" select the same rows).
df = df.dropna(subset=['very_cleaned_content'])

In [None]:
# Renumber rows 0..n-1 and discard the old index instead of keeping it as a column.
df = df.reset_index(drop=True)

In [None]:
# Persist the merged dataset; the row index is written as the first CSV column.
df.to_csv(path_or_buf='./crawling/LSTM_Model_data.csv')

# LSTM Model

In [None]:
# Reload the merged dataset; the first CSV column is the saved row index.
df = pd.read_csv(filepath_or_buffer="./crawling/LSTM_Model_data.csv", index_col=0)

In [None]:
# Model input is the cleaned review text; the target is the 0/1 `answer` column.
X, Y = df['very_cleaned_content'], df['answer']

In [None]:
# Build the vocabulary from every text, then encode each text as a list of word indices.
token = Tokenizer()
token.fit_on_texts(texts=X)
Xtoken = token.texts_to_sequences(texts=X)

In [None]:
# Length of the longest tokenized text (0 when there are no texts).
# NOTE: the name `max` shadows the Python builtin; it is kept because later
# cells pass `max` to pad_sequences and to the Embedding layer.
max = int(np.max([len(seq) for seq in Xtoken], initial=0))
print(max)

In [None]:
# Bring every index sequence to the same length (`max`) so they form one 2-D array.
Xpad = pad_sequences(sequences=Xtoken, maxlen=max)
print(Xpad)

In [None]:
# Hold out 20% of the padded sequences for testing.
# A fixed random_state makes the split (and therefore every downstream metric)
# reproducible across Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    Xpad, Y, test_size=0.2, random_state=42
)
print(X_train.shape)

In [None]:
# Size of the embedding table: one row per word known to the tokenizer, plus one
# extra row because Keras Tokenizer word indices start at 1 (index 0 is unused by words).
vocabulary = token.word_index
wordsize = 1 + len(vocabulary)
print(wordsize)

In [None]:
# Conv1D + stacked-LSTM binary classifier over padded word-index sequences.
model = Sequential()
# One 300-dimensional vector per vocabulary index; each input holds `max` indices.
model.add(Embedding(wordsize, 300, input_length=max))

# Convolve over 5-token windows, then halve the sequence length.
model.add(Conv1D(filters=64, kernel_size=5, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))

# Three stacked LSTMs. Each returns the full sequence so that the next layer
# (and the final Flatten) receives one vector per time step.
model.add(LSTM(128, activation='tanh', return_sequences=True))
model.add(Dropout(0.3))

model.add(LSTM(64, activation='tanh', return_sequences=True))
model.add(Dropout(0.3))

model.add(LSTM(32, activation='tanh', return_sequences=True))
model.add(Flatten())
# Single sigmoid unit: one value in (0, 1) per input text.
model.add(Dense(1, activation='sigmoid'))

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

In [None]:
# Binary classification setup: Adam optimizer, cross-entropy loss, accuracy reported per epoch.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 7 epochs in batches of 200; 20% of the training rows are set aside
# by Keras for validation, and the per-epoch metrics are kept in `fit_hist`.
fit_hist = model.fit(
    x=X_train,
    y=y_train,
    epochs=7,
    batch_size=200,
    validation_split=0.2,
)

In [None]:
# Plot training and validation loss per epoch on the same axes.
for curve in ('loss', 'val_loss'):
    plt.plot(fit_hist.history[curve], label=curve)
plt.legend()
plt.show()

In [None]:
# Score the model on the held-out split rather than on the rows it was trained on,
# so the reported [loss, accuracy] reflects performance on unseen data.
score = model.evaluate(X_test, y_test, verbose=0)
print(score)

In [None]:
# Store the trained LSTM model on disk in HDF5 (.h5) format.
model.save(filepath='./model/LSTM_MODEL.h5')

# 학습된 LSTM Model의 예측값을 입력 피처(LSTM_data)로 적용하는 단계

In [None]:
# Load the merged dataset from the location where the "Concat" section wrote it.
# The previous path ('./LSTM_Model_data.csv') is only created at the end of this
# section, so reading it here fails on a fresh Restart & Run All.
test_data = pd.read_csv('./crawling/LSTM_Model_data.csv', index_col=0)

In [None]:
# Text column that will be scored by the trained LSTM model.
test_X = test_data.loc[:, 'very_cleaned_content']

In [None]:
# Create the `LSTM_data` column with a placeholder of 0 in every row.
test_data = test_data.assign(LSTM_data=0)

In [None]:
# Encode the texts with the tokenizer that was already fitted (no refit here).
test_Xtoken = token.texts_to_sequences(texts=test_X)

In [None]:
# Pad to the same length (`max`) that was used for the training sequences.
test_Xpad = pad_sequences(sequences=test_Xtoken, maxlen=max)

In [None]:
# Longest row length in the padded prediction input (0 when there are no rows).
test_max = int(np.max([len(row) for row in test_Xpad], initial=0))

In [None]:
# Run the trained model over every padded text and store its output
# in the `LSTM_data` column, replacing the placeholder values.
test_predict = model.predict(x=test_Xpad)
test_data['LSTM_data'] = test_predict

In [None]:
# Write the dataset, now including the `LSTM_data` column, back to disk.
test_data.to_csv(path_or_buf='./LSTM_Model_data.csv')

# DNN Model

In [None]:
# Load the file written at the end of the LSTM-application section: it is the
# one that contains the `LSTM_data` column selected as a DNN input below.
# The copy under ./crawling/ was saved before predictions were added, so
# reading it here would raise a KeyError on 'LSTM_data'.
df = pd.read_csv("./LSTM_Model_data.csv", index_col=0)

In [None]:
# Numeric count features plus the LSTM output are the DNN inputs;
# the target is kept as a one-column DataFrame.
feature_columns = [
    'image_count',
    'video_count',
    'link_count',
    'content_count',
    'hash_tag_count',
    'blog_tag_count',
    'bluetooth_word_count',
    'cleaned_content_count',
    'LSTM_data',
]
data_input = df[feature_columns]
data_target = df[['answer']]

In [None]:
# Rescale every feature column to the [0, 1] range (MinMaxScaler default),
# then display the shape of the scaled matrix.
minmaxscaler = MinMaxScaler()
minmaxscaler.fit(data_input)
scaled_input_data = minmaxscaler.transform(data_input)
scaled_input_data.shape

In [None]:
# `labeled_target_data` was referenced here but never defined in the notebook,
# so this cell raised a NameError on a fresh kernel. Build it from the target
# column as a 1-D array of integer class labels.
labeled_target_data = LabelEncoder().fit_transform(data_target['answer'])

# Hold out 20% of the rows for testing; a fixed random_state keeps the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    scaled_input_data, labeled_target_data, test_size=0.2, random_state=42
)
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

In [None]:
# Fully connected binary classifier over the scaled feature matrix.
model = Sequential()
# Take the input width from the training matrix instead of hard-coding it:
# nine feature columns are selected above, so the previous fixed value of 8
# did not match the data and made fit() fail with a shape error.
model.add(InputLayer(input_shape=(X_train.shape[1],)))
# Four hidden ReLU layers, each followed by 50% dropout.
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
# Single sigmoid unit: one value in (0, 1) per row.
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Binary classification setup: Adam optimizer, cross-entropy loss, accuracy metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Stop training once validation loss has not improved for 10 consecutive epochs.
# EarlyStopping is imported by name in the import cell; the bare name `keras`
# used here before was never imported and raised a NameError.
early_stop = EarlyStopping(monitor='val_loss', patience=10)

In [None]:
# Train for up to 200 epochs in batches of 5; 20% of the training rows are used
# for validation, and `early_stop` may end training before the last epoch.
fit_hist = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=5,
    epochs=200,
    validation_split=0.2,
    callbacks=[early_stop],
)

In [None]:
# Evaluate on the held-out split; `score` is [loss, accuracy] as configured in compile().
score = model.evaluate(x=X_test, y=Y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Keras DNN model loss :', test_loss)
print('Keras DNN accuracy :', test_accuracy)

In [None]:
# Plot training and validation loss per epoch on the same axes.
for curve in ('loss', 'val_loss'):
    plt.plot(fit_hist.history[curve], label=curve)
plt.legend()
plt.show()

In [None]:
# Store the trained DNN model on disk in HDF5 (.h5) format.
model.save(filepath='./model/DNN_Model.h5')