## IPC

### Module Import

In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib 

from sklearn import model_selection, metrics
# from __future__ import print_function
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Activation, Embedding, LSTM, Conv1D, MaxPooling1D
from tensorflow.python.client import device_lib
import keras.backend.tensorflow_backend as K
from keras import layers, models, Model
import tensorflow as tf
#word_tokenizing
import re
import os
from imblearn.over_sampling import SMOTE
# preprocessing
from nltk import word_tokenize, sent_tokenize
from nltk.stem import PorterStemmer,LancasterStemmer,WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer
from nltk.tokenize import RegexpTokenizer
from nltk_data.corpora import wordnet
from keras.utils import multi_gpu_model

### Text data tokenizing

#### Data input

In [3]:
input_data = './Data/tripad_data.csv'
raw_data = pd.read_csv(input_data)
raw_data = raw_data.dropna(axis=0) # Delete nan rows
raw_data = raw_data.drop(["Unnamed: 0"], axis=1) # Delete Unnamed: 0 row
#print(raw_data['Content'].head)
raw_data = raw_data.reset_index()
raw_data = raw_data.drop(["index"], axis=1)

In [4]:
print(list(raw_data))

['HotelInfo.HotelID', 'HotelInfo.Name', 'Author', 'ReviewID', 'Service', 'Cleanliness', 'Rooms', 'Value', 'Sleep Quality', 'Location', 'Business service (e.g., internet access)', 'Check in / front desk', 'Overall', 'Title', 'Content', 'Date', 'AuthorLocation']


#### Stopwords Apply

In [8]:
#english (Stopwords file)
english_file = open('./Data/english', mode='r')
english_data = english_file.readlines()
english_text = []

for i in english_data:
    english_text.append(i[:-1])

english_text

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [9]:
# Stopwords Removement
def clean_review(text):
    # 기호문자 제거
    review_text = re.sub("[^a-zA-Z]", " ", text)
    # 소문자로 변경 후 분리
    word_tokens = review_text.lower().split()
    word_tokens = review_text.lower().split() # 없어도 됨
    # 표제어 추출하기 위한 함수
    le = WordNetLemmatizer()
    # english_text 안의 영어 불용어를 집합으로 변환
    stop_words = set(english_text)
    word_tokens = [le.lemmatize(w) for w in word_tokens if not w in stop_words]
    # 토근화된 문장을 하나의 문장으로 변환
    cleaned_review = " ".join(word_tokens)
    
    # \W:비문자 \b:단어 경계(\w와 \W의 경계) \w:문자
    # compile 정규표현식을 컴파일 하는 함수
    shortword = re.compile(r'\W*\b\w{1,2}\b')
    cleaned_review = shortword.sub('', cleaned_review)
    
    return cleaned_review

# Stopwords Apply Content 열에 clean_review 함수를 통해 apply
raw_data['Content'] = raw_data['Content'].apply(clean_review)
raw_data.head

<bound method NDFrame.head of          HotelInfo.HotelID       HotelInfo.Name        Author     ReviewID  \
0                   280518  NH Berlin City West       Clara79    UR2592389   
1                   280518  NH Berlin City West  BerlinerPooh    UR3015596   
2                   280518  NH Berlin City West  amazingthing    UR3456048   
3                   280518  NH Berlin City West         Nitus    UR4045777   
4                   280518  NH Berlin City West     mattp1874    UR7014779   
...                    ...                  ...           ...          ...   
1275283              78587          Chablis Inn   tulipslover  UR124992186   
1275284              78587          Chablis Inn       Kathi B  UR125442854   
1275285              78587          Chablis Inn    Carolynn S  UR125932840   
1275286              78587          Chablis Inn    FrankHutch  UR127701764   
1275287              78587          Chablis Inn     scoredonu  UR127992624   

         Service  Cleanliness  Ro

In [10]:
tokenizer = RegexpTokenizer(r'\w+')
# raw_data의 'Content'열의 타입을 str 형태로 변경
raw_data['Content'] = raw_data['Content'].astype('str')
# raw_data의 'Content'열의 각 문장들을 단어로 tokenize
raw_data["Content"] = raw_data["Content"].apply(tokenizer.tokenize)

In [11]:
content_list = np.array(raw_data['Content'].tolist())
print(content_list)

[list(['hotel', 'bit', 'surprise', 'could', 'find', 'review', 'anywhere', 'however', 'turned', 'entirely', 'pleasant', 'one', 'location', 'great', 'close', 'damn', 'charlottenburg', 'literally', 'next', 'berliner', 'strasse', 'bahn', 'staff', 'excellent', 'willing', 'help', 'bunch', 'english', 'direction', 'good', 'restaurant', 'club', 'hour', 'room', 'huge', 'expected', 'bedroom', 'bathroom', 'got', 'luxurious', 'lounge', 'little', 'kitchen', 'would', 'highly', 'recommend', 'hotel', 'anyone', 'paid', 'twin', 'room', 'pity', 'could', 'stay', 'longer', 'appreciate', 'berlin', 'great', 'hotel'])
 list(['room', 'adequate', 'little', 'drab', 'side', 'noisy', 'main', 'street', 'front', 'kitchenette', 'removed', 'used', 'area', 'storing', 'luggage', 'shopping', 'underground', 'road', 'another', 'across', 'street', 'take', 'time', 'reach', 'fabulous', 'shop', 'restaurant', 'busy', 'area', 'damm', 'main', 'entrance', 'hotel', 'rather', 'grand', 'adjacent', 'large', 'cafeteria', 'bar', 'area', 

  """Entry point for launching an IPython kernel.


### Data Tokenizing & word_indexing

In [12]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(content_list)

ipc_Xdata = np.array(tokenizer.texts_to_sequences(content_list))

print(ipc_Xdata)
print(ipc_Xdata.shape)

ipc_Ydata = np.array(raw_data[['Service', 'Cleanliness','Value',
                               'Sleep Quality','Rooms','Location']])
print(ipc_Ydata)
print(ipc_Ydata.shape)

[list([2, 81, 753, 28, 103, 88, 466, 97, 593, 2810, 277, 9, 14, 3, 64, 4989, 15450, 781, 52, 12302, 9472, 2841, 6, 59, 1076, 272, 2038, 443, 581, 8, 23, 284, 95, 1, 197, 387, 233, 31, 46, 901, 329, 47, 364, 5, 290, 61, 2, 310, 246, 933, 1, 3923, 28, 4, 795, 1579, 1339, 3, 2])
 list([1, 546, 47, 4179, 150, 386, 204, 60, 44, 1310, 1763, 192, 24, 4099, 335, 223, 1251, 347, 121, 206, 60, 84, 10, 1248, 506, 205, 23, 333, 24, 7154, 204, 570, 2, 393, 314, 1315, 77, 3954, 50, 24, 17, 190, 258, 6, 32, 552, 47, 709, 23, 40, 66, 538, 62, 888, 180, 1225, 101, 72, 30])
 list([48, 343, 88, 150, 379, 137, 110, 275, 1, 46, 9, 199, 66, 204, 347, 131, 294, 47, 93, 233, 531, 539, 1, 46, 1310, 26, 239, 1, 7, 731, 2, 1339, 99, 357, 1876, 2, 81, 2075, 496, 52, 225, 132, 84, 229, 1585, 13598, 14546, 31202, 1582, 2, 6, 39, 5, 2650, 295, 52, 1339, 4, 2])
 ...
 list([45, 68, 569, 1, 1534, 4260, 1, 19, 38, 7, 856, 29, 42, 292, 2474, 1164, 702, 4096, 53, 134, 33157, 1509, 813, 12573, 1414, 7, 856, 263, 45, 304, 1

  after removing the cwd from sys.path.


### Data Split & Get maxlen and max_features

In [13]:
train_x, test_x, train_y, test_y = train_test_split(ipc_Xdata, ipc_Ydata, 
                                                    test_size=0.3)
print(train_x.shape, test_x.shape, train_y.shape, test_y.shape)

(892701,) (382587,) (892701, 6) (382587, 6)


In [14]:
# 최대 길이 및 최대 feature 값 구하기
maxlen = 1
max_features = 1
for idx in ipc_Xdata :
    try :
        len_ = len(idx) #print(idx)
        max_ = np.max(idx)
    except :
        #print(idx)
        continue
    #print(type(max_))
    #break
    if max_features < max_ :
        max_features = max_
    if maxlen < len_ :
        maxlen = len_
print(maxlen)
print(max_features)

2654
263169


### Model building

#### Padding

In [15]:
print('Loading data...')

print(len(train_x), 'train sequences')
print(len(test_x), 'test sequences')

print(train_x.shape, test_x.shape, train_y.shape, test_y.shape)

print('Pad sequences (samples x time)')

ipc_Xdata = sequence.pad_sequences(ipc_Xdata, maxlen=maxlen)

train_x, test_x, train_y, test_y = train_test_split(ipc_Xdata, ipc_Ydata, 
                                                    test_size=0.3)
# print(train_x.shape, test_x.shape, train_y.shape, test_y.shape)
# train_x = sequence.pad_sequences(train_x, maxlen=maxlen)
# test_x = sequence.pad_sequences(test_x, maxlen=maxlen)

print('train_x shape:', train_x.shape)
print('test_x shape:', test_x.shape)


Loading data...
892701 train sequences
382587 test sequences
(892701,) (382587,) (892701, 6) (382587, 6)
Pad sequences (samples x time)
train_x shape: (892701, 2654)
test_x shape: (382587, 2654)


#### variables setting

In [16]:
# Embedding
max_features = max_features+1 # feature 값 변경
# maxlen = 2670 # 리뷰의 길이
embedding_size = 256

# Convolution
kernel_size = 5 # 1
filters = 256 # 256
pool_size = 4

# LSTM
lstm_output_size = 10

# Training
batch_size = 1000 # 310 #30 -> 20
epochs = 100

#### Model Building

In [8]:
print('Build model...')
#model = Sequential()
input_layer = layers.Input((maxlen,))

embedding_layer = layers.Embedding(max_features, embedding_size)(input_layer)

convolution_layer = layers.Conv1D(256, 5, padding = 'valid',
                                 activation='relu', strides=1)(embedding_layer)
maxpooling_layer = layers.MaxPooling1D(pool_size)(convolution_layer)

convolution_layer2 = layers.Conv1D(128, 4, padding = 'valid',
                                 activation='relu', strides=1)(maxpooling_layer)
maxpooling_layer2 = layers.MaxPooling1D(pool_size)(convolution_layer2)

convolution_layer3 = layers.Conv1D(64, 3, padding = 'valid',
                                 activation='relu', strides=1)(maxpooling_layer2)
maxpooling_layer3 = layers.MaxPooling1D(pool_size)(convolution_layer3)

lstm_layer = layers.Bidirectional(layers.CuDNNLSTM(32,return_sequences=True))(maxpooling_layer3)

#lstm_layer = layers.CuDNNLSTM(64,return_sequences=True)(maxpooling_layer3)

dropout_layer = layers.Dropout(0.25)(lstm_layer) 

flatten_layer = layers.Flatten()(dropout_layer)

output_layer1 = layers.Dense(1, activation = 'sigmoid')(flatten_layer)
output_layer2 = layers.Dense(1, activation = 'sigmoid')(flatten_layer)
output_layer3 = layers.Dense(1, activation = 'sigmoid')(flatten_layer)
output_layer4 = layers.Dense(1, activation = 'sigmoid')(flatten_layer)
output_layer5 = layers.Dense(1, activation = 'sigmoid')(flatten_layer)
output_layer6 = layers.Dense(1, activation = 'sigmoid')(flatten_layer)

model = models.Model(inputs=input_layer, outputs=
                     [output_layer1, output_layer2, output_layer3, output_layer4,
                      output_layer5, output_layer6])
model.summary()

model.compile(loss='mse', optimizer='rmsprop',metrics=['accuracy'])

Build model...


NameError: name 'maxlen' is not defined

#### Train

In [3]:
try:
    model = multi_gpu_model(model, gpus=2)
    print('Train...')
    hist = model.fit(train_x, [train_y[:,0]/5, train_y[:,1]/5, 
                               train_y[:,2]/5, train_y[:,3]/5, 
                               train_y[:,4]/5, train_y[:,5]/5],
                     batch_size=batch_size,
                     epochs=epochs)

    # score, acc = model.evaluate(test_x, test_y[:,0:6], 
    #                             batch_size=batch_size)

    score = model.evaluate(test_x, [test_y[:,0]/5, test_y[:,1]/5,
                                     test_y[:,2]/5, test_y[:,3]/5,
                                     test_y[:,4]/5, test_y[:,5]/5], 
                            batch_size = batch_size)

    predicted = model.predict(test_x)
    print('predicted : ',predicted)
    predicted2 = model.predict(ipc_Xdata)
    #print('Test score:', score)
    #print('Test accuracy:', acc)
except:
    with K.tf.device('/gpu:0'):
        print('Train...')
        hist = model.fit(train_x, [train_y[:,0]/5, train_y[:,1]/5, 
                                   train_y[:,2]/5, train_y[:,3]/5, 
                                   train_y[:,4]/5, train_y[:,5]/5],
                         batch_size=batch_size,
                         epochs=epochs)

        # score, acc = model.evaluate(test_x, test_y[:,0:6], 
        #                             batch_size=batch_size)

        score = model.evaluate(test_x, [test_y[:,0]/5, test_y[:,1]/5,
                                         test_y[:,2]/5, test_y[:,3]/5,
                                         test_y[:,4]/5, test_y[:,5]/5], 
                                batch_size = batch_size)

        predicted = model.predict(test_x)
        print('predicted : ',predicted)
        predicted2 = model.predict(ipc_Xdata)
        #print('Test score:', score)
        #print('Test accuracy:', acc)

NameError: name 'model' is not defined

In [98]:
print(len(test_x))
print(len(predicted))
arr_predicted = np.array(predicted2)
print(arr_predicted.shape)
print(arr_predicted)

arr = np.squeeze(arr_predicted, 2)
arr = np.transpose(arr)
print(arr.shape)
print('times 5')
# print(arr)
arr = arr * 5
arr = np.rint(arr)
arr = arr.astype('int64')
print(arr.shape)
print(arr)

382587
6
(6, 1275288, 1)
[[[0.6803021 ]
  [0.7231426 ]
  [0.69011855]
  ...
  [0.5843144 ]
  [0.7246235 ]
  [0.6118517 ]]

 [[0.7072035 ]
  [0.7487986 ]
  [0.717008  ]
  ...
  [0.6055726 ]
  [0.75017154]
  [0.6349615 ]]

 [[0.6667578 ]
  [0.697686  ]
  [0.6736307 ]
  ...
  [0.5890348 ]
  [0.6988027 ]
  [0.6118171 ]]

 [[0.33349645]
  [0.36442733]
  [0.34100583]
  ...
  [0.27507845]
  [0.36562064]
  [0.2928089 ]]

 [[0.59435093]
  [0.63623214]
  [0.60390544]
  ...
  [0.49577677]
  [0.6376012 ]
  [0.5240173 ]]

 [[0.6336271 ]
  [0.6583353 ]
  [0.6386699 ]
  ...
  [0.5871757 ]
  [0.65917116]
  [0.60093206]]]
(1275288, 6)
times 5
(1275288, 6)
[[3 4 3 2 3 3]
 [4 4 3 2 3 3]
 [3 4 3 2 3 3]
 ...
 [3 3 3 1 2 3]
 [4 4 3 2 3 3]
 [3 3 3 1 3 3]]


In [99]:
print(raw_data['HotelInfo.HotelID'])

0          280518
1          280518
2          280518
3          280518
4          280518
            ...  
1275283     78587
1275284     78587
1275285     78587
1275286     78587
1275287     78587
Name: HotelInfo.HotelID, Length: 1275288, dtype: int64


In [100]:
criteria_df = pd.DataFrame(arr)
IPC = pd.concat([raw_data['HotelInfo.HotelID'], criteria_df], axis=1)
#IPC = IPC.set_index('HotelInfo.HotelID')
print(type(IPC))
IPC.columns = ['HotelID','Service', 'Cleanliness', 'Rooms', 'Value','Sleep Quality', 'Location']
print(IPC.head())

<class 'pandas.core.frame.DataFrame'>
   HotelID  Service  Cleanliness  Rooms  Value  Sleep Quality  Location
0   280518        3            4      3      2              3         3
1   280518        4            4      3      2              3         3
2   280518        3            4      3      2              3         3
3   280518        4            4      4      2              3         4
4   280518        4            4      4      2              3         3


In [101]:
IPC = round(IPC.groupby(['HotelID'], as_index=True).mean())
print(IPC)

         Service  Cleanliness  Rooms  Value  Sleep Quality  Location
HotelID                                                             
72572        4.0          4.0    3.0    2.0            3.0       3.0
72579        4.0          4.0    3.0    2.0            3.0       3.0
72586        4.0          4.0    3.0    2.0            3.0       3.0
72598        4.0          4.0    3.0    2.0            3.0       3.0
73393        4.0          4.0    3.0    2.0            3.0       3.0
...          ...          ...    ...    ...            ...       ...
2516241      4.0          4.0    4.0    2.0            3.0       3.0
2516242      4.0          4.0    4.0    2.0            3.0       3.0
2516243      4.0          4.0    4.0    2.0            3.0       3.0
2516244      4.0          4.0    4.0    2.0            3.0       3.0
2520289      4.0          4.0    3.0    2.0            3.0       3.0

[7584 rows x 6 columns]


## UPC

In [106]:
dnn_input_data = pd.concat([raw_data['Author'],criteria_df,raw_data['Overall']],axis=1)
print(dnn_input_data)

               Author  0  1  2  3  4  5  Overall
0             Clara79  3  4  3  2  3  3      3.0
1        BerlinerPooh  4  4  3  2  3  3      1.0
2        amazingthing  3  4  3  2  3  3      3.0
3               Nitus  4  4  4  2  3  4      4.0
4           mattp1874  4  4  4  2  3  3      4.0
...               ... .. .. .. .. .. ..      ...
1275283   tulipslover  4  4  3  2  3  3      3.0
1275284       Kathi B  4  4  3  2  3  3      5.0
1275285    Carolynn S  3  3  3  1  2  3      4.0
1275286    FrankHutch  4  4  3  2  3  3      4.0
1275287     scoredonu  3  3  3  1  3  3      3.0

[1275288 rows x 8 columns]


In [109]:
dnn_input_data

Unnamed: 0,Author,0,1,2,3,4,5,Overall
0,Clara79,3,4,3,2,3,3,3.0
1,BerlinerPooh,4,4,3,2,3,3,1.0
2,amazingthing,3,4,3,2,3,3,3.0
3,Nitus,4,4,4,2,3,4,4.0
4,mattp1874,4,4,4,2,3,3,4.0
...,...,...,...,...,...,...,...,...
1275283,tulipslover,4,4,3,2,3,3,3.0
1275284,Kathi B,4,4,3,2,3,3,5.0
1275285,Carolynn S,3,3,3,1,2,3,4.0
1275286,FrankHutch,4,4,3,2,3,3,4.0


### Generate User List

In [111]:
def user_list_fu(dnn_input_data):
    user_list = dnn_input_data.index
    user_list = user_list.unique()
    return user_list

UPC_list = []

In [112]:
def train_test_data(one_user):    
    x_train, x_test,y_train, y_test = train_test_split(one_user.iloc[:,0:6],one_user.iloc[:,6], random_state=66, test_size=0.3)
    return x_train,x_test,y_train,y_test

### Model building

In [113]:
def dnn_model():
    #2. 모델 구성
    input1 = Input(shape=(6,))
    #dense1 = Dense(5, activation='relu')(input1)
    #dense2 = Dense(3)(dense1)
    output = Dense(1, activation='sigmoid')(input1)
    model = Model(inputs=input1, outputs = output)
    
    model.compile(optimizer='adam', loss='MSE',metrics=['accuracy'])

    return model

In [114]:
def train_test_data(one_user):    
    x_train, x_test,y_train, y_test = train_test_split(one_user.iloc[:,0:6],one_user.iloc[:,6], random_state=66, test_size=0.3)
    return x_train,x_test,y_train,y_test

### Train

In [127]:
def model_fit(model, x_train, y_train, x_test, y_test):
    # 3. 훈련
    model.fit(x_train,y_train/5, epochs=1, batch_size=1, validation_data=(x_test,y_test/5))
    
    # 4. 평가 예측
    loss, mse = model.evaluate(x_test,y_test/5,batch_size=1)
    print('acc:',mse)
    return model

In [116]:
def weights_list(model):
    weights = model.layers[1].get_weights()
    weights = MinMaxScaler().fit_transform(weights[0])
    weights = np.array(weights)
    weights= np.squeeze(weights,axis=1)
    UPC_list=[]
    UPC_list.append(weights)
    return UPC_list

In [117]:
def dnn_main(one_user):
    x_train,x_test,y_train,y_test = train_test_data(one_user)
    
    model = dnn_model()
    model = model_fit(model, x_train, y_train, x_test, y_test)
    #pred_y = predict_y()
    
    UPC_list=weights_list(model)
    
    return UPC_list

### Dataframe Converting

In [131]:
def UPC_dataframe(UPC_list,user_list):   
    UPC_data = pd.DataFrame(index=range(0,0),columns=['Author','Service','Cleanliness','Rooms','Value','Sleep Quality','Location'])
    UPC_data = UPC_data.set_index('Author')

    for i in range(len(UPC_list)):
        UPC_data = UPC_data.append(pd.Series(UPC_list[i],name = user_list[i],index=UPC_data.columns))

    return UPC_data

In [130]:
def len_pre(data_over):
    data_count = data_over['Author'].value_counts()
    tmp = pd.DataFrame(data_count.index, columns=['Author'])
    tmp['count'] = data_count.values
    data_count = tmp


    np_data = np.array(data_count)
    np_data=np_data[np.where(np_data[:,1]>=10)]
    np_data=np_data[np.where(np_data[:,1]<=50)]

    np_data = np.delete(np_data,(1),axis=1)
    df1 = pd.DataFrame(columns=['name'])
    name = np_data.reshape(-1,)
    df1['name']= name
    data_over= data_over[data_over['Author'].isin(df1['name'])]
    data_over = data_over.reset_index(drop=True)
    data_over = data_over.set_index('Author')
    
    return data_over

### Matrix Multiplication

In [129]:
def matrix_multiplication(IPC,UPC):
    #호텔, 저자 이름 추출
    IPC = IPC.reset_index()
    UPC = UPC.reset_index()
    
    hotel_name = IPC['HotelID']
    test_IPC = IPC.drop(['HotelID'],axis=1)

    Author_name = UPC['Author']
    test_UPC = UPC.drop(['Author'],axis=1)
    ##################################################################

    trans_UPC=np.transpose(test_UPC)

    predict_matrix = np.dot(test_IPC,trans_UPC)
    predict_matrix = pd.DataFrame(predict_matrix)

    predict_matrix.columns=Author_name
    predict_matrix.index = hotel_name
    
    return predict_matrix


In [121]:
def main(dnn_input_data,IPC):
    with K.tf.device('/gpu:0'):
        dnn_input_data = len_pre(dnn_input_data)
        user_list = user_list_fu(dnn_input_data)
        for i in range(len(user_list)):#유저 수만큼 반복
            one_user= dnn_input_data.loc[user_list[i]]#유저 1명에 대한 리뷰
            UPC_list = dnn_main(one_user)#main()함수로 DNN학습 후 유저별 가중치를 list에 저장

        UPC = UPC_dataframe(UPC_list,user_list)#UPC_list를 데이터 프레임화
        
        predict_matrix = matrix_multiplication(IPC,UPC)
         
        return predict_matrix

### Main Activation (UPC)

In [128]:
predict_matrix = main(dnn_input_data,IPC)
print(predict_matrix)

Train on 13 samples, validate on 6 samples
Epoch 1/1
acc: 0.0
Train on 20 samples, validate on 9 samples
Epoch 1/1
acc: 0.0
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.6666666666666666
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.0
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.0
Train on 7 samples, validate on 4 samples
Epoch 1/1
acc: 0.5
Train on 9 samples, validate on 4 samples
Epoch 1/1
acc: 0.0
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.3333333333333333
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.0
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.0
Train on 13 samples, validate on 6 samples
Epoch 1/1
acc: 0.3333333333333333
Train on 9 samples, validate on 5 samples
Epoch 1/1
acc: 0.0
Train on 8 samples, validate on 4 samples
Epoch 1/1
acc: 0.0
Train on 7 samples, validate on 4 samples
Epoch 1/1
acc: 0.25
Train on 8 samples, validate on 4 samples
Epoch 1/1
acc: 0.75
Train on 10 samples, validate on 5 

acc: 0.0
Train on 9 samples, validate on 5 samples
Epoch 1/1
acc: 0.6
Train on 10 samples, validate on 5 samples
Epoch 1/1
acc: 0.6
Train on 7 samples, validate on 4 samples
Epoch 1/1
acc: 0.0
Train on 10 samples, validate on 5 samples
Epoch 1/1
acc: 0.0
Train on 7 samples, validate on 4 samples
Epoch 1/1
acc: 0.25
Train on 25 samples, validate on 12 samples
Epoch 1/1
acc: 0.0
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.3333333333333333
Train on 8 samples, validate on 4 samples
Epoch 1/1
acc: 0.0
Train on 8 samples, validate on 4 samples
Epoch 1/1
acc: 0.5
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.6666666666666666
Train on 9 samples, validate on 5 samples
Epoch 1/1
acc: 0.8
Train on 10 samples, validate on 5 samples
Epoch 1/1
acc: 0.0
Train on 16 samples, validate on 8 samples
Epoch 1/1
acc: 0.5
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.0
Train on 9 samples, validate on 5 samples
Epoch 1/1
acc: 0.4
Train on 7 samples, validate on 4 sampl

acc: 0.16666666666666666
Train on 11 samples, validate on 5 samples
Epoch 1/1
acc: 0.0
Train on 11 samples, validate on 6 samples
Epoch 1/1
acc: 0.6666666666666666
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.3333333333333333
Train on 10 samples, validate on 5 samples
Epoch 1/1
acc: 0.0
Train on 14 samples, validate on 7 samples
Epoch 1/1
acc: 0.0
Train on 11 samples, validate on 5 samples
Epoch 1/1
acc: 0.0
Train on 10 samples, validate on 5 samples
Epoch 1/1
acc: 0.0
Train on 8 samples, validate on 4 samples
Epoch 1/1
acc: 0.25
Train on 9 samples, validate on 4 samples
Epoch 1/1
acc: 0.0
Train on 14 samples, validate on 6 samples
Epoch 1/1
acc: 0.0
Train on 11 samples, validate on 6 samples
Epoch 1/1
acc: 0.0
Train on 9 samples, validate on 5 samples
Epoch 1/1
acc: 0.8
Train on 7 samples, validate on 4 samples
Epoch 1/1
acc: 0.0
Train on 7 samples, validate on 4 samples
Epoch 1/1
acc: 0.0
Train on 8 samples, validate on 4 samples
Epoch 1/1
acc: 0.5
Train on 8 samples, v

acc: 0.25
Train on 7 samples, validate on 3 samples
Epoch 1/1
acc: 0.3333333333333333
Train on 7 samples, validate on 4 samples
Epoch 1/1
acc: 0.0
Train on 14 samples, validate on 7 samples
Epoch 1/1


KeyboardInterrupt: 