# Path Config

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/Colab Notebooks/Neural_Collaborative_Filltering/Neural_Collaborative_Filltering_ksw

/content/drive/MyDrive/Colab Notebooks/Neural_Collaborative_Filltering/Neural_Collaborative_Filltering_ksw


# Git Code Line

## Commit & Push

In [1]:

!git config --global user.email 'dhjkl123@naver.com'
!git config --global user.name 'dhjkl123'

!git add --all
!git commit -m '20230401 GMF'
!git push

[Errno 2] No such file or directory: '/content/drive/MyDrive/Colab Notebooks/Neural_Collaborative_Filltering/Neural_Collaborative_Filltering_ksw'
/content


## Branch

In [7]:
!git branch GMF

## CheckOut
- main
- GMF

In [8]:
!git checkout 'GMF'

M	Neural Collaborative Filltering.ipynb
Switched to branch 'GMF'


# Data Load (Movie Lens)

In [3]:
!pip install scikit-surprise

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 KB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp39-cp39-linux_x86_64.whl size=3193677 sha256=862b2d11a487fea5312142eadfc9c4366afb20a47d49dc3b2fa3356ca822a49a
  Stored in directory: /root/.cache/pip/wheels/c6/3a/46/9b17b3512bdf283c6cb84f59929cdd5199d4e754d596d22784
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3


In [22]:
from surprise import Dataset
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [160]:
data = Dataset.load_builtin('ml-100k',prompt=False)
raw_data = np.array(data.raw_ratings,dtype=int)
n_users = np.max(raw_data[:, 0]) +1
n_movies = np.max(raw_data[:, 1]) +1
raw_data[:,0] -= 1
raw_data[:,1] -= 1
label_arr = np.array([1] * len(raw_data))

train_x, val_x, train_y, val_y = train_test_split(raw_data, label_arr, test_size=0.3, random_state=2023)

In [161]:
user_train = train_x[:, 0]
item_train = train_x[:, 1]
rate_train = train_x[:, 2]

user_val = val_x[:, 0]
item_val = val_x[:, 1]
rate_val = val_x[:, 2]

In [162]:
def make_user_item_table(user,item):
  max_user = n_users
  max_item = n_movies
  table = np.zeros([max_user,max_item])

  for u, i in zip(user, item):
    table[u][i] = 1

  return table

def make_nagative(user,item,label, table, num_neg):
  return_user = []
  return_item = []
  return_label = []

  max_item = np.max(item)

  for u, i, l in zip(user, item, label):
    return_user.append(u)
    return_item.append(i)
    return_label.append(l)

    for _ in range(num_neg):
      neg_idx = np.random.randint(max_item)

      while table[u][neg_idx]:
        neg_idx = np.random.randint(max_item)

      return_user.append(u)
      return_item.append(neg_idx)
      return_label.append(0)

  return np.array(return_user), np.array(return_item), np.array(return_label)


In [163]:
user_item_table = make_user_item_table(user_train,item_train)

In [164]:
user_train,item_train,train_y = make_nagative(user_train, item_train, train_y, user_item_table, 3)

In [165]:
np.unique(train_y, return_counts=True)

(array([0, 1]), array([210000,  70000]))

In [166]:
user_item_table_val = make_user_item_table(user_val,item_val)
user_val,item_val,val_y = make_nagative(user_val, item_val, val_y, user_item_table_val, 2)

# GMF 구현

## Keras

### import

In [10]:
from tensorflow import keras

## cfg

In [136]:
cfg = {
    'embedd_input_user' : n_users,
    'embedd_output_user' : 64,
    'embedd_input_item' : n_movies,
    'embedd_output_item' : 64,
    'output' : 1,
}

### Source

In [167]:
keras.backend.clear_session()

user_input_layer = keras.layers.Input(shape=(1,))
item_input_layer = keras.layers.Input(shape=(1,))

user_vector_layer_gmf = keras.layers.Embedding(input_dim= cfg['embedd_input_user'], output_dim=cfg['embedd_output_user'])(user_input_layer)
user_vector_layer_gmf = keras.layers.Flatten()(user_vector_layer_gmf)

item_vector_layer_gmf = keras.layers.Embedding(input_dim= cfg['embedd_input_item'], output_dim=cfg['embedd_output_item'])(item_input_layer)
item_vector_layer_gmf = keras.layers.Flatten()(item_vector_layer_gmf)

gmf_layer = keras.layers.Multiply()([user_vector_layer_gmf,item_vector_layer_gmf])

output_layer = keras.layers.Dense(cfg['output'],kernel_initializer='lecun_uniform')(gmf_layer)

model = keras.models.Model([user_input_layer,item_input_layer],output_layer)
model.compile(loss= 'binary_crossentropy', optimizer = 'adam')


In [168]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 embedding (Embedding)          (None, 1, 64)        60416       ['input_1[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, 1, 64)        107712      ['input_2[0][0]']                
                                                                                              

In [169]:
es = keras.callbacks.EarlyStopping(verbose=1,patience=5)

In [170]:
model.fit([user_train,item_train],train_y,validation_split=0.3,callbacks=[es],verbose=1,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 8: early stopping


<keras.callbacks.History at 0x7f53758361f0>

In [171]:
pred = model.predict([user_val,item_val])



In [172]:
pred = np.where(pred >= 0.5,1,0)

In [173]:
np.unique(pred, return_counts=True)

(array([0, 1]), array([65376, 24624]))

In [174]:
print(classification_report(val_y, pred))

              precision    recall  f1-score   support

           0       0.78      0.85      0.81     60000
           1       0.63      0.52      0.57     30000

    accuracy                           0.74     90000
   macro avg       0.70      0.68      0.69     90000
weighted avg       0.73      0.74      0.73     90000

