In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm
import itertools

from User_based_CF import *
from Item_based_CF import *
from Matrix_Factorization import *
from Factorization_Machine import *
from IPNN_model import *
from FNN_model import *
from OPNN_model import *
from PIN_model import *


import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

from sklearn.metrics import mean_squared_error
import math


import json

In [2]:
# Create a user-item matrix
def create_user_item_matrix(data, user_column_name, item_column_name, result_name):
    """
    data: (user_column_name, item_column_name, result_name, timestamp)
    """
    user_list = data.iloc[:, 0]
    item_list = data.iloc[:, 1]
    rating_list = data[result_name].values
    user_item_matrix_data = pd.crosstab(index=user_list, columns=item_list, values=rating_list, aggfunc=np.mean)
    return user_item_matrix_data

In [3]:
# Identify whether the value exists or not.
def identify_value_exist(user_item_matrix_data):
    """
    user_item_matrix_data: DataFrame
    """
    return (user_item_matrix_data.isna() == False).astype("int")

In [4]:
# Create a user-item matrix
def create_user_item_matrix_for_matrix_factorization(data, unique_user_id, unique_item_id):
    """
    data: (user, item, rating, timestamp)
    """
    user_item_matrix_data = pd.DataFrame(np.array([np.nan] * (len(unique_user_id) * len(unique_item_id))).reshape(len(unique_user_id), len(unique_item_id)),\
        index=unique_user_id, columns=unique_item_id)
    
    for one_index in data.index:
        user_item_matrix_data.loc[data.loc[one_index, "User_id"], data.loc[one_index, "Item_id"]] = \
            data.loc[one_index, "Rating"]
    return user_item_matrix_data

In [5]:
# 要建構四種資料，分別為User的特徵、Item的特徵、User-Item matrix與User對應Item的紀錄
def split_four_data(user_data, item_data, user_item_interaction_data, user_column_name, item_column_name, result_name):
    """
    user_data：使用者相關資料（user_id一定要放第一個column）
    item_data：物品相關資料（item_id一定要放第二個column）
    """
    all_data = list()
    if isinstance(user_data, pd.DataFrame):
        # user_feature_data = user_data.iloc[:, 1:]
        all_data.append(user_data)
    
    if isinstance(item_data, pd.DataFrame):
        # item_feature_data = item_data.iloc[:, 1:]
        all_data.append(item_data)
    
    if isinstance(user_item_interaction_data, pd.DataFrame):
        # transform train data into user-item matrix
        user_item_matrix_data = create_user_item_matrix(user_item_interaction_data, user_column_name, item_column_name, result_name)
        all_data.append(user_item_interaction_data)
        all_data.append(user_item_matrix_data)
    return all_data

In [6]:
# 定義OneHotEncoding的內容→Movielens
def movielens_onehotencoding(user_feature_data, movie_feature_data):
    user_age_onehotcoding = OneHotEncoder(sparse=False).fit(user_feature_data["age"].values.reshape((-1, 1)))
    user_occupation_onehotencoding = OneHotEncoder(sparse=False).fit(user_feature_data["occupation"].values.reshape((-1, 1)))
    return user_age_onehotcoding, user_occupation_onehotencoding

# 將每種不同資料前處理

## Movielens

### Load data

In [7]:
with open(r"data\Movielens\movie_genre.dat", "r") as f:
    movie_genre = [i.replace("\n", "").split("\t") for i in f.readlines()]
movie_genre = pd.DataFrame(np.array(movie_genre), columns=["movie_id", "genre"])
movie_genre["genre"] = movie_genre["genre"].astype("str")

with open(r"data\Movielens\movie_movie(knn).dat", "r") as f:
    movie_movie = [i.replace("\n", "").split("\t") for i in f.readlines()]
movie_movie = pd.DataFrame(np.array(movie_movie), columns=["movie1", "movie2", "similarity"])

with open(r"data\Movielens\user_age.dat", "r") as f:
    user_age = [i.replace("\n", "").split("\t") for i in f.readlines()]
user_age = pd.DataFrame(np.array(user_age), columns=["user_id", "age"])

with open(r"data\Movielens\user_occupation.dat", "r") as f:
    user_occupation = [i.replace("\n", "").split("\t") for i in f.readlines()]
user_occupation = pd.DataFrame(np.array(user_occupation), columns=["user_id", "occupation"])

with open(r"data\Movielens\user_user(knn).dat", "r") as f:
    user_user = [i.replace("\n", "").split("\t") for i in f.readlines()]
user_user = pd.DataFrame(np.array(user_user), columns=["user1", "user2", "similarity"])

with open(r"data\Movielens\user_movie.dat", "r") as f:
    user_movie = [i.replace("\n", "").split("\t") for i in f.readlines()]
user_movie = pd.DataFrame(np.array(user_movie), columns=["user_id", "movie_id", "rating", "timestamp"])
user_movie["rating"] = user_movie["rating"].astype("int")

### Build User-Item matrix

In [8]:
user_item_matrix_data = create_user_item_matrix(user_movie, user_column_name="user_id", item_column_name="item_id", result_name="rating")

### Preprocessing of Rating data

In [9]:
# 針對電影種類前處理：由於一部電影可能有多種種類，因此將每個種類用OneHotEncoding表示
movie_genre["index"] = 1
movie_genre = movie_genre.pivot_table(index="movie_id", columns="genre", values="index", fill_value=0)
movie_genre = movie_genre.reset_index()

In [10]:
merge_data = pd.merge(user_movie, user_age, how="inner", on="user_id")
print(merge_data.shape)
merge_data = pd.merge(merge_data, user_occupation, how="left", on="user_id")
print(merge_data.shape)
merge_data = pd.merge(merge_data, movie_genre, how="left", on="movie_id").fillna(0)
print(merge_data.shape)

(100000, 5)
(100000, 6)
(100000, 24)


In [11]:
user_feature_data = merge_data[["age", "occupation"]]
movie_feature_data = merge_data[movie_genre.columns[1:]]

In [12]:
# 產生四種資料
user_feature_data, movie_feature_data, rating_user_item_interaction_data, rating_user_item_matrix_data =\
     split_four_data(user_feature_data, movie_feature_data, user_movie, user_column_name="user_id", item_column_name="movie_id", result_name="rating")

In [13]:
# 把四種資料作訓練與測試資料的切割
rating_train_user_feature_data, rating_test_user_feature_data, rating_train_movie_feature_data, rating_test_movie_feature_data = \
    train_test_split(user_feature_data, movie_feature_data, random_state=12345, test_size=0.25)

rating_train_result_data, rating_test_result_data, rating_train_user_item_interaction_data, rating_test_user_item_interaction_data =\
    train_test_split(rating_user_item_interaction_data["rating"].values, rating_user_item_interaction_data, test_size=0.25, random_state=12345)

In [14]:
# 1. 定義OneHotEncoding的內容→Movielens
rating_user_age_onehotencoding, rating_user_occupation_onehotencoding =\
    movielens_onehotencoding(user_feature_data, movie_feature_data)

# 2. 把所有訓練資料以及測試資料都轉成OneHotEncoding
rating_train_user_age_onehotencoding, rating_test_user_age_onehotencoding =\
    list(map(lambda x: rating_user_age_onehotencoding.transform(x), [rating_train_user_feature_data["age"].values.reshape((-1, 1)), rating_test_user_feature_data["age"].values.reshape((-1, 1))]))
train_user_occupation_onehotencoding, test_user_occupation_onehotencoding =\
    list(map(lambda x: rating_user_occupation_onehotencoding.transform(x), [rating_train_user_feature_data["occupation"].values.reshape((-1, 1)), rating_test_user_feature_data["occupation"].values.reshape((-1, 1))]))

### Preprocessing of binary data

In [15]:
# 產生binary data→定義某個user是否會對某個item做評論
# 1. 產生某個值是否為真實值
identify_value_exist_binary = (user_item_matrix_data.isna() == False).astype("int")

# 2. 產生index名稱為column
identify_value_exist_binary["user_id"] = identify_value_exist_binary.index

# 3. pandas melt
binary_result = pd.melt(identify_value_exist_binary, id_vars=["user_id"])

In [16]:
merge_data = pd.merge(binary_result, user_age, how="inner", on="user_id")
print(merge_data.shape)
merge_data = pd.merge(merge_data, user_occupation, how="left", on="user_id")
print(merge_data.shape)
merge_data = pd.merge(merge_data, movie_genre, how="left", on="movie_id").fillna(0)
print(merge_data.shape)

(1586126, 4)
(1586126, 5)
(1586126, 23)


In [17]:
user_feature_data = merge_data[["age", "occupation"]]
movie_feature_data = merge_data[movie_genre.columns[1:]]

In [18]:
# 產生四種資料
user_feature_data, movie_feature_data, binary_user_item_interaction_data, binary_user_item_matrix_data =\
     split_four_data(user_feature_data, movie_feature_data, user_movie, user_column_name="user_id", item_column_name="movie_id", result_name="rating")

In [19]:
# 把四種資料作訓練與測試資料的切割
binary_train_user_feature_data, binary_test_user_feature_data, binary_train_movie_feature_data, binary_test_movie_feature_data = \
    train_test_split(user_feature_data, movie_feature_data, random_state=12345, test_size=0.25)

binary_train_result_data, binary_test_result_data, binary_train_user_item_interaction_data, binary_test_user_item_interaction_data =\
    train_test_split(binary_user_item_interaction_data["rating"].values, binary_user_item_interaction_data, test_size=0.25, random_state=12345)

In [20]:
# 1. 定義OneHotEncoding的內容→Movielens
binary_user_age_onehotencoding, binary_user_occupation_onehotencoding =\
    movielens_onehotencoding(user_feature_data, movie_feature_data)

# 2. 把所有訓練資料以及測試資料都轉成OneHotEncoding
binary_train_user_age_onehotencoding, binary_test_user_age_onehotencoding =\
    list(map(lambda x: binary_user_age_onehotencoding.transform(x), [binary_train_user_feature_data["age"].values.reshape((-1, 1)), binary_test_user_feature_data["age"].values.reshape((-1, 1))]))
binary_train_user_occupation_onehotencoding, binary_test_user_occupation_onehotencoding =\
    list(map(lambda x: binary_user_occupation_onehotencoding.transform(x), [binary_train_user_feature_data["occupation"].values.reshape((-1, 1)), binary_test_user_feature_data["occupation"].values.reshape((-1, 1))]))

## Douban_Book

## Yelp

# Collaborative Filtering

In [21]:
similarity_method = ["pearson", "cosine"]
K_list = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
CF_result = dict()

for one_similarity_method in similarity_method:
    # User-based Collaborative Filtering
    user_cf = User_based_CF(rating_train_user_item_interaction_data, user_item_matrix_data)
    user_user_correlation_data = user_cf.compute_correlation(corr_methods=one_similarity_method)

    for K in K_list:
        # 針對test data做預測以及模型評估（注意，每次計算是針對一筆資料）
        pred_user_data = list(map(lambda x: user_cf.predict_without_time(rating_test_user_item_interaction_data.iloc[x, 0], rating_test_user_item_interaction_data.iloc[x, 1], user_column_name="user_id", item_column_name="movie_id", num_user=K), tqdm([i for i in range(rating_test_user_item_interaction_data.shape[0])])))
        pred_user_data = [i if i > 0 else 0 for i in pred_user_data]
        CF_result[f"user-based_{one_similarity_method}_{K}"] = math.sqrt(mean_squared_error(y_true=rating_test_user_item_interaction_data["rating"].values, y_pred=np.array(pred_user_data)))
        break

    # Item-based Collaborative Filtering
    item_cf = Item_based_CF(rating_train_user_item_interaction_data, user_item_matrix_data)
    item_item_correlation_data = item_cf.compute_correlation(corr_methods=one_similarity_method)

    for K in K_list:
        # 針對test data做預測以及模型評估（注意，每次計算是針對一筆資料）
        pred_user_data = list(map(lambda x: item_cf.predict_without_time(rating_test_user_item_interaction_data.iloc[x, 0], rating_test_user_item_interaction_data.iloc[x, 1], user_column_name="user_id", item_column_name="movie_id", num_item=K), tqdm([i for i in range(rating_test_user_item_interaction_data.shape[0])])))
        pred_user_data = [i if i > 0 else 0 for i in pred_user_data]
        CF_result[f"item-based_{one_similarity_method}_{K}"] = math.sqrt(mean_squared_error(y_true=rating_test_user_item_interaction_data["rating"].values, y_pred=np.array(pred_user_data)))
        break

100%|██████████| 889249/889249 [02:56<00:00, 5031.99it/s]
100%|██████████| 25000/25000 [16:57<00:00, 24.58it/s]
100%|██████████| 2829124/2829124 [03:38<00:00, 12976.67it/s]
  0%|          | 0/25000 [00:00<?, ?it/s]


TypeError: predict_without_time() got an unexpected keyword argument 'num_item'

# Matrix Factorization

In [42]:
epochs = 100
learning_rate = 1e-2
num_user_id = user_item_matrix_data.shape[0]
num_item_id = user_item_matrix_data.shape[1]
num_features = 10

model = matrix_factorization(true_user_item_matrix=rating_user_item_matrix_data, num_features=num_features)
model.fit(epochs=epochs, learning_rate=learning_rate, regularization_rate=1e-2, bias_or_not=True)

=== Epoch: 0 Train Loss: 19.762531966755976
=== Epoch: 1 Train Loss: 19.758328004473768
=== Epoch: 2 Train Loss: 19.754126135843592
=== Epoch: 3 Train Loss: 19.749926301319277
=== Epoch: 4 Train Loss: 19.745728536201653
=== Epoch: 5 Train Loss: 19.741532852134913
=== Epoch: 6 Train Loss: 19.737339200533388
=== Epoch: 7 Train Loss: 19.733147656251138
=== Epoch: 8 Train Loss: 19.728958176282323
=== Epoch: 9 Train Loss: 19.724770775831576
=== Epoch: 10 Train Loss: 19.720585429249923
=== Epoch: 11 Train Loss: 19.716402103571497
=== Epoch: 12 Train Loss: 19.712220846771533
=== Epoch: 13 Train Loss: 19.708041661143394
=== Epoch: 14 Train Loss: 19.70386455586727
=== Epoch: 15 Train Loss: 19.699689515737845
=== Epoch: 16 Train Loss: 19.69551650192479
=== Epoch: 17 Train Loss: 19.691345548046574
=== Epoch: 18 Train Loss: 19.68717663943268
=== Epoch: 19 Train Loss: 19.68300978905291
=== Epoch: 20 Train Loss: 19.67884501712736
=== Epoch: 21 Train Loss: 19.674682295752078
=== Epoch: 22 Train Loss:

In [43]:
model.evaluate(testdata=rating_test_result_data)

AttributeError: 'numpy.ndarray' object has no attribute 'iloc'

# Factorization Machine

In [None]:
# user_item_interaction的結構：<user_id, item_id, result, timestamp>

In [24]:
batch_size = 128
num_features = 10

# 1. 把所有東西包成dataloader
train_dataset = TensorDataset( torch.FloatTensor(train_user_age_onehotencoding),
                               torch.FloatTensor(train_user_occupation_onehotencoding),
                               torch.FloatTensor(train_movie_feature_data.values),
                               torch.FloatTensor(train_result_data) )
test_dataset = TensorDataset(  torch.FloatTensor(test_user_age_onehotencoding),
                               torch.FloatTensor(test_user_occupation_onehotencoding),
                               torch.FloatTensor(test_movie_feature_data.values),
                               torch.FloatTensor(test_result_data))
train_dataloader = DataLoader(train_dataset, batch_size=128)
test_dataloader = DataLoader(test_dataset, batch_size=128)


# 2. 呼叫模型與設定Loss function
model = fm_model(num_user_age=train_user_age_onehotencoding.shape[1], 
                 num_user_occupation=train_user_occupation_onehotencoding.shape[1], 
                 num_movie_genre=train_movie_feature_data.values.shape[1],
                 num_decoder=num_features*3+3,
                 num_features=num_features)
loss_func = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-3)

In [25]:
epochs = 3

train_each_iteration_loss = list()
train_loss_list = list()

for epoch in range(epochs):
    train_loss = 0.0
    for torch_user_age, torch_user_occupation, torch_movie_genre, torch_result in train_dataloader:
        yhat = model(user_age_feature=torch_user_age, 
                     user_occupation_feature=torch_user_occupation,
                     movie_genre_feature=torch_movie_genre)

        loss = loss_func(yhat, torch_result)
        train_loss += loss.item()
        train_each_iteration_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    train_loss_list.append(train_loss)
    print(f"=== Epoch: {epoch}, Train Loss: {train_loss}")

=== Epoch: 0, Train Loss: 2425.652796149254
=== Epoch: 1, Train Loss: 758.0403942465782
=== Epoch: 2, Train Loss: 752.8756191730499


# IPNN

In [14]:
batch_size = 128
num_features = 10

# 1. 把所有東西包成dataloader
train_dataset = TensorDataset( torch.FloatTensor(train_user_age_onehotencoding),
                               torch.FloatTensor(train_user_occupation_onehotencoding),
                               torch.FloatTensor(train_movie_feature_data.values),
                               torch.FloatTensor(train_result_data) )
test_dataset = TensorDataset(  torch.FloatTensor(test_user_age_onehotencoding),
                               torch.FloatTensor(test_user_occupation_onehotencoding),
                               torch.FloatTensor(test_movie_feature_data.values),
                               torch.FloatTensor(test_result_data))
train_dataloader = DataLoader(train_dataset, batch_size=128)
test_dataloader = DataLoader(test_dataset, batch_size=128)


# 2. 呼叫模型與設定Loss function
model = ipnn_model(num_user_age=train_user_age_onehotencoding.shape[1], 
                 num_user_occupation=train_user_occupation_onehotencoding.shape[1], 
                 num_movie_genre=train_movie_feature_data.values.shape[1],
                 num_decoder=num_features*3+3,
                 num_features=num_features)
loss_func = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-3)

In [15]:
epochs = 3

train_each_iteration_loss = list()
train_loss_list = list()

for epoch in range(epochs):
    train_loss = 0.0
    for torch_user_age, torch_user_occupation, torch_movie_genre, torch_result in train_dataloader:
        yhat = model(user_age_feature=torch_user_age, 
                     user_occupation_feature=torch_user_occupation,
                     movie_genre_feature=torch_movie_genre)

        loss = loss_func(yhat, torch_result)
        train_loss += loss.item()
        train_each_iteration_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    train_loss_list.append(train_loss)
    print(f"=== Epoch: {epoch}, Train Loss: {train_loss}")

=== Epoch: 0, Train Loss: 2269.737111568451
=== Epoch: 1, Train Loss: 744.1373466849327
=== Epoch: 2, Train Loss: 743.6776596307755


# GDBT+LR

# FNN

In [14]:
batch_size = 128
num_features = 10

# 1. 把所有東西包成dataloader
train_dataset = TensorDataset( torch.FloatTensor(train_user_age_onehotencoding),
                               torch.FloatTensor(train_user_occupation_onehotencoding),
                               torch.FloatTensor(train_movie_feature_data.values),
                               torch.FloatTensor(train_result_data) )
test_dataset = TensorDataset(  torch.FloatTensor(test_user_age_onehotencoding),
                               torch.FloatTensor(test_user_occupation_onehotencoding),
                               torch.FloatTensor(test_movie_feature_data.values),
                               torch.FloatTensor(test_result_data))
train_dataloader = DataLoader(train_dataset, batch_size=128)
test_dataloader = DataLoader(test_dataset, batch_size=128)


# 2. 呼叫模型與設定Loss function
model = fnn_model(num_user_age=train_user_age_onehotencoding.shape[1], 
                 num_user_occupation=train_user_occupation_onehotencoding.shape[1], 
                 num_movie_genre=train_movie_feature_data.values.shape[1],
                 num_decoder=num_features*3+3,
                 num_features=num_features)
loss_func = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-3)

In [15]:
epochs = 3

train_each_iteration_loss = list()
train_loss_list = list()

for epoch in range(epochs):
    train_loss = 0.0
    for torch_user_age, torch_user_occupation, torch_movie_genre, torch_result in train_dataloader:
        yhat = model(user_age_feature=torch_user_age, 
                     user_occupation_feature=torch_user_occupation,
                     movie_genre_feature=torch_movie_genre)

        loss = loss_func(yhat, torch_result)
        train_loss += loss.item()
        train_each_iteration_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    train_loss_list.append(train_loss)
    print(f"=== Epoch: {epoch}, Train Loss: {train_loss}")

=== Epoch: 0, Train Loss: 1832.3123235106468
=== Epoch: 1, Train Loss: 745.8843268752098
=== Epoch: 2, Train Loss: 745.6503906846046


# OPNN

In [160]:
batch_size = 128
num_features = 10

# 1. 把所有東西包成dataloader
train_dataset = TensorDataset( torch.FloatTensor(train_user_age_onehotencoding),
                               torch.FloatTensor(train_user_occupation_onehotencoding),
                               torch.FloatTensor(train_movie_feature_data.values),
                               torch.FloatTensor(train_result_data) )
test_dataset = TensorDataset(  torch.FloatTensor(test_user_age_onehotencoding),
                               torch.FloatTensor(test_user_occupation_onehotencoding),
                               torch.FloatTensor(test_movie_feature_data.values),
                               torch.FloatTensor(test_result_data))
train_dataloader = DataLoader(train_dataset, batch_size=128)
test_dataloader = DataLoader(test_dataset, batch_size=128)


# 2. 呼叫模型與設定Loss function
model = opnn_model(num_user_age=train_user_age_onehotencoding.shape[1], 
                 num_user_occupation=train_user_occupation_onehotencoding.shape[1], 
                 num_movie_genre=train_movie_feature_data.values.shape[1],
                 num_decoder=3*(num_features**2)+3,
                 num_features=num_features)
loss_func = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-3)

In [161]:
epochs = 3

train_each_iteration_loss = list()
train_loss_list = list()

for epoch in range(epochs):
    train_loss = 0.0
    for torch_user_age, torch_user_occupation, torch_movie_genre, torch_result in train_dataloader:
        yhat = model(user_age_feature=torch_user_age, 
                     user_occupation_feature=torch_user_occupation,
                     movie_genre_feature=torch_movie_genre)

        loss = loss_func(yhat, torch_result)
        train_loss += loss.item()
        train_each_iteration_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    train_loss_list.append(train_loss)
    print(f"=== Epoch: {epoch}, Train Loss: {train_loss}")

=== Epoch: 0, Train Loss: 816.1337397694588
=== Epoch: 1, Train Loss: 744.1182813048363
=== Epoch: 2, Train Loss: 743.7272185087204


# PIN

In [14]:
batch_size = 128
num_features = 10

# 1. 把所有東西包成dataloader
train_dataset = TensorDataset( torch.FloatTensor(train_user_age_onehotencoding),
                               torch.FloatTensor(train_user_occupation_onehotencoding),
                               torch.FloatTensor(train_movie_feature_data.values),
                               torch.FloatTensor(train_result_data) )
test_dataset = TensorDataset(  torch.FloatTensor(test_user_age_onehotencoding),
                               torch.FloatTensor(test_user_occupation_onehotencoding),
                               torch.FloatTensor(test_movie_feature_data.values),
                               torch.FloatTensor(test_result_data))
train_dataloader = DataLoader(train_dataset, batch_size=128)
test_dataloader = DataLoader(test_dataset, batch_size=128)


# 2. 呼叫模型與設定Loss function
model = pin_model(num_user_age=train_user_age_onehotencoding.shape[1], 
                 num_user_occupation=train_user_occupation_onehotencoding.shape[1], 
                 num_movie_genre=train_movie_feature_data.values.shape[1],
                 num_decoder=3 * num_features,
                 num_features=num_features)
loss_func = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-3)

In [16]:
epochs = 3

train_each_iteration_loss = list()
train_loss_list = list()

for epoch in range(epochs):
    train_loss = 0.0
    for torch_user_age, torch_user_occupation, torch_movie_genre, torch_result in train_dataloader:
        print(torch_user_age.size())
        yhat = model(user_age_feature=torch_user_age, 
                     user_occupation_feature=torch_user_occupation,
                     movie_genre_feature=torch_movie_genre)

        loss = loss_func(yhat, torch_result)
        train_loss += loss.item()
        train_each_iteration_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    train_loss_list.append(train_loss)
    print(f"=== Epoch: {epoch}, Train Loss: {train_loss}")

torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([128, 8])
torch.Size([1

# CCPM

In [14]:
batch_size = 128
num_features = 10

# 1. 把所有東西包成dataloader
train_dataset = TensorDataset( torch.FloatTensor(train_user_age_onehotencoding),
                               torch.FloatTensor(train_user_occupation_onehotencoding),
                               torch.FloatTensor(train_movie_feature_data.values),
                               torch.FloatTensor(train_result_data) )
test_dataset = TensorDataset(  torch.FloatTensor(test_user_age_onehotencoding),
                               torch.FloatTensor(test_user_occupation_onehotencoding),
                               torch.FloatTensor(test_movie_feature_data.values),
                               torch.FloatTensor(test_result_data))
train_dataloader = DataLoader(train_dataset, batch_size=128)
test_dataloader = DataLoader(test_dataset, batch_size=128)


# 2. 呼叫模型與設定Loss function
model = pin_model(num_user_age=train_user_age_onehotencoding.shape[1], 
                 num_user_occupation=train_user_occupation_onehotencoding.shape[1], 
                 num_movie_genre=train_movie_feature_data.values.shape[1],
                 num_decoder=3 * num_features,
                 num_features=num_features)
loss_func = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-3)

In [16]:
epochs = 3

train_each_iteration_loss = list()
train_loss_list = list()

for epoch in range(epochs):
    train_loss = 0.0
    for torch_user_age, torch_user_occupation, torch_movie_genre, torch_result in train_dataloader:
        yhat = model(user_age_feature=torch_user_age, 
                     user_occupation_feature=torch_user_occupation,
                     movie_genre_feature=torch_movie_genre)

        loss = loss_func(yhat, torch_result)
        train_loss += loss.item()
        train_each_iteration_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    train_loss_list.append(train_loss)
    print(f"=== Epoch: {epoch}, Train Loss: {train_loss}")

=== Epoch: 0, Train Loss: 743.2036992311478
=== Epoch: 1, Train Loss: 743.2006698846817
=== Epoch: 2, Train Loss: 743.1977047920227


# DeepFM