## 网络结构设计
1. 提取用户特征和电影特征作为网络的输入，其中：
    - 用户特征包含：性别、年龄和职业
    - 电影特征包含：电影名称、电影类型以及电影海报
2. 提取用户信息，使用Embedding层将用户特征映射为向量表示，然后输入到全连接层并相加
3. 提取电影信息，使用Embedding层将电影类型映射为向量表示并输入到全连接层；电影名称经过Embedding层和卷积层、电影海报经过卷积层和全连接层得到向量表示；最后将三者拼接后输入到全连接层
4. 得到用户和电影的向量表示后，计算二者的余弦相似度，并乘以5进行缩放。最后，用缩放后的相似度和用户真实评分的均方误差作为该回归模型的损失函数

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

## 1. 用户信息
### 1.1 提取性别特征

In [2]:
# Toy batch of two user-gender ids (0 and 1); int64 because nn.Embedding
# looks rows up by integer index.
usr_gender_data = np.array([0, 1], dtype='int64')
usr_gender_dict_size = 2  # number of rows in the gender embedding table
usr_gender_emb = nn.Embedding(usr_gender_dict_size, 16)
usr_gender_fc = nn.Linear(16, 16)

# id -> 16-d embedding -> linear layer -> ReLU
usr_gender_var = torch.from_numpy(usr_gender_data)
usr_gender_feat = F.relu(usr_gender_fc(usr_gender_emb(usr_gender_var)))
print(usr_gender_feat.shape)
print(usr_gender_feat)

torch.Size([2, 16])
tensor([[0.0000, 0.5396, 0.3582, 0.0000, 0.0000, 0.8593, 0.8885, 0.0000, 0.6045,
         0.4596, 0.0000, 0.0000, 0.3659, 0.0000, 0.5746, 0.2763],
        [0.0000, 0.0000, 0.0000, 0.2292, 0.0000, 0.0000, 0.5469, 0.0000, 0.0000,
         0.0000, 0.0000, 1.1382, 0.0000, 1.1761, 0.4018, 1.2456]],
       grad_fn=<ReluBackward0>)


### 1.2 提取用户年龄特征

In [3]:
# Toy batch of two user-age ids (1 and 18); int64 because nn.Embedding
# looks rows up by integer index.
usr_age_data = np.array([1, 18], dtype='int64')
usr_age_dict_size = 56 + 1  # embedding table covers ids 0..56
usr_age_emb = nn.Embedding(usr_age_dict_size, 16)
usr_age_fc = nn.Linear(16, 16)

# id -> 16-d embedding -> linear layer -> ReLU
usr_age_var = torch.from_numpy(usr_age_data)
usr_age_feat = F.relu(usr_age_fc(usr_age_emb(usr_age_var)))
print(usr_age_feat.shape)
print(usr_age_feat)

torch.Size([2, 16])
tensor([[0.0000, 0.0000, 0.2369, 0.0000, 0.0000, 1.3198, 0.6582, 0.0000, 0.1871,
         0.3894, 0.1162, 0.0000, 0.0282, 0.0000, 0.2151, 1.2145],
        [0.0000, 0.0000, 0.9539, 0.0000, 1.0452, 0.5438, 0.7035, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.1451]],
       grad_fn=<ReluBackward0>)


### 1.3 提取用户职业特征

In [4]:
# Toy batch of two user-occupation ids (0 and 20); int64 because
# nn.Embedding looks rows up by integer index.
usr_job_data = np.array([0, 20], dtype='int64')
usr_job_dict_size = 20 + 1  # embedding table covers ids 0..20
usr_job_emb = nn.Embedding(usr_job_dict_size, 16)
usr_job_fc = nn.Linear(16, 16)

# id -> 16-d embedding -> linear layer -> ReLU
usr_job_var = torch.from_numpy(usr_job_data)
usr_job_feat = F.relu(usr_job_fc(usr_job_emb(usr_job_var)))
print(usr_job_feat.shape)
print(usr_job_feat)

torch.Size([2, 16])
tensor([[0.2584, 0.0000, 0.0359, 0.0000, 0.0000, 0.3537, 0.0000, 0.1387, 0.0361,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0245, 0.0305, 0.0000],
        [0.2721, 0.7482, 0.4157, 0.3370, 0.1461, 0.8518, 0.8119, 0.0000, 0.3327,
         0.0000, 0.0361, 0.5232, 0.0000, 0.0000, 0.0000, 0.6352]],
       grad_fn=<ReluBackward0>)


### 1.4 融合用户特征

In [5]:
# One linear layer per user attribute, each lifting the 16-d feature into a
# shared 200-d space so the three can be added element-wise.
fc_job = nn.Linear(in_features=16, out_features=200)
fc_age = nn.Linear(in_features=16, out_features=200)
fc_gender = nn.Linear(in_features=16, out_features=200)

# torch.tanh is used instead of F.tanh: the functional alias is deprecated
# and emits a warning on older PyTorch releases; both compute the same values.
gender_feat = torch.tanh(fc_gender(usr_gender_feat))
age_feat = torch.tanh(fc_age(usr_age_feat))
job_feat = torch.tanh(fc_job(usr_job_feat))

# Fuse the three user attributes by element-wise sum.
usr_feat = gender_feat + age_feat + job_feat
print(usr_feat.shape)
print(usr_feat)

torch.Size([2, 200])
tensor([[-5.1335e-01, -1.5911e-01, -2.0008e-01, -1.5151e-02,  3.7745e-01,
         -3.5872e-01, -4.1245e-01,  4.1987e-01, -3.6836e-01,  1.6747e-01,
          3.5504e-01, -1.1205e-01, -5.4380e-03,  3.1931e-01, -4.7137e-01,
         -6.2006e-02,  7.3392e-01, -1.6450e-02, -1.8560e-01,  2.6359e-01,
          3.0754e-01, -2.2211e-02,  1.8506e-01, -3.9725e-01, -3.3545e-01,
          2.7002e-01, -1.3147e-01, -1.6463e-01,  1.4269e-01, -3.1910e-01,
         -6.6017e-01, -2.4616e-01, -8.8197e-02,  2.6558e-01,  6.8087e-01,
          7.1073e-01,  5.6292e-01, -2.9444e-01, -4.2485e-01, -2.1698e-01,
          4.4503e-01, -1.8938e-02,  2.9185e-01, -6.5213e-01,  5.4178e-01,
         -3.8499e-01,  5.7499e-01,  1.8843e-01, -7.4801e-02, -5.3716e-01,
          4.7628e-01, -5.3107e-01,  9.3963e-02,  3.7328e-01,  5.6909e-02,
          9.2058e-01,  3.5076e-01, -2.4744e-01,  5.4148e-01, -1.6832e-01,
         -8.3651e-02,  1.4196e-01, -5.2339e-01, -3.8508e-01, -5.5372e-01,
          5.6065e



## 2. 电影特征提取
### 2.1 提取电影类别特征

In [6]:
# Toy batch of two movies, each described by six category ids.
# NOTE(review): the trailing zeros look like padding, but the embedding has no
# padding_idx, so row 0 is a learnable vector that is included in the sum below.
mov_cat_data = np.array([[1, 2, 3, 0, 0, 0],
                         [2, 3, 4, 0, 0, 0]], dtype='int64')
mov_cat_dict_size = 18 + 1  # embedding table covers ids 0..18
mov_cat_emb = nn.Embedding(mov_cat_dict_size, 32)
mov_cat_fc = nn.Linear(32, 32)

mov_cat_var = torch.from_numpy(mov_cat_data)

# (2, 6) ids -> (2, 6, 32) embeddings, then sum over the category axis so each
# movie is left with a single 32-d vector.
mov_cat_feat = mov_cat_emb(mov_cat_var).sum(dim=1)
mov_cat_feat = F.relu(mov_cat_fc(mov_cat_feat))
print(mov_cat_feat.shape)
print(mov_cat_feat)

torch.Size([2, 32])
tensor([[3.5934, 0.0000, 1.8565, 1.7676, 3.7018, 0.5713, 0.5355, 0.9211, 0.0000,
         0.0000, 0.8788, 0.0000, 0.0000, 0.0000, 0.1978, 4.8158, 2.4074, 1.7421,
         0.0000, 2.7812, 0.5020, 0.0000, 1.3836, 0.0000, 4.1446, 0.6200, 0.0612,
         0.0000, 0.0000, 6.7134, 1.0453, 2.6819],
        [2.8123, 0.0000, 3.0742, 1.2698, 3.1001, 0.1666, 0.0000, 0.6157, 0.0000,
         0.0000, 1.1570, 0.0146, 0.0000, 0.0000, 0.0000, 4.8261, 1.3017, 0.7606,
         0.0000, 3.1402, 0.0326, 0.0000, 1.3067, 0.0000, 3.7382, 0.0000, 0.0000,
         0.0000, 0.0000, 5.4824, 1.5428, 1.0031]], grad_fn=<ReluBackward0>)


### 2.2 提取电影名称特征

In [7]:
# Toy batch of two movie titles, each a sequence of 15 word ids (the trailing
# zeros look like padding). The extra axis of size 1 becomes the channel
# dimension that Conv2d expects once the ids have been embedded.
mov_tit_data = np.array(
    [[1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
     [2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
    dtype='int64',
).reshape(2, 1, -1)
mov_tit_dict_size = 5216 + 1  # embedding table covers ids 0..5216
mov_tit_emb = nn.Embedding(mov_tit_dict_size, 32)
# Both kernels are (3, 1): they slide along the word axis only and leave the
# 32-wide embedding axis untouched.
mov_tit_conv = nn.Conv2d(1, 1, kernel_size=(3, 1), stride=(2, 1))
mov_tit_conv2 = nn.Conv2d(1, 1, kernel_size=(3, 1))

mov_tit_var = torch.from_numpy(mov_tit_data)
print('电影名称输入数据形状：', mov_tit_var.shape)

mov_tit_feat = mov_tit_emb(mov_tit_var)
print('经过Embedding层后的形状：', mov_tit_feat.shape)

mov_tit_feat = mov_tit_conv(mov_tit_feat)
print('经过第一层卷积后的形状：', mov_tit_feat.shape)

mov_tit_feat = mov_tit_conv2(mov_tit_feat)
print('经过第二层卷积后的形状：', mov_tit_feat.shape)

batch_size = mov_tit_feat.size(0)
# Collapse the remaining word positions by summing over axis 2.
mov_tit_feat = mov_tit_feat.sum(dim=2)
print('经过reduce_sum降采样后：', mov_tit_feat.shape)

# ReLU, then flatten everything except the batch axis.
mov_tit_feat = F.relu(mov_tit_feat).reshape(batch_size, -1)
print(mov_tit_feat.shape)

电影名称输入数据形状： torch.Size([2, 1, 15])
经过Embedding层后的形状： torch.Size([2, 1, 15, 32])
经过第一层卷积后的形状： torch.Size([2, 1, 7, 32])
经过第二层卷积后的形状： torch.Size([2, 1, 5, 32])
经过reduce_sum降采样后： torch.Size([2, 1, 32])
torch.Size([2, 32])


### 2.3 电影海报特征提取

In [8]:
from PIL import Image


def _load_poster(path, size=(64, 64)):
    """Open the poster at *path* and return it as an RGB image resized to *size*.

    The file is opened in a ``with`` block so its handle is released as soon
    as the pixels have been read. The image is converted to RGB so grayscale,
    palette or RGBA files still provide the three channels that the
    ``(2, 0, 1)`` transpose and the first convolution below rely on.
    """
    with Image.open(path) as img:
        return img.convert('RGB').resize(size)


poster1 = _load_poster('./data/posters/mov_id2294.jpg')
poster2 = _load_poster('./data/posters/mov_id2299.jpg')

# (h, w, c) -> (c, h, w), then rescale 8-bit pixel values from [0, 255] to [-1, 1].
poster1_data = np.array(poster1).transpose((2, 0, 1)) / 127.5 - 1
poster2_data = np.array(poster2).transpose((2, 0, 1)) / 127.5 - 1

# Batch of two posters: (2, 3, 64, 64), float64 after the division above.
posters_data = np.stack([poster1_data, poster2_data])

poster_conv = nn.Sequential(
    # (2, 3, 64, 64) -> (2, 32, 32, 32)
    nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
    nn.BatchNorm2d(32),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    # (2, 32, 32, 32) -> (2, 32, 16, 16)
    nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
    nn.BatchNorm2d(32),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    # (2, 32, 16, 16) -> (2, 64, 8, 8)
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    # (2, 64, 8, 8) -> (2, 64, 1, 1)
    nn.AdaptiveAvgPool2d(output_size=(1, 1))
)
poster_fc = nn.Linear(in_features=64, out_features=32)

# The conv weights are float32, so cast the float64 array before the forward pass.
posters_var = torch.from_numpy(posters_data).float()
posters_feat = poster_conv(posters_var)
batch_size = posters_feat.shape[0]
# Flatten (2, 64, 1, 1) -> (2, 64) before the final linear projection to 32-d.
posters_feat = posters_feat.reshape(batch_size, -1)
posters_feat = poster_fc(posters_feat)
print(posters_feat.shape)

torch.Size([2, 32])


### 2.4 融合电影特征

In [9]:
# 96 input features = the three 32-d movie features (category, title, poster)
# laid side by side.
mov_combined = nn.Linear(96, 200)

# Concatenate along the feature axis, then project into the same 200-d space
# as the user feature.
mov_feats = torch.cat((mov_cat_feat, mov_tit_feat, posters_feat), dim=1)
mov_feats = mov_combined(mov_feats)
print(mov_feats.shape)

torch.Size([2, 200])


## 3. 相似度计算

In [10]:
def similarty(usr_feature, mov_feature, scale=5):
    """Return the scaled cosine similarity between user and movie features.

    The misspelled function name is kept as-is so existing callers keep working.

    Args:
        usr_feature: Tensor of user feature vectors; vectors lie along dim 1.
        mov_feature: Tensor of movie feature vectors, compared against
            ``usr_feature`` along dim 1.
        scale: Factor the cosine similarity is multiplied by. The default of
            5 reproduces the previously hard-coded value.

    Returns:
        Tensor holding ``scale * cosine_similarity``. Cosine similarity lies
        in [-1, 1], so the values lie in [-scale, scale] for a positive scale.
    """
    # dim=1 is F.cosine_similarity's default; spelled out so the axis being
    # compared is visible at the call site.
    return scale * F.cosine_similarity(usr_feature, mov_feature, dim=1)

# Score every user/movie pair in the batch with the scaled cosine similarity.
_sim = similarty(usr_feature=usr_feat, mov_feature=mov_feats)
print(_sim)

tensor([0.1667, 0.1522], grad_fn=<MulBackward0>)
