In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from collections import Counter
import tensorflow as tf

import os
import pickle
import re
from tensorflow.python.ops import math_ops

In [2]:
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm#进度条工具
import zipfile
import hashlib

In [3]:
data_dir = '.'

In [4]:
#处理数据 处理样式：
#UserID、Occupation和MovieID不用变。
#Gender字段：需要将‘F’和‘M’转换成0和1。
#Age字段：要转成7个连续数字0~6。
#Genres字段：是分类字段，要转成数字。首先将Genres中的类别转成字符串到数字的字典，然后再将每个电影的Genres字段转成数字列表，因为有些电影是多个Genres的组合。
#Title字段：处理方式跟Genres字段一样，首先创建文本到数字的字典，然后将Title中的描述转成数字的列表。另外Title中的年份也需要去掉。
#Genres和Title字段需要将长度统一，这样在神经网络中方便处理。空白部分用‘< PAD >’对应的数字填充
def load_data(data_dir):
    #User数据
    users_title = ['UserID', 'Gender', 'Age', 'JobID', 'Zip-code']
    users = pd.read_table(data_dir+'/users.dat', sep='::', header=None, names=users_title, engine='python')
    users = users.filter(regex='UserID|Gender|Age|JobID')
    #转化为narray
    users_orig = users.values
    #改变User数据中性别和年龄
    gender_map = {'F':0, 'M':1}
    users['Gender'] = users['Gender'].map(gender_map)
    #构造年龄映射
    age_map = {val:ii 
               for ii,val in enumerate(set(users['Age']))}
    users['Age'] = users['Age'].map(age_map)
    
    #读取Movie数据集
    movies_title = ['MovieID', 'Title', 'Genres']
    movies = pd.read_table(data_dir+'/movies.dat', sep='::', header=None, names=movies_title, engine = 'python')
    movies_orig = movies.values
    #将Title中的年份去掉
    pattern = re.compile(r'^(.*)\((\d+)\)$')
    title_map = {val:pattern.match(val).group(1) for ii,val in enumerate(set(movies['Title']))}
    movies['Title'] = movies['Title'].map(title_map)
    #电影类型转数字字典
    genres_set = set()
    for val in movies['Genres'].str.split('|'):
        genres_set.update(val)
    
    genres_set.add('<PAD>')#????< PAD >
    genres2int = {val:ii 
                  for ii, val in enumerate(genres_set)}
    #样式：{'comedy':1,'tragedy':2....}
    #将电影类型转成等长数字列表，长度是18
    genres_map = {val:[genres2int[row] 
                       for row in val.split('|')] 
                          for ii,val in enumerate(set(movies['Genres']))}
    movies['Genres'] = movies['Genres'].map(genres_map)
    #geners_map样式{'a|b|c':[num1,num2,num3]}
    #如{'Comedy|Drama': [6, 10], 'Adventure|Fantasy|Romance': [16, 3, 13], 'Documentary': [2], 'Sci-Fi|Thriller': [17, 4], 'Horror|Romance': [0, 13], 'Action': [8], "Children's|Comedy|Drama": [9, 6, 10], 'Action|Mystery|Thriller': [8, 5, 4],
    #将没有的类别填充
    for key in genres_map:
        for cnt in range(max(genres2int.values()) - len(genres_map[key])):
            genres_map[key].insert(len(genres_map[key]) + cnt,genres2int['<PAD>'])
    #genres_map样式 {'Comedy|Drama': [6, 10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14],...}
    
    #电影Title转数字字典
    title_set = set()
    for val in movies['Title'].str.split():
        title_set.update(val)
    #title_set是一个单词库
    title_set.add('<PAD>')
    title2int = {val:ii 
                 for ii, val in enumerate(title_set)}
    #将电影Title转成等长数字列表，长度是15
    title_count = 15
    title_map = {val:[title2int[row] 
                      for row in val.split()] 
                         for ii,val in enumerate(set(movies['Title']))}
    
    for key in title_map:
        for cnt in range(title_count - len(title_map[key])):
            title_map[key].insert(len(title_map[key]) + cnt,title2int['<PAD>'])
    
    movies['Title'] = movies['Title'].map(title_map)
    #movies['Title'][1]=[3928, 2102, 2102, 2102, 2102, 2102, 2102, 2102, 2102, 2102, 2102, 2102, 2102, 2102, 2102] 
    #表示某电影名引用了词库中哪几个
    #读取评分数据集
    ratings_title = ['UserID','MovieID', 'Rating', 'timestamps']
    ratings = pd.read_table(data_dir+'/ratings.dat', sep='::', header=None, names=ratings_title, engine = 'python')
    #ratings.head()
    ratings = ratings.filter(regex='UserID|MovieID|Rating')
    
    #合并三个表
    data = pd.merge(pd.merge(ratings, users), movies)
    #data样式：  
#             UserID  MovieID  Rating  Gender  Age         Title  （就是所在词库位置）                                            Genres                                                                                      
#0             1     1193       5       0      0           [4094, 4032, 1255, 4199, 3605, 90, 2102, 2102,...    Drama  
#1             2     1193       5       1       5          [4094, 4032, 1255, 4199, 3605, 90, 2102, 2102,...    Drama 
    
    #将数据分成X和y两张表
    target_fields = 'Rating'
    features_pd, targets_pd = data.drop(target_fields, axis=1), data[target_fields]#分成特征和目标
    features = features_pd.values
    targets_values = targets_pd.values
    #预处理将表格变成了纯数字型的表格
    return title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig

In [5]:
title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig = load_data(data_dir)
#查看数据样式
data.iloc[1]
#features[1]
#UserID                                                     2
# MovieID                                                 1193
# Rating                                                     5
# Gender                                                     1
# Age                                                        5
# Title      [4094, 4032, 1255, 4199, 3605, 90, 2102, 2102,...
# Genres     [10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 1...
# Name: 1, dtype: object

UserID                                                     2
MovieID                                                 1193
Rating                                                     5
Gender                                                     1
Age                                                        5
JobID                                                     16
Title      [412, 3834, 1057, 3689, 537, 3898, 132, 132, 1...
Genres     [14, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1...
Name: 1, dtype: object

In [6]:
import tensorflow as tf
import os
import pickle#对象序列化的库
#保存和启动模型模型
def save_params(params):
    """
    Save parameters to file
    """
    pickle.dump(params, open('params.p', 'wb'))

def load_params():
    """
    Load parameters from file
    """
    return pickle.load(open('params.p', mode='rb'))

In [6]:
#嵌入矩阵的维度
embed_dim = 32
#narray.take(num,1)取第num列内容
#用户ID个数
uid_max = max(features.take(0,1)) + 1 # 6040  #take用来提取narray中的元素

#性别个数
gender_max = max(features.take(2,1)) + 1 # 1 + 1 = 2

#年龄类别个数
age_max = max(features.take(3,1)) + 1 # 6 + 1 = 7

#职业个数
job_max = max(features.take(4,1))+1# 20 + 1 = 21
print(job_max)
#电影ID个数  
movie_id_max = max(features.take(1,1))+1# 3952

#电影类型个数

movie_categories_max = max(genres2int.values())+ 1 # 18 + 1 = 19 
#电影名单词个数
movie_title_max = len(title_set) # 5216

#对电影类型嵌入向量做加和操作的标志，考虑过使用mean做平均，但是没实现mean
combiner = "sum"

#电影名长度
sentences_size = title_count # = 15
#文本卷积滑动窗口，分别滑动2, 3, 4, 5个单词
window_sizes = {2, 3, 4, 5}
#文本卷积核数量
filter_num = 8

#id映射字典 （raw:inner）
movieid2idx = {val[0]:i for i, val in enumerate(movies.values)}

21


In [7]:
# Number of Epochs 
num_epochs = 5
# Batch Size
batch_size = 256  #?

dropout_keep = 0.5
# 学习率
learning_rate = 0.0001
# Show stats for every n number of batches
show_every_n_batches = 20

save_dir = './save'

In [16]:
#定义输入的占位符(构建图)
#0 UserID                                                     2
#1 MovieID                                                 1193
# Rating                                                     5
#2 Gender                                                     1
#3 Age                                                        5
#4 Title      [4094, 4032, 1255, 4199, 3605, 90, 2102, 2102,...
#5 Genres     [10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 1...
# Name: 1, dtype: object
#创建输入层占位符函数
def get_inputs():#movies_titles是类似热编码
    uid = tf.placeholder(tf.int32, [None, 1], name="uid")
    user_gender = tf.placeholder(tf.int32, [None, 1], name="user_gender")
    user_age = tf.placeholder(tf.int32, [None, 1], name="user_age")
    user_job = tf.placeholder(tf.int32, [None, 1], name="user_job")
    
    movie_id = tf.placeholder(tf.int32, [None, 1], name="movie_id")
    movie_categories = tf.placeholder(tf.int32, [None, 18], name="movie_categories")
    movie_titles = tf.placeholder(tf.int32, [None, 15], name="movie_titles")
    targets = tf.placeholder(tf.int32, [None, 1], name="targets")
    LearningRate = tf.placeholder(tf.float32, name = "LearningRate")
    dropout_keep_prob = tf.placeholder(tf.float32, name = "dropout_keep_prob")
    return uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, LearningRate, dropout_keep_prob

In [17]:
#构建神经网络
#定义User的嵌入矩阵  （初始化嵌入层权重）  
def get_user_embedding(uid, user_gender, user_age, user_job):
    with tf.name_scope("user_embedding"):#tf名称空间
        uid_embed_matrix = tf.Variable(tf.random_uniform([uid_max, embed_dim], -1, 1), name = "uid_embed_matrix") #6040*32#？
        uid_embed_layer = tf.nn.embedding_lookup(uid_embed_matrix, uid, name = "uid_embed_layer")

        gender_embed_matrix = tf.Variable(tf.random_uniform([gender_max, embed_dim // 2], -1, 1), name= "gender_embed_matrix") 
        gender_embed_layer = tf.nn.embedding_lookup(gender_embed_matrix, user_gender, name = "gender_embed_layer")
 
        age_embed_matrix = tf.Variable(tf.random_uniform([age_max, embed_dim // 2], -1, 1), name="age_embed_matrix")
        age_embed_layer = tf.nn.embedding_lookup(age_embed_matrix, user_age, name="age_embed_layer")
 
        job_embed_matrix = tf.Variable(tf.random_uniform([job_max, embed_dim // 2], -1, 1), name = "job_embed_matrix")
        job_embed_layer = tf.nn.embedding_lookup(job_embed_matrix, user_job, name = "job_embed_layer")
    return uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer

In [18]:
#将User的嵌入矩阵一起全连接生成User的特征
def get_user_feature_layer(uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer): 
    with tf.name_scope("user_fc"):#创建名称空间
        #第一层全连接  80*128  转化后为 _*128 激活函数为relu
        uid_fc_layer = tf.layers.dense(uid_embed_layer, embed_dim, name = "uid_fc_layer", activation=tf.nn.relu)#uid_embed_layer激活层
        gender_fc_layer = tf.layers.dense(gender_embed_layer, embed_dim, name = "gender_fc_layer", activation=tf.nn.relu)
        age_fc_layer = tf.layers.dense(age_embed_layer, embed_dim, name ="age_fc_layer", activation=tf.nn.relu)
        job_fc_layer = tf.layers.dense(job_embed_layer, embed_dim, name = "job_fc_layer", activation=tf.nn.relu)
        #神经元数embed_layer
        #第二层全连接
        user_combine_layer = tf.concat([uid_fc_layer, gender_fc_layer, age_fc_layer, job_fc_layer], 2)  #(?, 1, 128)组合起来
        user_combine_layer = tf.contrib.layers.fully_connected(user_combine_layer, 200, tf.tanh)  #(?, 1, 200)神经元数200
         #fully_connected创建一个名为的变量weights，表示一个完全连接的权重矩阵，乘以它inputs以产生一个 Tensor隐藏单位
        user_combine_layer_flat = tf.reshape(user_combine_layer, [-1, 200])
    return user_combine_layer, user_combine_layer_flat
#定义Movie ID的嵌入矩阵  
def get_movie_id_embed_layer(movie_id):
    with tf.name_scope("movie_embedding"):  
        movie_id_embed_matrix = tf.Variable(tf.random_uniform([movie_id_max, embed_dim], -1, 1), name = "movie_id_embed_matrix")
        movie_id_embed_layer = tf.nn.embedding_lookup(movie_id_embed_matrix, movie_id, name = "movie_id_embed_layer")
    return movie_id_embed_layer

In [19]:
#对电影类型的多个嵌入向量做加和
def get_movie_categories_layers(movie_categories):#???????????????可能电影种类load错了
    with tf.name_scope("movie_categories_layers"):
        movie_categories_embed_matrix = tf.Variable(tf.random_uniform([movie_categories_max, embed_dim], -1, 1), name = "movie_categories_embed_matrix")
        movie_categories_embed_layer = tf.nn.embedding_lookup(movie_categories_embed_matrix, movie_categories, name = "movie_categories_embed_layer")#?
        
        if combiner == "sum":
            movie_categories_embed_layer = tf.reduce_sum(movie_categories_embed_layer, axis=1, keep_dims=True)
    #     elif combiner == "mean":
 
    return movie_categories_embed_layer

In [20]:
#Movie Title的文本卷积网络实现
def get_movie_cnn_layer(movie_titles): #movie_titles是类似热编码 [4094, 4032, 1255, 4199, 3605, 90, 2102, 2102,...
    #从嵌入矩阵中得到电影名对应的各个单词的嵌入向量
    with tf.name_scope("movie_embedding"):
        movie_title_embed_matrix = tf.Variable(tf.random_uniform([movie_title_max, embed_dim], -1, 1), name = "movie_title_embed_matrix")
        movie_title_embed_layer = tf.nn.embedding_lookup(movie_title_embed_matrix, movie_titles, name = "movie_title_embed_layer")#？
        movie_title_embed_layer_expand = tf.expand_dims(movie_title_embed_layer, -1)#通道数为1
        
        # 't' is a tensor of shape [2]
         #shape(expand_dims(t, -1)) ==> [2, 1]
            
    #对文本嵌入层使用不同尺寸的卷积核做卷积和最大池化
    pool_layer_lst = []
    for window_size in window_sizes:
        with tf.name_scope("movie_txt_conv_maxpool_{}".format(window_size)):#做卷积运算和做池化
            filter_weights = tf.Variable(tf.truncated_normal([window_size, embed_dim, 1, filter_num],stddev=0.1),name = "filter_weights")
            filter_bias = tf.Variable(tf.constant(0.1, shape=[filter_num]), name="filter_bias")
 
            conv_layer = tf.nn.conv2d(movie_title_embed_layer_expand, filter_weights, [1,1,1,1], padding="VALID", name="conv_layer")
            relu_layer = tf.nn.relu(tf.nn.bias_add(conv_layer,filter_bias), name ="relu_layer")
 
            maxpool_layer = tf.nn.max_pool(relu_layer, [1,sentences_size - window_size + 1 ,1,1], [1,1,1,1], padding="VALID", name="maxpool_layer")
            pool_layer_lst.append(maxpool_layer)#所有池化成的二维矩阵
 
    #Dropout层
    with tf.name_scope("pool_dropout"):#？
        pool_layer = tf.concat(pool_layer_lst, 3, name ="pool_layer")
        max_num = len(window_sizes) * filter_num
        pool_layer_flat = tf.reshape(pool_layer , [-1, 1, max_num], name = "pool_layer_flat")
 
        dropout_layer = tf.nn.dropout(pool_layer_flat, dropout_keep_prob, name = "dropout_layer")
    return pool_layer_flat, dropout_layer

In [21]:
#将Movie的各个层一起做全连接
def get_movie_feature_layer(movie_id_embed_layer, movie_categories_embed_layer, dropout_layer):
    with tf.name_scope("movie_fc"):
        #第一层全连接
        movie_id_fc_layer = tf.layers.dense(movie_id_embed_layer, embed_dim, name = "movie_id_fc_layer", activation=tf.nn.relu)
        movie_categories_fc_layer = tf.layers.dense(movie_categories_embed_layer, embed_dim, name = "movie_categories_fc_layer", activation=tf.nn.relu)
 
        #第二层全连接
        movie_combine_layer = tf.concat([movie_id_fc_layer, movie_categories_fc_layer, dropout_layer], 2)  #(?, 1, 96)
        movie_combine_layer = tf.contrib.layers.fully_connected(movie_combine_layer, 200, tf.tanh)  #(?, 1, 200)
 
        movie_combine_layer_flat = tf.reshape(movie_combine_layer, [-1, 200])
    return movie_combine_layer, movie_combine_layer_flat
#构建计算图
tf.reset_default_graph()
train_graph = tf.Graph()
with train_graph.as_default():
    #获取输入占位符
    uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob = get_inputs()
    #获取User的4个嵌入向量
    uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer = get_user_embedding(uid, user_gender, user_age, user_job)
    #得到用户特征
    user_combine_layer, user_combine_layer_flat = get_user_feature_layer(uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer)
    #获取电影ID的嵌入向量
    movie_id_embed_layer = get_movie_id_embed_layer(movie_id)
    #获取电影类型的嵌入向量
    movie_categories_embed_layer = get_movie_categories_layers(movie_categories)
    #获取电影名的特征向量
    pool_layer_flat, dropout_layer = get_movie_cnn_layer(movie_titles)
    #得到电影特征
    movie_combine_layer, movie_combine_layer_flat = get_movie_feature_layer(movie_id_embed_layer, 
                                                                                movie_categories_embed_layer, 
                                                                                dropout_layer)
    #计算出评分，要注意两个不同的方案，inference的名字（name值）是不一样的，后面做推荐时要根据name取得tensor
    with tf.name_scope("inference"):
        #将用户特征和电影特征作为输入，经过全连接，输出一个值的方案
#         inference_layer = tf.concat([user_combine_layer_flat, movie_combine_layer_flat], 1)  #(?, 200)
#         inference = tf.layers.dense(inference_layer, 1,
#                                     kernel_initializer=tf.truncated_normal_initializer(stddev=0.01), 
#                                     kernel_regularizer=tf.nn.l2_loss, name="inference")
        #简单的将用户特征和电影特征做矩阵乘法得到一个预测评分
        inference = tf.matmul(user_combine_layer_flat, tf.transpose(movie_combine_layer_flat))#[1*200]*[1*200]
 
    with tf.name_scope("loss"):
        # MSE损失，将计算值回归到评分
        cost = tf.losses.mean_squared_error(targets, inference )
        loss = tf.reduce_mean(cost)
    # 优化损失 
#     train_op = tf.train.AdamOptimizer(lr).minimize(loss)  #cost
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(lr)#lr学习率
    gradients = optimizer.compute_gradients(loss)  #cost
    train_op = optimizer.apply_gradients(gradients, global_step=global_step)# global_step全局脚步？改变学习率的参数？

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [22]:
def get_batches(Xs, ys, batch_size):
    for start in range(0, len(Xs), batch_size):
        end = min(start + batch_size, len(Xs))
        yield Xs[start:end], ys[start:end]
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import time
import datetime
#0 UserID                                                     2
#1 MovieID                                                 1193
# Rating                                                     5
#2 Gender                                                     1
#3 Age                                                        5
#4 Title      [4094, 4032, 1255, 4199, 3605, 90, 2102, 2102,...
#5 Genres     [10, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 1...
# Name: 1, dtype: object
losses = {'train':[], 'test':[]}
 #train_graph是default_graph
with tf.Session(graph=train_graph) as sess:
#搜集数据给tensorBoard用
    # Keep track of gradient values and sparsity
    grad_summaries = []#梯度图
    for g, v in gradients:
        if g is not None:
            grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name.replace(':', '_')), g)#柱状图
            sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name.replace(':', '_')), tf.nn.zero_fraction(g))
            grad_summaries.append(grad_hist_summary)
            grad_summaries.append(sparsity_summary)
    grad_summaries_merged = tf.summary.merge(grad_summaries)
 
    # Output directory for models and summaries
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))
 
    # Summaries for loss and accuracy
    loss_summary = tf.summary.scalar("loss", loss)#将【计算图】中的【标量数据】写入TensorFlow中的【日志文件】，以便为将来tensorboard的可视化做准备
 
    # Train Summaries
    train_summary_op = tf.summary.merge([loss_summary, grad_summaries_merged])#选择要显示的信息
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
 
    # Inference summaries
    inference_summary_op = tf.summary.merge([loss_summary])#选择要显示的信息
    #写入文件
    inference_summary_dir = os.path.join(out_dir, "summaries", "inference")
    inference_summary_writer = tf.summary.FileWriter(inference_summary_dir, sess.graph)
 
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()#循环训练要用到的模型保存器  
    #定期调用 saver.save() 方法，向文件夹中写入包含了当前模型中所有可训练变量的 checkpoint 文件。
    for epoch_i in range(num_epochs): #交叉验证次数5次
        #将数据集分成训练集和测试集，随机种子不固定
        train_X,test_X, train_y, test_y = train_test_split(features,  
                                                           targets_values,  
                                                           test_size = 0.2,  
                                                           random_state = 0)  
 
        train_batches = get_batches(train_X, train_y, batch_size)#一个元组
        test_batches = get_batches(test_X, test_y, batch_size)
 
        #训练的迭代，保存训练损失
        for batch_i in range(len(train_X) // batch_size):#训练集分节  以步长为256放入模型，每次的道一些诸如loss等模型评估状态
            x, y = next(train_batches)
            categories = np.zeros([batch_size, 18])
            for i in range(batch_size):
                categories[i] = x.take(6,1)[i]
 
            titles = np.zeros([batch_size, sentences_size])
            for i in range(batch_size):
                titles[i] = x.take(5,1)[i]#取title对应列
             #喂数据
            feed = {
                uid: np.reshape(x.take(0,1), [batch_size, 1]),
                user_gender: np.reshape(x.take(2,1), [batch_size, 1]),
                user_age: np.reshape(x.take(3,1), [batch_size, 1]),
                user_job: np.reshape(x.take(4,1), [batch_size, 1]),
                movie_id: np.reshape(x.take(1,1), [batch_size, 1]),
                movie_categories: categories,  #x.take(6,1)
                movie_titles: titles,  #x.take(5,1)
                targets: np.reshape(y, [batch_size, 1]),
                dropout_keep_prob: dropout_keep, #dropout_keep
                lr: learning_rate}
             #模型返回 ：多少个数据，num_batch次训练后的损失，日志 ,_(?)
            step, train_loss, summaries, _ = sess.run([global_step, loss, train_summary_op, train_op], feed)  #cost
            losses['train'].append(train_loss)#每一个损失都记录，每个损失由256个输入训练得到
            train_summary_writer.add_summary(summaries, step)  #
 
            # Show every <show_every_n_batches> batches
            if (epoch_i * (len(train_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                time_str = datetime.datetime.now().isoformat()
                print('{}: Epoch {:>3} Batch {:>4}/{}   train_loss = {:.3f}'.format(
                    time_str,
                    epoch_i,
                    batch_i,
                    (len(train_X) // batch_size),
                    train_loss))
 
        #使用测试数据的迭代
        for batch_i  in range(len(test_X) // batch_size):
            x, y = next(test_batches)
 
            categories = np.zeros([batch_size, 18])
            for i in range(batch_size):
                categories[i] = x.take(6,1)[i]
 
            titles = np.zeros([batch_size, sentences_size])
            for i in range(batch_size):
                titles[i] = x.take(5,1)[i]
 
            feed = {
                uid: np.reshape(x.take(0,1), [batch_size, 1]),
                user_gender: np.reshape(x.take(2,1), [batch_size, 1]),
                user_age: np.reshape(x.take(3,1), [batch_size, 1]),
                user_job: np.reshape(x.take(4,1), [batch_size, 1]),
                movie_id: np.reshape(x.take(1,1), [batch_size, 1]),
                movie_categories: categories,  #x.take(6,1)
                movie_titles: titles,  #x.take(5,1)
                targets: np.reshape(y, [batch_size, 1]),
                dropout_keep_prob: 1,
                lr: learning_rate}
 
            step, test_loss, summaries = sess.run([global_step, loss, inference_summary_op], feed)  #cost
             
            #保存测试损失
            losses['test'].append(test_loss)
            inference_summary_writer.add_summary(summaries, step)  #
 
            time_str = datetime.datetime.now().isoformat()
            if (epoch_i * (len(test_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                print('{}: Epoch {:>3} Batch {:>4}/{}   test_loss = {:.3f}'.format(
                    time_str,
                    epoch_i,
                    batch_i,
                    (len(test_X) // batch_size),
                    test_loss))
 
    # Save Model
    saver.save(sess, save_dir)  #, global_step=epoch_i
    print('Model Trained and Saved')

Writing to C:\Users\abc\Desktop\model\runs\1545901065

2018-12-27T16:57:48.344203: Epoch   0 Batch    0/3125   train_loss = 17.714
2018-12-27T16:57:49.551975: Epoch   0 Batch   20/3125   train_loss = 4.439
2018-12-27T16:57:50.811609: Epoch   0 Batch   40/3125   train_loss = 2.912
2018-12-27T16:57:51.989459: Epoch   0 Batch   60/3125   train_loss = 2.321
2018-12-27T16:57:53.155343: Epoch   0 Batch   80/3125   train_loss = 2.199
2018-12-27T16:57:54.387050: Epoch   0 Batch  100/3125   train_loss = 1.893
2018-12-27T16:57:55.659648: Epoch   0 Batch  120/3125   train_loss = 1.925
2018-12-27T16:57:56.986105: Epoch   0 Batch  140/3125   train_loss = 1.701
2018-12-27T16:57:58.303580: Epoch   0 Batch  160/3125   train_loss = 1.593
2018-12-27T16:57:59.488414: Epoch   0 Batch  180/3125   train_loss = 1.583
2018-12-27T16:58:00.692196: Epoch   0 Batch  200/3125   train_loss = 1.735
2018-12-27T16:58:01.864062: Epoch   0 Batch  220/3125   train_loss = 1.674
2018-12-27T16:58:03.052884: Epoch   0 Batch 

2018-12-27T17:01:58.308643: Epoch   1 Batch  435/3125   train_loss = 1.281
2018-12-27T17:01:59.446600: Epoch   1 Batch  455/3125   train_loss = 1.239
2018-12-27T17:02:00.633428: Epoch   1 Batch  475/3125   train_loss = 1.324
2018-12-27T17:02:01.742463: Epoch   1 Batch  495/3125   train_loss = 1.141
2018-12-27T17:02:02.856485: Epoch   1 Batch  515/3125   train_loss = 1.235
2018-12-27T17:02:04.021371: Epoch   1 Batch  535/3125   train_loss = 1.266
2018-12-27T17:02:05.255072: Epoch   1 Batch  555/3125   train_loss = 1.387
2018-12-27T17:02:06.507723: Epoch   1 Batch  575/3125   train_loss = 1.269
2018-12-27T17:02:07.638701: Epoch   1 Batch  595/3125   train_loss = 1.336
2018-12-27T17:02:08.763694: Epoch   1 Batch  615/3125   train_loss = 1.156
2018-12-27T17:02:09.897661: Epoch   1 Batch  635/3125   train_loss = 1.245
2018-12-27T17:02:11.016670: Epoch   1 Batch  655/3125   train_loss = 1.147
2018-12-27T17:02:12.155628: Epoch   1 Batch  675/3125   train_loss = 1.057
2018-12-27T17:02:13.30954

2018-12-27T17:05:42.669136: Epoch   2 Batch  910/3125   train_loss = 1.203
2018-12-27T17:05:43.920789: Epoch   2 Batch  930/3125   train_loss = 1.270
2018-12-27T17:05:45.100636: Epoch   2 Batch  950/3125   train_loss = 1.146
2018-12-27T17:05:46.408471: Epoch   2 Batch  970/3125   train_loss = 1.266
2018-12-27T17:05:47.558397: Epoch   2 Batch  990/3125   train_loss = 1.089
2018-12-27T17:05:48.716301: Epoch   2 Batch 1010/3125   train_loss = 1.386
2018-12-27T17:05:49.903129: Epoch   2 Batch 1030/3125   train_loss = 1.102
2018-12-27T17:05:51.044078: Epoch   2 Batch 1050/3125   train_loss = 1.177
2018-12-27T17:05:52.255840: Epoch   2 Batch 1070/3125   train_loss = 1.180
2018-12-27T17:05:53.417732: Epoch   2 Batch 1090/3125   train_loss = 1.220
2018-12-27T17:05:54.549708: Epoch   2 Batch 1110/3125   train_loss = 1.338
2018-12-27T17:05:55.866188: Epoch   2 Batch 1130/3125   train_loss = 1.212
2018-12-27T17:05:57.165714: Epoch   2 Batch 1150/3125   train_loss = 1.161
2018-12-27T17:05:58.42335

2018-12-27T17:09:38.460059: Epoch   3 Batch 1385/3125   train_loss = 1.103
2018-12-27T17:09:39.828401: Epoch   3 Batch 1405/3125   train_loss = 1.084
2018-12-27T17:09:41.083047: Epoch   3 Batch 1425/3125   train_loss = 1.192
2018-12-27T17:09:42.237960: Epoch   3 Batch 1445/3125   train_loss = 1.283
2018-12-27T17:09:43.469668: Epoch   3 Batch 1465/3125   train_loss = 1.133
2018-12-27T17:09:44.694393: Epoch   3 Batch 1485/3125   train_loss = 1.175
2018-12-27T17:09:46.075702: Epoch   3 Batch 1505/3125   train_loss = 0.999
2018-12-27T17:09:47.380333: Epoch   3 Batch 1525/3125   train_loss = 1.024
2018-12-27T17:09:48.515298: Epoch   3 Batch 1545/3125   train_loss = 1.038
2018-12-27T17:09:49.770941: Epoch   3 Batch 1565/3125   train_loss = 1.159
2018-12-27T17:09:51.067476: Epoch   3 Batch 1585/3125   train_loss = 1.148
2018-12-27T17:09:52.656229: Epoch   3 Batch 1605/3125   train_loss = 0.997
2018-12-27T17:09:54.130589: Epoch   3 Batch 1625/3125   train_loss = 1.147
2018-12-27T17:09:55.62658

2018-12-27T17:13:34.079718: Epoch   4 Batch 1840/3125   train_loss = 1.091
2018-12-27T17:13:35.170801: Epoch   4 Batch 1860/3125   train_loss = 1.109
2018-12-27T17:13:36.235953: Epoch   4 Batch 1880/3125   train_loss = 1.058
2018-12-27T17:13:37.290135: Epoch   4 Batch 1900/3125   train_loss = 1.003
2018-12-27T17:13:38.354290: Epoch   4 Batch 1920/3125   train_loss = 1.063
2018-12-27T17:13:39.452355: Epoch   4 Batch 1940/3125   train_loss = 1.019
2018-12-27T17:13:40.524489: Epoch   4 Batch 1960/3125   train_loss = 1.003
2018-12-27T17:13:41.664441: Epoch   4 Batch 1980/3125   train_loss = 1.110
2018-12-27T17:13:42.809384: Epoch   4 Batch 2000/3125   train_loss = 1.185
2018-12-27T17:13:43.980250: Epoch   4 Batch 2020/3125   train_loss = 1.128
2018-12-27T17:13:45.197996: Epoch   4 Batch 2040/3125   train_loss = 0.980
2018-12-27T17:13:46.437728: Epoch   4 Batch 2060/3125   train_loss = 0.942
2018-12-27T17:13:47.646501: Epoch   4 Batch 2080/3125   train_loss = 1.241
2018-12-27T17:13:48.84828

In [8]:
#获取 Tensors
def get_tensors(loaded_graph):
#怎样load这个graph和得到变量
    uid = loaded_graph.get_tensor_by_name("uid:0")
    user_gender = loaded_graph.get_tensor_by_name("user_gender:0")
    user_age = loaded_graph.get_tensor_by_name("user_age:0")
    user_job = loaded_graph.get_tensor_by_name("user_job:0")
    movie_id = loaded_graph.get_tensor_by_name("movie_id:0")
    movie_categories = loaded_graph.get_tensor_by_name("movie_categories:0")
    movie_titles = loaded_graph.get_tensor_by_name("movie_titles:0")
    targets = loaded_graph.get_tensor_by_name("targets:0")
    dropout_keep_prob = loaded_graph.get_tensor_by_name("dropout_keep_prob:0")
    lr = loaded_graph.get_tensor_by_name("LearningRate:0")
    #两种不同计算预测评分的方案使用不同的name获取tensor inference
#     inference = loaded_graph.get_tensor_by_name("inference/inference/BiasAdd:0")
    inference = loaded_graph.get_tensor_by_name("inference/MatMul:0")#
    movie_combine_layer_flat = loaded_graph.get_tensor_by_name("movie_fc/Reshape:0")
    user_combine_layer_flat = loaded_graph.get_tensor_by_name("user_fc/Reshape:0")
    return uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, inference, movie_combine_layer_flat, user_combine_layer_flat
#inference是预测评分

In [13]:
#指定用户和电影进行评分
#对网络做正向传播，计算得到预测的评分  #meta文件就是模型

def rating_movie(user_id_val, movie_id_val):#user_id_val, movie_id_val？
    loaded_graph = tf.Graph()  #
    with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
        loader = tf.train.import_meta_graph('save.meta')
        loader.restore(sess, tf.train.latest_checkpoint(r"C:\Users\abc\Desktop\model"))
        #sess里面有了参数
        # Get Tensors from loaded model
        uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, inference,_, __ = get_tensors(loaded_graph)  #loaded_graph

        categories = np.zeros([1, 18])
        categories[0] = movies.values[movieid2idx[movie_id_val]][2]#movies是转化过的的dataframe
        #movies样式为 id  name  categories 
        
        titles = np.zeros([1, sentences_size])
        titles[0] = movies.values[movieid2idx[movie_id_val]][1]

        feed = {
              uid: np.reshape(users.values[user_id_val-1][0], [1, 1]),
              user_gender: np.reshape(users.values[user_id_val-1][1], [1, 1]),
              user_age: np.reshape(users.values[user_id_val-1][2], [1, 1]),
              user_job: np.reshape(users.values[user_id_val-1][3], [1, 1]),
              movie_id: np.reshape(movies.values[movieid2idx[movie_id_val]][0], [1, 1]),
              movie_categories: categories,  #x.take(6,1)
              movie_titles: titles,  #x.take(5,1)
              dropout_keep_prob: 1}

        # Get Prediction
        inference_val = sess.run([inference], feed)  #inference是预测评分  正向传播title和id

        return (inference_val)
#rating_movie(123,1401)#指定用户对指定电影的预测评分

1
INFO:tensorflow:Restoring parameters from C:\Users\abc\Desktop\model\save


[array([[5.7040863]], dtype=float32)]

In [9]:
#生成Movie特征矩阵
#将训练好的电影特征组合成电影特征矩阵并保存到本地
import tensorflow as tf
loaded_graph = tf.Graph()  #
movie_matrics = []
with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
    loader = tf.train.import_meta_graph('save.meta')
    loader.restore(sess, tf.train.latest_checkpoint(r"C:\Users\abc\Desktop\model"))
    # Get Tensors from loaded model
    uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, _, movie_combine_layer_flat, __ = get_tensors(loaded_graph)  #loaded_graph

    for item in movies.values:
        categories = np.zeros([1, 18])
        categories[0] = item.take(2)

        titles = np.zeros([1, sentences_size])
        titles[0] = item.take(1)

        feed = {
            movie_id: np.reshape(item.take(0), [1, 1]),
            movie_categories: categories,  #x.take(6,1)
            movie_titles: titles,  #x.take(5,1)
            dropout_keep_prob: 1}
        #嵌入层运算
        movie_combine_layer_flat_val = sess.run([movie_combine_layer_flat], feed)  #movie_combine_layer_flat自动完成矩阵乘法，concat，全连接神经一层
        #和文本卷积，concat，全连接
        movie_matrics.append(movie_combine_layer_flat_val)#得到每组电影id categories title的特征

pickle.dump((np.array(movie_matrics).reshape(-1, 200)), open('movie_matrics.p', 'wb'))
movie_matrics = pickle.load(open('movie_matrics.p', mode='rb'))
#生成User特征矩阵
#将训练好的用户特征组合成用户特征矩阵并保存到本地
loaded_graph = tf.Graph()  #
users_matrics = []
with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
    loader = tf.train.import_meta_graph('save.meta')
    loader.restore(sess, tf.train.latest_checkpoint(r"C:\Users\abc\Desktop\model"))

    # Get Tensors from loaded model
    uid, user_gender, user_age, user_job, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, _, __,user_combine_layer_flat = get_tensors(loaded_graph)  #loaded_graph

    for item in users.values:

        feed = {
            uid: np.reshape(item.take(0), [1, 1]),
            user_gender: np.reshape(item.take(1), [1, 1]),
            user_age: np.reshape(item.take(2), [1, 1]),
            user_job: np.reshape(item.take(3), [1, 1]),
            dropout_keep_prob: 1}

        user_combine_layer_flat_val = sess.run([user_combine_layer_flat], feed)  
        users_matrics.append(user_combine_layer_flat_val)

pickle.dump((np.array(users_matrics).reshape(-1, 200)), open('users_matrics.p', 'wb'))
users_matrics = pickle.load(open('users_matrics.p', mode='rb'))

INFO:tensorflow:Restoring parameters from C:\Users\abc\Desktop\model\save
INFO:tensorflow:Restoring parameters from C:\Users\abc\Desktop\model\save


In [24]:
#
#使用生产的用户特征矩阵和电影特征矩阵做电影推荐

#1、推荐同类型的电影
#思路是计算当前看的电影特征向量与整个电影特征矩阵的余弦相似度，取相似度最大的top_k个，
#这里加了些随机选择在里面，保证每次的推荐稍稍有些不同。

def recommend_same_type_movie(movie_id_val, top_k = 20):#
    loaded_graph = tf.Graph()  #
    file_dir = r'C:\Users\abc\Desktop\model\same_type_recommend.txt'
    file = open(file_dir,'w',encoding = 'utf-8')
    with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
        loader = tf.train.import_meta_graph('save.meta')
        loader.restore(sess, tf.train.latest_checkpoint(r"C:\Users\abc\Desktop\model"))
        
        for i in movie_id_val :
            file.write(str(movies_orig[int(movieid2idx[i])][1])+'\t')
            #movie_matrics每组电影id categories title的特征   除以模长
            norm_movie_matrics = tf.sqrt(tf.reduce_sum(tf.square(movie_matrics), 1, keep_dims=True))
            normalized_movie_matrics = movie_matrics / norm_movie_matrics

            #推荐同类型的电影
            probs_embeddings = (movie_matrics[movieid2idx[i]]).reshape([1, 200])#另一个电影的特征向量
            probs_similarity = tf.matmul(probs_embeddings, tf.transpose(normalized_movie_matrics))
            sim = (probs_similarity.eval())
            results = (-sim[0]).argsort()[0:top_k]
            print(results)
            for val in results:                
                file.write(movies_orig[val][1]+'\t')
            file.write('\n')
            #print("您看的电影是：{}".format(movies_orig[movieid2idx[movie_id_val]]))
            #print("以下是给您的推荐：")
            #p = np.squeeze(sim)
            #p[np.argsort(p)[:-top_k]] = 0
            #p = p / np.sum(p)
#             results = set()
#             while len(results) != 5:
#                 c = np.random.choice(3883, 1, p=p)[0]
#                 results.add(c)    
#             #file.write(str(movies_orig[int(movieid2idx[i])][1])+'\t')
#             for val in (results):
#                 print(type(val))#print(movies_orig[val]) 
        file.close()
        return results
#2、推荐您喜欢的电影
#思路是使用用户特征向量与电影特征矩阵计算所有电影的评分，
#取评分最高的top_k个，同样加了些随机选择部分。
def recommend_your_favorite_movie(user_id_val, top_k = 10):

    loaded_graph = tf.Graph()  #
    file_dir = r'C:\Users\abc\Desktop\model\recommend_your_favorite_movies.txt'
    file = open(file_dir,'w',encoding='utf-8')
    with tf.Session(graph=loaded_graph) as sess:  #
        # Load saved model
        loader = tf.train.import_meta_graph('save.meta')
        loader.restore(sess, tf.train.latest_checkpoint(r"C:\Users\abc\Desktop\model"))
        #推荐您喜欢的电影
        for i in user_id_val:
            file.write(str(i)+'\t')      
            probs_embeddings = (users_matrics[i-1]).reshape([1, 200])
            probs_similarity = tf.matmul(probs_embeddings, tf.transpose(movie_matrics))
            sim = (probs_similarity.eval())
        #     print(sim.shape)
            results = (-sim[0]).argsort()[0:top_k]
            print(results)
            for val in results:                
                file.write(movies_orig[val][1]+'\t')
            file.write('\n')
        #     sim_norm = probs_norm_similarity.eval()
        #     print((-sim_norm[0]).argsort()[0:top_k])

            #print("以下是给您的推荐：")
#             p = np.squeeze(sim)
#             p[np.argsort(p)[:-top_k]] = 0
#             p = p / np.sum(p)
#             results = set()
#             while len(results) != 5:
#                 c = np.random.choice(3883, 1, p=p)[0]
#                 results.add(c)
#             for val in (results):
#                 #print(val)
#                 #print(movies_orig[val])
        file.close()
        return results
#看过这个电影的人还看了（喜欢）哪些电影
#首先选出喜欢某个电影的top_k个人，得到这几个人的用户特征向量。
#然后计算这几个人对所有电影的评分
#选择每个人评分最高的电影作为推荐
#同样加入了随机选择
import random
def recommend_other_favorite_movie(movie_id_val, top_k = 20):
    loaded_graph = tf.Graph()  #
    
    file_dir = r'C:\Users\abc\Desktop\model\recommend_other_favorite_movie.txt'
    with open(file_dir,'w',encoding='utf-8')as file:
        with tf.Session(graph=loaded_graph) as sess:  
            # Load saved model
            loader = tf.train.import_meta_graph('save.meta')
            loader.restore(sess, tf.train.latest_checkpoint(r"C:\Users\abc\Desktop\model"))
            for i in movie_id_val:

                #找相似用户
                probs_movie_embeddings = (movie_matrics[movieid2idx[i]]).reshape([1, 200])
                probs_user_favorite_similarity = tf.matmul(probs_movie_embeddings, tf.transpose(users_matrics))#向量乘矩阵得到向量
                favorite_user_id = np.argsort(probs_user_favorite_similarity.eval())[0][-top_k:]#索引下标
            #     print(normalized_users_matrics.eval().shape)
            #     print(probs_user_favorite_similarity.eval()[0][favorite_user_id])
            #     print(favorite_user_id.shape)

                print("您看的电影是：{}".format(movies_orig[movieid2idx[i]]))
                #找这些用户的电影
                print("喜欢看这个电影的人是：{}".format(users_orig[favorite_user_id-1])) 
                probs_users_embeddings = (users_matrics[favorite_user_id-1]).reshape([-1, 200])
                probs_similarity = tf.matmul(probs_users_embeddings, tf.transpose(movie_matrics))#挑选出的top_k*200矩阵和movie_matrics矩阵相乘
                sim = (probs_similarity.eval())
                results = (-sim[0]).argsort()[0:top_k]
                file.write(str(i)+':'+'\t')
                for val in results:
                    file.write(movies_orig[val][1]+'\t')#写下排好的电影
                file.write('\n')
            
        #     print(results)

        #     print(sim.shape)
        #     print(np.argmax(sim, 1))
#         p = np.argmax(sim, 1)
#         print("喜欢看这个电影的人还喜欢看：")

#         results = set()
#         while len(results) != 5:
#             c = p[random.randrange(top_k)]
#             results.add(c)
#         for val in (results):
#             print(val)
#             print(movies_orig[val])

        return results


In [25]:
#部分推荐结果运行此代码 运行1、3、4、5、6、7、8、13、9、11

#指定用户和电影进行评分：执行rating_movie
#推荐同类型的电影                      ：recommend_same_type_movie
#推荐您喜欢的电影                      ：recommend_your_favorite_movie
#看过这个电影的人还看了（喜欢）哪些电影：recommend_other_favorite_movie
#recommend_same_type_movie(list(movies['MovieID'][0:280]), top_k = 12)
#recommend_your_favorite_movie(list(users['UserID'][0:280]), top_k = 5)
recommend_other_favorite_movie(list(movies['MovieID'][0:280]), top_k = 5)

INFO:tensorflow:Restoring parameters from C:\Users\abc\Desktop\model\save
您看的电影是：[1 'Toy Story (1995)' "Animation|Children's|Comedy"]
喜欢看这个电影的人是：[[3907 'F' 18 14]
 [3015 'M' 56 6]
 [5622 'M' 35 15]
 [428 'F' 18 4]
 [6022 'M' 25 17]]
您看的电影是：[2 'Jumanji (1995)' "Adventure|Children's|Fantasy"]
喜欢看这个电影的人是：[[5918 'M' 25 12]
 [5622 'M' 35 15]
 [3015 'M' 56 6]
 [6022 'M' 25 17]
 [428 'F' 18 4]]
您看的电影是：[3 'Grumpier Old Men (1995)' 'Comedy|Romance']
喜欢看这个电影的人是：[[3031 'M' 18 4]
 [3874 'M' 25 7]
 [1763 'M' 35 7]
 [5622 'M' 35 15]
 [6022 'M' 25 17]]
您看的电影是：[4 'Waiting to Exhale (1995)' 'Comedy|Drama']
喜欢看这个电影的人是：[[5622 'M' 35 15]
 [3907 'F' 18 14]
 [1603 'F' 25 0]
 [3874 'M' 25 7]
 [6022 'M' 25 17]]
您看的电影是：[5 'Father of the Bride Part II (1995)' 'Comedy']
喜欢看这个电影的人是：[[1763 'M' 35 7]
 [1603 'F' 25 0]
 [3907 'F' 18 14]
 [5622 'M' 35 15]
 [6022 'M' 25 17]]
您看的电影是：[6 'Heat (1995)' 'Action|Crime|Thriller']
喜欢看这个电影的人是：[[5622 'M' 35 15]
 [437 'M' 35 17]
 [6022 'M' 25 17]
 [428 'F' 18 4]
 [3031 'M' 18 4]]

 [6022 'M' 25 17]]
您看的电影是：[55 'Georgia (1995)' 'Drama']
喜欢看这个电影的人是：[[3631 'M' 18 4]
 [3874 'M' 25 7]
 [1603 'F' 25 0]
 [5622 'M' 35 15]
 [6022 'M' 25 17]]
您看的电影是：[56 'Kids of the Round Table (1995)' "Adventure|Children's|Fantasy"]
喜欢看这个电影的人是：[[5622 'M' 35 15]
 [1809 'F' 25 14]
 [3015 'M' 56 6]
 [6022 'M' 25 17]
 [3907 'F' 18 14]]
您看的电影是：[57 'Home for the Holidays (1995)' 'Drama']
喜欢看这个电影的人是：[[3031 'M' 18 4]
 [3907 'F' 18 14]
 [1809 'F' 25 14]
 [5622 'M' 35 15]
 [6022 'M' 25 17]]
您看的电影是：[58 'Postino, Il (The Postman) (1994)' 'Drama|Romance']
喜欢看这个电影的人是：[[3874 'M' 25 7]
 [3031 'M' 18 4]
 [437 'M' 35 17]
 [6022 'M' 25 17]
 [5622 'M' 35 15]]
您看的电影是：[59 'Confessional, The (Le Confessionnal) (1995)' 'Drama|Mystery']
喜欢看这个电影的人是：[[1967 'M' 50 7]
 [3874 'M' 25 7]
 [3031 'M' 18 4]
 [437 'M' 35 17]
 [6022 'M' 25 17]]
您看的电影是：[60 'Indian in the Cupboard, The (1995)' "Adventure|Children's|Fantasy"]
喜欢看这个电影的人是：[[3015 'M' 56 6]
 [3031 'M' 18 4]
 [6022 'M' 25 17]
 [5622 'M' 35 15]
 [3907 'F' 18 14]]
您看

 [428 'F' 18 4]]
您看的电影是：[109 'Headless Body in Topless Bar (1995)' 'Comedy']
喜欢看这个电影的人是：[[3907 'F' 18 14]
 [1763 'M' 35 7]
 [437 'M' 35 17]
 [5622 'M' 35 15]
 [6022 'M' 25 17]]
您看的电影是：[110 'Braveheart (1995)' 'Action|Drama|War']
喜欢看这个电影的人是：[[2798 'M' 35 20]
 [6022 'M' 25 17]
 [5622 'M' 35 15]
 [3631 'M' 18 4]
 [428 'F' 18 4]]
您看的电影是：[111 'Taxi Driver (1976)' 'Drama|Thriller']
喜欢看这个电影的人是：[[1967 'M' 50 7]
 [3907 'F' 18 14]
 [3874 'M' 25 7]
 [437 'M' 35 17]
 [6022 'M' 25 17]]
您看的电影是：[112 'Rumble in the Bronx (1995)' 'Action|Adventure|Crime']
喜欢看这个电影的人是：[[3907 'F' 18 14]
 [3031 'M' 18 4]
 [1809 'F' 25 14]
 [437 'M' 35 17]
 [6022 'M' 25 17]]
您看的电影是：[113 'Before and After (1996)' 'Drama|Mystery']
喜欢看这个电影的人是：[[3907 'F' 18 14]
 [3031 'M' 18 4]
 [3874 'M' 25 7]
 [437 'M' 35 17]
 [6022 'M' 25 17]]
您看的电影是：[114 "Margaret's Museum (1995)" 'Drama']
喜欢看这个电影的人是：[[1603 'F' 25 0]
 [1967 'M' 50 7]
 [3874 'M' 25 7]
 [5622 'M' 35 15]
 [6022 'M' 25 17]]
您看的电影是：[115 'Happiness Is in the Field (1995)' 'Comedy

KeyboardInterrupt: 

In [26]:
file.close()

NameError: name 'file' is not defined