（20分） 编写一个简单的网页，对于新用户，可以根据 “评分”、“阅读量”、“综合” 三个指标，分别推荐Top10的书籍。图书评分数据使用实战演示课程中的数据集。

In [55]:
import pandas as pd
import numpy as np

In [56]:
# Load the three source tables from the local data_set directory.
# ISBNs are identifiers, not numbers: force them to str so that leading zeros
# survive and the column never ends up with mixed int/str values when pandas
# infers dtypes chunk by chunk on a large file. This keeps the groupby and
# merge keys on 'isbn' consistent between `ratings` and `books`.
users = pd.read_csv('data_set/users.csv')
books = pd.read_csv('data_set/books.csv', dtype={'isbn': str})
ratings = pd.read_csv('data_set/ratings.csv', dtype={'isbn': str})

In [57]:
# Preview the first rows of the ratings table.
ratings.head()

Unnamed: 0,user_id,isbn,rating
0,276726,0155061224,5
1,276729,052165615X,3
2,276729,0521795028,6
3,276744,038550120X,7
4,276747,0060517794,9


## 基于热度的TOP10

### 1.根据图书的评分

In [58]:
# Average rating of every book, indexed by ISBN.
ratings_mean = ratings.groupby(by='isbn')['rating'].agg('mean')

In [59]:
# Frequency of each distinct per-book average rating.
# value_counts() already orders its result by descending count, so the input
# does not need to be sorted beforehand.
ratings_mean.value_counts()

8.000000     26741
7.000000     21055
10.000000    20213
9.000000     17072
5.000000     16068
             ...  
7.898734         1
8.313725         1
8.028571         1
8.809524         1
8.434783         1
Name: rating, Length: 1374, dtype: int64

In [60]:
# 因为有大量平均评分为10分的图书，所以需要进一步筛选：只保留评分人次大于50的图书，再从中选出平均评分最高的图书

In [68]:
# Keep only the ratings of books that were rated more than 50 times.
count_ = ratings.groupby(by='isbn')['rating'].agg('count')
ratings_new = ratings.loc[ratings['isbn'].isin(count_.loc[count_.gt(50)].index)]
ratings_new.head(n=5)

Unnamed: 0,user_id,isbn,rating
3,276744,038550120X,7
11,276754,0684867621,8
12,276755,0451166892,5
22,276788,043935806X,7
24,276796,0330332775,5


In [88]:
# Top 10 books by average rating among the frequently rated books.
ratings_mean = ratings_new.groupby(by='isbn')['rating'].mean().to_frame()
rating_top10 = ratings_mean.sort_values('rating', ascending=False).iloc[:10]
rating_top10

Unnamed: 0_level_0,rating
isbn,Unnamed: 1_level_1
0345339738,9.402597
0439139597,9.264706
043936213X,9.207547
0345339711,9.120482
0439136369,9.076923
0064400557,9.073529
043935806X,9.058824
0439136350,9.035461
0590353403,8.983193
0439139600,8.981308


In [89]:
# Attach the book details by matching the ISBN index of the top-10 table
# against the 'isbn' column of `books`.
rating_top10 = pd.merge(
    rating_top10,
    books,
    how='inner',
    left_index=True,
    right_on='isbn',
)

In [98]:
# Expose the score under the name 'avg_rating': append the renamed copy as
# the last column and drop the original 'rating' column.
rating_top10 = rating_top10.assign(
    avg_rating=rating_top10['rating'],
).drop(columns='rating')

In [99]:
# Display the top-10 table after the merge and the column rename.
rating_top10

Unnamed: 0,isbn,title,author,year,publisher,avg_rating
4206,0345339738,"The Return of the King (The Lord of the Rings,...",J.R.R. TOLKIEN,1986,Del Rey,9.402597
5431,0439139597,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,Scholastic,9.264706
9026,043936213X,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,2001,Scholastic,9.207547
780,0345339711,"The Two Towers (The Lord of the Rings, Part 2)",J.R.R. TOLKIEN,1986,Del Rey,9.120482
6330,0439136369,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,2001,Scholastic,9.076923
3847,0064400557,Charlotte's Web (Trophy Newbery),E. B. White,1974,HarperTrophy,9.073529
5506,043935806X,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic,9.058824
3839,0439136350,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,Scholastic,9.035461
2809,0590353403,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,1998,Scholastic,8.983193
6932,0439139600,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2002,Scholastic Paperbacks,8.981308


In [100]:
# Save the table as a CSV file, leaving out the DataFrame index.
rating_top10.to_csv('data_set/rating_top10.csv', index=False)

### 2.根据图书的阅读量

In [105]:
# Number of rating records per book, used as the reading count.
reading_count = ratings.groupby(by='isbn')['user_id'].count().to_frame()

In [107]:
# The 10 books with the highest reading count.
count_top10 = reading_count.sort_values('user_id', ascending=False).iloc[:10]

In [108]:
# Display the ten most frequently rated ISBNs with their counts.
count_top10

Unnamed: 0_level_0,user_id
isbn,Unnamed: 1_level_1
0316666343,702
0971880107,580
0385504209,485
0312195516,382
0060928336,318
059035342X,308
0142001740,305
0446672211,293
044023722X,280
0452282152,275


In [109]:
# Attach the book details by matching the ISBN index of the top-10 table
# against the 'isbn' column of `books`.
count_top10 = pd.merge(
    count_top10,
    books,
    how='inner',
    left_index=True,
    right_on='isbn',
)

In [110]:
# Expose the count under the name 'reading_count': append the renamed copy
# as the last column and drop the original 'user_id' column.
count_top10 = count_top10.assign(
    reading_count=count_top10['user_id'],
).drop(columns='user_id')

In [111]:
# Display the reading-count top-10 table after the merge and column rename.
count_top10

Unnamed: 0,isbn,title,author,year,publisher,reading_count
408,0316666343,The Lovely Bones: A Novel,Alice Sebold,2002,"Little, Brown",702
26,0971880107,Wild Animus,Rich Shapero,2004,Too Far,580
748,0385504209,The Da Vinci Code,Dan Brown,2003,Doubleday,485
522,0312195516,The Red Tent (Bestselling Backlist),Anita Diamant,1998,Picador USA,382
1105,0060928336,Divine Secrets of the Ya-Ya Sisterhood: A Novel,Rebecca Wells,1997,Perennial,318
2143,059035342X,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,1999,Arthur A. Levine Books,308
356,0142001740,The Secret Life of Bees,Sue Monk Kidd,2003,Penguin Books,305
706,0446672211,Where the Heart Is (Oprah's Book Club (Paperba...,Billie Letts,1998,Warner Books,293
305,044023722X,A Painted House,John Grisham,2001,Dell Publishing Company,280
231,0452282152,Girl with a Pearl Earring,Tracy Chevalier,2001,Plume Books,275


In [112]:
# Save the table as a CSV file, leaving out the DataFrame index.
count_top10.to_csv('data_set/count_top10.csv', index=False)

### 3.根据图书的评分与阅读量的综合情况

In [114]:
# Sum of all ratings per book; this total is the combined score used here,
# since it grows with both the number of ratings and their values.
ratings_sum = ratings.groupby(by='isbn')['rating'].sum().to_frame()

In [119]:
# The 10 books with the highest rating total.
rating_sum_top10 = ratings_sum.sort_values('rating', ascending=False).iloc[:10]

In [120]:
# Display the ten ISBNs with the highest rating totals.
rating_sum_top10

Unnamed: 0_level_0,rating
isbn,Unnamed: 1_level_1
0316666343,5750
0385504209,4089
0312195516,3129
059035342X,2761
0142001740,2579
0971880107,2549
0060928336,2514
0446672211,2385
0452282152,2199
0671027360,2143


In [121]:
# Attach the book details by matching the ISBN index of the top-10 table
# against the 'isbn' column of `books`.
rating_sum_top10 = pd.merge(
    rating_sum_top10,
    books,
    how='inner',
    left_index=True,
    right_on='isbn',
)

In [122]:
# Expose the total under the name 'rating_sum': append the renamed copy as
# the last column and drop the original 'rating' column.
rating_sum_top10 = rating_sum_top10.assign(
    rating_sum=rating_sum_top10['rating'],
).drop(columns='rating')

In [123]:
# Display the rating-total top-10 table after the merge and column rename.
rating_sum_top10

Unnamed: 0,isbn,title,author,year,publisher,rating_sum
408,0316666343,The Lovely Bones: A Novel,Alice Sebold,2002,"Little, Brown",5750
748,0385504209,The Da Vinci Code,Dan Brown,2003,Doubleday,4089
522,0312195516,The Red Tent (Bestselling Backlist),Anita Diamant,1998,Picador USA,3129
2143,059035342X,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,1999,Arthur A. Levine Books,2761
356,0142001740,The Secret Life of Bees,Sue Monk Kidd,2003,Penguin Books,2579
26,0971880107,Wild Animus,Rich Shapero,2004,Too Far,2549
1105,0060928336,Divine Secrets of the Ya-Ya Sisterhood: A Novel,Rebecca Wells,1997,Perennial,2514
706,0446672211,Where the Heart Is (Oprah's Book Club (Paperba...,Billie Letts,1998,Warner Books,2385
231,0452282152,Girl with a Pearl Earring,Tracy Chevalier,2001,Plume Books,2199
118,0671027360,Angels &amp; Demons,Dan Brown,2001,Pocket Star,2143


In [124]:
# Save the table as a CSV file, leaving out the DataFrame index.
rating_sum_top10.to_csv('data_set/rating_sum_top10.csv', index=False)