# بررسی میانگین امتیاز انتشارات و تعداد کتاب های نویسنده براساس تعداد صفحه

In [8]:
import numpy as np
import pandas as pd

# خواندن داده ها

In [2]:
# Load the main books table. Author, language and publisher are stored here
# only as numeric ids (author_id, lang_id, publisher_id).
df = pd.read_csv(filepath_or_buffer='../data/goodreads/books_db.csv')
df.head(n=5)

Unnamed: 0,bookID,title,average_rating,isbn,isbn13,num_pages,ratings_count,text_reviews_count,publication_date,author_id,lang_id,publisher_id
0,1,Harry Potter and the Half-Blood Prince (Harry ...,4.57,439785960,9780440000000.0,652,2095690,27591,9/16/2006,0,0,0
1,2,Harry Potter and the Order of the Phoenix (Har...,4.49,439358078,9780440000000.0,870,2153167,29221,9/1/2004,0,0,0
2,4,Harry Potter and the Chamber of Secrets (Harry...,4.42,439554896,9780440000000.0,352,6333,244,11/1/2003,0,0,1
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,4.56,043965548X,9780440000000.0,435,2339585,36325,5/1/2004,0,0,0
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potte...,4.78,439682584,9780440000000.0,2690,41428,164,9/13/2004,0,0,1


# ادغام داده های مستقل به دیتافریم اصلی

In [3]:
# Load the three lookup tables and, in the same step, give each one's value
# column the name it should carry once attached to the main frame.
authors = pd.read_csv('../data/goodreads/authors.csv').rename(
    columns={'name': 'author'}
)
language = pd.read_csv('../data/goodreads/language.csv').rename(
    columns={'code': 'lang'}
)
publisher = pd.read_csv('../data/goodreads/publisher.csv').rename(
    columns={'name': 'publisher'}
)

In [4]:
# Attach the readable author / language / publisher values to every book.
# Each id column of `df` is matched against the row index of its lookup
# table (right_index=True). `merge` performs an inner join by default, so a
# book whose id is absent from a lookup table's index is dropped.
df = (
    df
    .merge(authors, left_on='author_id', right_index=True)
    .merge(language, left_on='lang_id', right_index=True)
    .merge(publisher, left_on='publisher_id', right_index=True)
    # The id columns are redundant once the readable values are attached.
    .drop(columns=['author_id', 'lang_id', 'publisher_id'])
)
# The lookup frames are no longer needed after the merge.
del authors, language, publisher
df.head()

Unnamed: 0,bookID,title,average_rating,isbn,isbn13,num_pages,ratings_count,text_reviews_count,publication_date,author,lang,publisher
0,1,Harry Potter and the Half-Blood Prince (Harry ...,4.57,439785960,9780440000000.0,652,2095690,27591,9/16/2006,J.K. Rowling,eng,Scholastic Inc.
1,2,Harry Potter and the Order of the Phoenix (Har...,4.49,439358078,9780440000000.0,870,2153167,29221,9/1/2004,J.K. Rowling,eng,Scholastic Inc.
2,4,Harry Potter and the Chamber of Secrets (Harry...,4.42,439554896,9780440000000.0,352,6333,244,11/1/2003,J.K. Rowling,eng,Scholastic
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,4.56,043965548X,9780440000000.0,435,2339585,36325,5/1/2004,J.K. Rowling,eng,Scholastic Inc.
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potte...,4.78,439682584,9780440000000.0,2690,41428,164,9/13/2004,J.K. Rowling,eng,Scholastic


# میانگین امتیاز کتاب های یک انتشارات

In [5]:
# Mean of `average_rating` over each publisher's books (every book counts
# once, regardless of how many ratings it received), sorted from the highest
# mean to the lowest.
publisher_rating = (
    df.groupby('publisher')['average_rating']
    .mean()
    .sort_values(ascending=False)
)
publisher_rating.to_frame().head()

Unnamed: 0_level_0,average_rating
publisher,Unnamed: 1_level_1
Y Lolfa,5.0
Academica Press,5.0
T&T Clark Int'l,5.0
Raintree,5.0
Boosey & Hawkes Inc,5.0


# تعداد کتاب های هر نویسنده بر اساس دسته ی تعداد صفحات

In [6]:
# Label every book with a page-count bucket. np.select gives each row the
# choice belonging to the FIRST condition that is true, so checking the upper
# bounds in ascending order is enough to delimit each range. Rows for which
# no condition holds fall back to the default ''.
p = df['num_pages']
page_conditions = [p < 250, p < 500, p < 750, p < 1000, p >= 1000]
page_labels = ['<250', '250-499', '500-749', '750-999', '>=1000']
df['num_pages_grp'] = np.select(page_conditions, page_labels, default='')
df.head()

Unnamed: 0,bookID,title,average_rating,isbn,isbn13,num_pages,ratings_count,text_reviews_count,publication_date,author,lang,publisher,num_pages_grp
0,1,Harry Potter and the Half-Blood Prince (Harry ...,4.57,439785960,9780440000000.0,652,2095690,27591,9/16/2006,J.K. Rowling,eng,Scholastic Inc.,500-749
1,2,Harry Potter and the Order of the Phoenix (Har...,4.49,439358078,9780440000000.0,870,2153167,29221,9/1/2004,J.K. Rowling,eng,Scholastic Inc.,750-999
2,4,Harry Potter and the Chamber of Secrets (Harry...,4.42,439554896,9780440000000.0,352,6333,244,11/1/2003,J.K. Rowling,eng,Scholastic,250-499
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,4.56,043965548X,9780440000000.0,435,2339585,36325,5/1/2004,J.K. Rowling,eng,Scholastic Inc.,250-499
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potte...,4.78,439682584,9780440000000.0,2690,41428,164,9/13/2004,J.K. Rowling,eng,Scholastic,>=1000


In [7]:
# Number of books per (author, page-count bucket) pair. Only pairs that
# actually occur in `df` appear in the result.
author_page = (
    df.groupby(by=['author', 'num_pages_grp'])
    .size()
)
author_page.to_frame().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
author,num_pages_grp,Unnamed: 2_level_1
A.B. Yehoshua,250-499,1
A.B. Yehoshua,500-749,1
A.D.P. Briggs,250-499,1
A.E. Cunningham,<250,1
A.J. Jacobs,250-499,1
