## Задание 5 - Подключение к БД

### Подключение библиотек

In [35]:
import os
import psycopg2
import psycopg2.extensions
import pandas as pd
import numpy as np

### Подключение к Postgres

In [36]:
# Connection settings for the PostgreSQL server on localhost; only host,
# port and user are passed here.
params = dict(host="localhost", port=5432, user='postgres')
conn = psycopg2.connect(**params)

# Register the UNICODE typecaster for this connection only (second argument
# limits the scope to `conn`).
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE, conn)

# Put the connection into the autocommit isolation level.
conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)

cursor = conn.cursor()

## Задание 6 - Реализация запросов Pandas

### Запрос 6.1 ( SQL - 3) - Вывести список книг по количеству рецензий

In [47]:
# Load both tables in full; the join and the sort are done in pandas.
books = pd.read_sql_query("select * from books;", conn)
titles = pd.read_sql_query("select * from titles;", conn)

# Inner join of titles with books on titles.id = books.title_id.
titles_books = pd.merge(
    titles,
    books,
    how='inner',
    left_on='id',
    right_on='title_id',
)

# Keep only the title and its review count, fewest reviews first.
titles_books_result = (
    titles_books[['title', 'reviews_count']]
    .sort_values(by='reviews_count', ascending=True)
)

titles_books_result.head(10)

Unnamed: 0,title,reviews_count
9607,Henry & Ramona,3.0
8700,"Sin City: Una Dura Despedida, #1 de 3",4.0
8603,Edvard Munch,17.0
4185,MÅ±vÃ©szeti AnatÃ³mia,24.0
7860,The 20th Century Art Book,24.0
8342,"The Bedside, Bathtub and Armchair Companion t...",25.0
5863,The Three Little Kittens (A First Little Golde...,25.0
9119,Pinocchio (A Little Golden Book),25.0
9984,Tom Clancy's Net Force: Breaking Point,26.0
7861,De toverspiegel van M.C. Escher,27.0


### Запрос 6.2 ( SQL - 4) - Вывести средний рейтинг по пользователям, которые голосовали больше чем за 10 книг и средний рейтинг которых больше 2, но меньше 5

In [48]:
ratings = pd.read_sql_query("select * from ratings;", conn)

names = {'rating': 'avg_rating', 'user_id': 'count_users'}

# Per user: mean rating and number of rating rows.
# String aliases are used instead of np.mean / np.size: passing NumPy
# callables to groupby aggregation is deprecated in recent pandas, and
# np.size would otherwise be applied group by group as a Python callable.
groups = (
    ratings.groupby('user_id')
    .agg({'rating': 'mean', 'user_id': 'size'})
    .rename(columns=names)
)

# Keep users with more than 10 ratings whose average lies strictly
# between 2 and 5, ordered from the lowest average upwards.
has_enough_votes = groups['count_users'] > 10
avg_in_range = (groups['avg_rating'] > 2) & (groups['avg_rating'] < 5)
groups_result = groups[has_enough_votes & avg_in_range].sort_values(
    "avg_rating", ascending=True
)

groups_result.head(10)

Unnamed: 0_level_0,avg_rating,count_users
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
37183,2.052632,19
49545,2.052632,19
35220,2.071429,14
39238,2.076923,13
23591,2.083333,12
14627,2.090909,11
46393,2.115385,26
2144,2.117647,17
41884,2.12987,77
34729,2.133333,15


### Запрос 6.3 ( SQL - 5) - Вывести список книг по среднему рейтингу

In [49]:
ratings = pd.read_sql_query("select * from ratings;", conn)
titles = pd.read_sql_query("select * from titles;", conn)
# Load `books` in this cell too, so it does not depend on another cell
# having been executed first.
books = pd.read_sql_query("select * from books;", conn)

names = {'rating': 'avg_rating'}
# Mean rating per book; the result is indexed by book_id. The string alias
# 'mean' replaces np.mean, which recent pandas deprecates in groupby agg.
ratings = (
    ratings.groupby('book_id')
    .agg({'rating': 'mean'})
    .rename(columns=names)
)

# 'book_id' refers to the index level of the aggregated frame here.
ratings_books = pd.merge(ratings, books, left_on='book_id', right_on='id')

# Attach titles and keep the 10 best-rated books.
ratings_books_titles = (
    pd.merge(ratings_books, titles, left_on='title_id', right_on='id')
    .sort_values("avg_rating", ascending=False)
    .head(10)
)
books_avg_result = ratings_books_titles[['title', 'avg_rating']]

books_avg_result.head(10)

Unnamed: 0,title,avg_rating
7946,ESV Study Bible,4.820225
6919,The Indispensable Calvin and Hobbes: A Calvin ...,4.78
5206,The Days Are Just Packed: A Calvin and Hobbes ...,4.78
9565,Attack of the Deranged Mutant Killer Monster S...,4.777778
8945,Ø¯ÛŒÙˆØ§Ù†â€Žâ€Ž [DÄ«vÄn],4.774194
6360,There's Treasure Everywhere: A Calvin and Hobb...,4.77
3274,"Harry Potter Boxed Set, Books 1-5 (Harry Potte...",4.77
5579,The Calvin and Hobbes Lazy Sunday Book,4.75
6589,The Authoritative Calvin and Hobbes,4.75
4482,It's a Magical World: A Calvin and Hobbes Coll...,4.75


### Запрос 6.4 ( SQL - 6) - Вывести список авторов по общему количеству рецензий по написанным автором книгам

In [50]:
books = pd.read_sql_query("select * from books;", conn)
authors = pd.read_sql_query("select * from authors;", conn)

# Total (not average) number of reviews over all books sharing the same
# authors_id, broadcast back onto every book row.
# min_count=1 keeps NaN for an author whose books all lack a review count,
# instead of reporting a misleading 0.
books['count_reviews_author'] = (
    books.groupby('authors_id')['reviews_count'].transform('sum', min_count=1)
)
books_authors = pd.merge(books, authors, left_on='authors_id', right_on='id')

# The merged frame has one row per book; keep a single row per author so
# each author appears once in the ranking, then order by the total.
authors_sort = (
    books_authors
    .drop_duplicates(subset='authors_id')
    .sort_values('count_reviews_author', ascending=True)
)
authors_result = authors_sort[['author_name', 'count_reviews_author']].head(10)

authors_result.head(10)

Unnamed: 0,author_name,count_reviews_author
9836,Cynthia J. McGean,3.0
9376,Ulrich Bischoff,17.0
6921,JenÅ‘ Barcsay,24.0
9260,"Dick Riley, Pam McAllister, Bruce Cassiday",25.0
8029,"Masha, Marie Simchow Stern",25.0
9630,"Eugene Bradley Coco, Ron Dias",25.0
9991,"Steve Perry, Tom Clancy, Steve Pieczenik",26.0
9025,"Bruno Ernst, M.C. Escher, John E. Brigham",27.0
9975,"Malvina G. Vogel, Arthur Conan Doyle",28.0
7110,Jude Fisher,28.0


### Запрос 6.5 ( SQL - 7) - Вывести список авторов и количество книг, написанных каждым автором

In [51]:
books = pd.read_sql_query("select * from books;", conn)
authors = pd.read_sql_query("select * from authors;", conn)

names = {'id': 'count_books'}
# Number of book rows per authors_id. The 'size' alias uses the built-in
# groupby size instead of calling np.size on every group.
authors_books_count = (
    books.groupby('authors_id')
    .agg({'id': 'size'})
    .rename(columns=names)
)

# 'authors_id' refers to the index level of the aggregated frame here.
# Attach author names and keep the 10 authors with the most books.
authors_books_count = (
    pd.merge(authors_books_count, authors, left_on='authors_id', right_on='id')
    .sort_values("count_books", ascending=False)
    .head(10)
)
authors_books_count_result = authors_books_count[['author_name', 'count_books']]

authors_books_count_result.head(10)

Unnamed: 0,author_name,count_books
57,Stephen King,60
587,Nora Roberts,59
263,Dean Koontz,47
303,Terry Pratchett,42
152,Agatha Christie,39
349,Meg Cabot,37
182,James Patterson,36
716,David Baldacci,34
542,J.D. Robb,33
68,John Grisham,33


## Задание 7 - Выгрузка в .csv файл

In [45]:
# Write the authors / book-count table to a semicolon-separated UTF-8 file.
# The DataFrame index is written as the first column (to_csv default).
authors_books_count_result.to_csv(
    './../../export/request_5.csv',
    encoding='utf-8',
    sep=';',
)