In [1]:
# Предметная область: книги 
# Книги разных жанров.

In [897]:
from pymongo import MongoClient, collection
from bson import ObjectId
import pandas as pd
import json
import csv
from statistics import mean, median

In [785]:
# Read the book catalogue from CSV and preview its first five rows.
file_path = "bookdata.csv"
data_csv = pd.read_csv(file_path)  # ',' is already the default separator
data_csv.head(5)

Unnamed: 0,availability,category,description,num_reviews,price,price_excl_tax,price_incl_tax,product_type,stars,tax,title,upc,url
0,19,default,"Since her assault, Miss Annette Chetwynd has b...",0,13.99,13.99,13.99,books,2,0.0,"Since her assault, Miss Annette Chetwynd has b...",0312262ecafa5a40,https://books.toscrape.com/catalogue/starving-...
1,20,fiction,"Dans une France assez proche de la nôtre, un h...",0,50.1,50.1,50.1,books,1,0.0,"Dans une France assez proche de la nôtre, un h...",6957f44c3847a760,https://books.toscrape.com/catalogue/soumissio...
2,19,young adult,Patient Twenty-nine.A monster roams the halls ...,0,22.65,22.65,22.65,books,1,0.0,Patient Twenty-nine.A monster roams the halls ...,f77dbf2323deb740,https://books.toscrape.com/catalogue/the-requi...
3,20,history,From a renowned historian comes a groundbreaki...,0,54.23,54.23,54.23,books,5,0.0,From a renowned historian comes a groundbreaki...,4165285e1663650f,https://books.toscrape.com/catalogue/sapiens-a...
4,20,historical fiction,"""Erotic and absorbing...Written with starling ...",0,53.74,53.74,53.74,books,1,0.0,"""Erotic and absorbing...Written with starling ...",90fa61229261140a,https://books.toscrape.com/catalogue/tipping-t...


In [786]:
# Parse the JSON book file and preview its first record.
with open('Books.json', mode='r', encoding='utf-8') as json_file:
    json_data = json.loads(json_file.read())
json_data[:1]

[{'id': 1,
  'url': 'https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html',
  'title': "It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love th It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love that Silverstein. Need proof of his genius? RockabyeRockabye baby, in the treetopDon't you know a treetopIs no safe place to rock?And who put 

In [787]:
def insert_data_to_mongodb(csv_file_path, json_file_path, host, port, db_name, collection_name):
    """Insert the records of a CSV file and of a JSON file into one MongoDB collection.

    Args:
        csv_file_path: Path of the CSV file, read with ``pd.read_csv``.
        json_file_path: Path of the JSON file. It is iterated item by item and
            each item gets an ``'_id'`` key, so it is expected to hold a list
            of objects.
        host: MongoDB host name.
        port: MongoDB port.
        db_name: Name of the target database.
        collection_name: Name of the target collection.

    Every call inserts the records again; nothing is de-duplicated.
    """
    # Read and parse both files before touching the database, so that a
    # missing or malformed file cannot leave the collection half-loaded.
    csv_records = pd.read_csv(csv_file_path).to_dict(orient='records')
    # Explicit UTF-8: the platform default encoding is not UTF-8 everywhere.
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        json_records = json.load(json_file)
    for item in json_records:
        # Each JSON record gets a freshly generated id, stored as a string.
        item['_id'] = str(ObjectId())

    # The context manager closes the client (and its connections) on exit.
    with MongoClient(host, port) as client:
        books = client[db_name][collection_name]
        # insert_many rejects an empty batch, so skip empty sources.
        if csv_records:
            books.insert_many(csv_records)
        if json_records:
            books.insert_many(json_records)

In [830]:
# 1) Get 20 books with a 'stars' rating below 2, sorted by 'price'
def get_books():
    """Return a cursor over at most 20 books rated below 2 stars, cheapest first.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``. The cursor is returned unconsumed,
    so the client it belongs to is not closed here.
    """
    books = MongoClient(host, port)[db_name][collection_name]
    low_rating_filter = {"stars": {"$lt": 2}}
    cursor = books.find(low_rating_filter)
    cursor = cursor.sort("price", 1)  # 1 = ascending
    return cursor.limit(20)

In [893]:
# 2) Get books with a 'stars' rating of 3 that do not belong to the given genres
#    'category': young adult, history
def get_books_with_rating_and_exclude_genres():
    """Return a cursor over 3-star books whose category is not an excluded genre.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``. The cursor is returned unconsumed,
    so the client it belongs to is not closed here.
    """
    books = MongoClient(host, port)[db_name][collection_name]
    excluded_genres = ['young adult', 'history']
    # '$ne' with a list compares the field against the whole array, so a plain
    # string category was never excluded; '$nin' tests membership in the list.
    return books.find({
        'stars': 3,
        'category': {'$nin': excluded_genres},
    })

In [895]:
# 3) Get books whose 'availability' is greater than 18, drop the books of the
#    'category' business from that list, and sort by 'price'
def get_books_with_availability_and_exclude_genre_and_sort_by_price():
    """Return a cursor over non-business books with availability above 18, cheapest first.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``. The cursor is returned unconsumed,
    so the client it belongs to is not closed here.
    """
    books = MongoClient(host, port)[db_name][collection_name]
    wanted = {
        'availability': {'$gt': 18},
        # Compare against the string itself: '$ne' with ['business'] only
        # rejects a category that is literally that one-element array.
        'category': {'$ne': 'business'},
    }
    return books.find(wanted).sort('price', 1)

In [833]:
# 4) Get books with fewer than 3 copies that have a 'url' link, sorted by 'stars' rating
def get_books_with_low_availability():
    """Return a cursor over books with availability below 3 and a 'url' field, best rated first.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``. The cursor is returned unconsumed,
    so the client it belongs to is not closed here.
    """
    books = MongoClient(host, port)[db_name][collection_name]
    scarce_with_link = dict(availability={"$lt": 3}, url={"$exists": True})
    matches = books.find(scarce_with_link)
    return matches.sort("stars", -1)  # -1 = descending

In [834]:
# 5) Get the number of books priced ('price') below 30 or above 50 in some genres
#    'category': default, fiction, sequential art
def get_books_5():
    """Return a cursor over books in the selected genres priced below 30 or above 50.

    The function returns the matching documents themselves, not a number.
    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``. The cursor is returned unconsumed,
    so the client it belongs to is not closed here.
    """
    books = MongoClient(host, port)[db_name][collection_name]
    cheap_or_expensive = {"$or": [{"price": {"$lt": 30}}, {"price": {"$gt": 50}}]}
    in_selected_genres = {"category": {"$in": ["default", "fiction", "sequential art"]}}
    return books.find({"$and": [cheap_or_expensive, in_selected_genres]})

In [835]:
# 6) Get statistics on 'price'
def get_price_statistics():
    """Return min, max, average 'price' and the document count for the whole collection.

    Returns:
        list: The documents produced by the aggregation; each has the keys
        ``_id``, ``min_price``, ``max_price``, ``avg_price`` and ``total_count``.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    pipeline = [
        {
            "$group": {
                "_id": None,  # a constant key puts every document in one group
                "min_price": {"$min": "$price"},
                "max_price": {"$max": "$price"},
                "avg_price": {"$avg": "$price"},
                "total_count": {"$sum": 1},
            }
        }
    ]
    # The result is materialised inside the block, so the client can be
    # closed before returning instead of being leaked.
    with MongoClient(host, port) as client:
        return list(client[db_name][collection_name].aggregate(pipeline))

In [836]:
# 7) Get all book genres ('category')
def get_all_categories():
    """Return the distinct values of the 'category' field in the collection.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    # Close the client once the values are fetched instead of leaking it.
    with MongoClient(host, port) as client:
        return client[db_name][collection_name].distinct('category')

In [837]:
# 8) For the minimum rating 'stars'=1 get the maximum 'price'
def get_max_price_by_min_stars():
    """Return the highest 'price' among books rated exactly 1 star.

    Returns:
        The price of the most expensive 1-star book, or ``None`` when no such
        book exists (the previous version raised UnboundLocalError then,
        because it initialised one name and returned another).

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    with MongoClient(host, port) as client:
        books = client[db_name][collection_name]
        # Sorting by price descending makes the first document the maximum.
        priciest = books.find_one({"stars": 1}, {"price": 1}, sort=[("price", -1)])
    return priciest.get("price") if priciest else None

In [901]:
# 9) Get 'price' statistics for certain genres 'category': young adult, history, historical fiction
def get_rating_stats_by_category():
    """Return count, mean and median of 'price' for each of three fixed genres.

    Despite its name, the function summarises prices, not ratings.

    Returns:
        dict: Maps each genre to ``{'count', 'mean', 'median'}``. For a genre
        with no priced books ``count`` is 0 and ``mean``/``median`` are
        ``None`` (the statistics functions raise on empty input).

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    genres = ['young adult', 'history', 'historical fiction']
    price_statistics = {}

    with MongoClient(host, port) as client:
        books = client[db_name][collection_name]
        for genre in genres:
            # Skip documents without a price instead of failing on them.
            prices = [
                book['price']
                for book in books.find({'category': genre})
                if book.get('price') is not None
            ]
            price_statistics[genre] = {
                'count': len(prices),
                'mean': mean(prices) if prices else None,
                'median': median(prices) if prices else None,
            }
    return price_statistics

In [883]:
# 10) Get the highest book price in the 'category' genres business and poetry with 'stars' = 4
def get_max_book_price():
    """Return the highest 'price' among 4-star books in the business or poetry genres.

    Returns:
        The price of the most expensive matching book, or ``None`` when no
        book matches.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    query = {
        'category': {'$in': ['business', 'poetry']},
        'stars': 4
    }
    # Close the client once the single document is fetched instead of leaking it.
    with MongoClient(host, port) as client:
        priciest = client[db_name][collection_name].find_one(query, sort=[("price", -1)])

    return priciest.get("price") if priciest else None

In [840]:
# 11) Delete books with 'price' below 10
def delete_books_under_10():
    """Delete every book whose 'price' is below 10 and return the delete result.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    # The operator needs its '$' prefix: a plain 'lt' key is treated as a
    # literal sub-document to match, so the old filter matched no book.
    cheap_books = {'price': {'$lt': 10}}
    with MongoClient(host, port) as client:
        return client[db_name][collection_name].delete_many(cheap_books)

In [841]:
# 12) Double the 'price' of books available as a single copy ('availability')
def double_price_for_single_availability_books():
    """Double the 'price' of every book whose 'availability' is 1.

    Returns:
        The result of one bulk update covering all matching books. The
        previous version returned only the result of the last per-document
        update and raised UnboundLocalError when nothing matched.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    single_copy = {
        "availability": 1,
        # Only numeric prices are multiplied; '$mul' would otherwise create
        # a price of 0 on documents without one.
        "price": {"$type": "number"},
    }
    # One server-side update replaces the read-modify-write loop.
    with MongoClient(host, port) as client:
        return client[db_name][collection_name].update_many(
            single_copy, {"$mul": {"price": 2}}
        )

In [842]:
# 13) Reduce the 'price' of books rated 'stars' = 1 in some genres
#    'category': young adult, history, when their 'availability' > 3
def decrease_price_for_low_rated_books():
    """Cut the 'price' by 10% for 1-star young adult/history books with availability above 3.

    Returns:
        The result of one bulk update covering all matching books. The
        previous version returned only the result of the last per-document
        update and raised UnboundLocalError when nothing matched.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    discounted = {
        "stars": 1,
        "category": {"$in": ["young adult", "history"]},
        "availability": {"$gt": 3},
        # Only numeric prices are multiplied; '$mul' would otherwise create
        # a price of 0 on documents without one.
        "price": {"$type": "number"},
    }
    # Multiplying by 0.9 is the 10% reduction, applied server-side in one call.
    with MongoClient(host, port) as client:
        return client[db_name][collection_name].update_many(
            discounted, {"$mul": {"price": 0.9}}
        )

In [843]:
# 14) Increase the 'availability' count of every book by one
def increase_availability():
    """Add 1 to the 'availability' of every book that has a numeric availability.

    Returns:
        The result of one bulk update. The previous version issued one update
        per document, returned only the last result and raised
        UnboundLocalError on an empty collection.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    # Restrict to numeric values so '$inc' neither fails on other types nor
    # creates the field on documents that lack it.
    has_count = {"availability": {"$type": "number"}}
    with MongoClient(host, port) as client:
        return client[db_name][collection_name].update_many(
            has_count, {"$inc": {"availability": 1}}
        )

In [844]:
# 15) Delete books with a 'stars' rating of 1
def delete_books_with_one_star():
    """Delete every book whose 'stars' rating equals 1 and return the delete result.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    # Close the client once the delete has run instead of leaking it.
    with MongoClient(host, port) as client:
        return client[db_name][collection_name].delete_many({'stars': 1})

In [856]:
def default_json_encoder(obj):
    """Fallback for ``json.dump``: turn an ObjectId into its string form.

    Raises:
        TypeError: For any other type.
    """
    if not isinstance(obj, ObjectId):
        raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")
    return str(obj)

In [859]:
def write_result_to_json(result, file_path):
    """Write an iterable of records to ``file_path`` as an indented UTF-8 JSON array.

    ``result`` is turned into a list before the file is opened; values that
    ``json`` cannot encode are passed to ``default_json_encoder``.
    """
    records = [*result]
    with open(file_path, mode='w', encoding='utf-8') as sink:
        json.dump(
            records,
            sink,
            default=default_json_encoder,
            ensure_ascii=False,  # keep non-ASCII text readable in the file
            indent=4,
        )

In [869]:
def write_result_to_json_float(result, file_path):
    """Write ``result`` to ``file_path`` as indented UTF-8 JSON without converting it to a list.

    ``result`` is handed to ``json.dump`` unchanged; values that ``json``
    cannot encode are passed to ``default_json_encoder``.
    """
    with open(file_path, mode='w', encoding='utf-8') as sink:
        json.dump(
            result,
            sink,
            default=default_json_encoder,
            ensure_ascii=False,  # keep non-ASCII text readable in the file
            indent=4,
        )

In [847]:
def get_updated_data():
    """Return every document currently in the collection as a list.

    Connection settings are read from the module-level ``host``, ``port``,
    ``db_name`` and ``collection_name``.
    """
    # The documents are read fully inside the block, so the client can be
    # closed before returning instead of being leaked.
    with MongoClient(host, port) as client:
        return list(client[db_name][collection_name].find())

In [848]:
# Source files that are loaded into MongoDB.
csv_path, json_path = 'bookdata.csv', 'Books.json'
# Connection settings; the query functions read these module-level names.
host, port = 'localhost', 27017
db_name = 'mydatabase'
collection_name = 'mycollection_books'

In [849]:
# Load both source files into the configured collection; every run of this
# cell inserts the records again.
insert_data_to_mongodb(csv_path, json_path, host, port, db_name, collection_name)

In [860]:
# Run query 1 and save its documents to result_1.json.
result_1 = get_books()
write_result_to_json(result_1, 'result_1.json')

In [894]:
# Run query 2 and save its documents to result_2.json.
result_2 = get_books_with_rating_and_exclude_genres()
write_result_to_json(result_2, 'result_2.json')

In [896]:
# Run query 3 and save its documents to result_3.json.
result_3 = get_books_with_availability_and_exclude_genre_and_sort_by_price()
write_result_to_json(result_3, 'result_3.json')

In [863]:
# Run query 4 and save its documents to result_4.json.
result_4 = get_books_with_low_availability()
write_result_to_json(result_4, 'result_4.json')

In [864]:
# Run query 5 and save its documents to result_5.json.
result_5 = get_books_5()
write_result_to_json(result_5, 'result_5.json')

In [865]:
# Run query 6 (price statistics) and save the resulting list to result_6.json.
result_6 = get_price_statistics()
write_result_to_json(result_6, 'result_6.json')

In [866]:
# Run query 7 (distinct categories) and save the values to result_7.json.
result_7 = get_all_categories()
write_result_to_json(result_7, 'result_7.json')

In [870]:
# Run query 8 and save the single returned value to result_8.json.
max_price_8 = get_max_price_by_min_stars()
write_result_to_json_float(max_price_8, 'result_8.json')

In [903]:
# Run query 9 and save the per-genre statistics dict to result_9.json.
price_statistics = get_rating_stats_by_category()
write_result_to_json_float(price_statistics, 'result_9.json')

In [884]:
# Run query 10 and save the single returned value to result_10.json.
result_10 = get_max_book_price()
write_result_to_json_float(result_10, 'result_10.json')

In [875]:
# Task 11: run the delete; the returned result object is kept but not written to a file.
result_11 = delete_books_under_10()

In [876]:
# Task 12: run the price update; the returned result object is kept but not written to a file.
result_12 = double_price_for_single_availability_books()

In [877]:
# Task 13: run the price update; the returned result object is kept but not written to a file.
result_13 = decrease_price_for_low_rated_books()

In [878]:
# Task 14: run the availability update; the returned result object is kept but not written to a file.
result_14 = increase_availability()

In [879]:
# Task 15: run the delete; the returned result object is kept but not written to a file.
result_15 = delete_books_with_one_star()

In [880]:
# Dump the whole collection as it stands after the update and delete steps above.
write_result_to_json(get_updated_data(), 'result_5_updated_data.json')