In [31]:
from pymongo import MongoClient
from pymongo import ASCENDING, DESCENDING
from pymongo import MongoClient, IndexModel, ASCENDING, DESCENDING

In [32]:
# Connect to the MongoDB instance listening on localhost:27018 and select the
# database that every collection handle below is taken from.
client = MongoClient(host="mongodb://localhost:27018/")
db = client.get_database("anime_database")

In [39]:
# Source collections (read-only inputs of the aggregations further down).
animes, profiles, reviews = (
    db[name] for name in ("animes", "profiles", "reviews")
)

# Optimized collections (rebuilt from scratch on every run).
animes_opt, profiles_opt, reviews_opt = (
    db[name] for name in ("animes_opt", "profiles_opt", "reviews_opt")
)

# Remove any leftovers from a previous run so the targets start out empty.
# Collection handles are lazy, so binding them before the drop is harmless.
for _stale in (animes_opt, profiles_opt, reviews_opt):
    _stale.drop()
del _stale

# Index definitions: one single-field ascending index per listed field.
index_animes_opt = [
    IndexModel([(field, ASCENDING)]) for field in ("uid", "genre")
]

index_profiles_opt = [
    IndexModel([(field, ASCENDING)]) for field in ("profile", "gender")
]

index_reviews_opt = [
    IndexModel([(field, ASCENDING)]) for field in ("anime_uid", "profile")
]

# Create the indexes on the (still empty) optimized collections.
# NOTE: this happens *before* the collections are filled by `$out`. Per the
# MongoDB `$out` documentation, replacing an existing collection keeps the
# indexes already defined on it, so these definitions are expected to survive
# the aggregations that run later in this cell.
for _target, _index_models in (
    (animes_opt, index_animes_opt),
    (profiles_opt, index_profiles_opt),
    (reviews_opt, index_reviews_opt),
):
    _target.create_indexes(_index_models)
del _target, _index_models

# Computed pattern: pre-compute the per-anime review averages once, at build
# time, and store them next to the anime's own fields.
pipeline_animes = [
    # Stage 1: drop the source _id.
    {"$project": {"_id": 0}},
    # Stage 2: attach every review whose anime_uid equals this anime's uid.
    {
        "$lookup": {
            "from": "reviews",
            "localField": "uid",
            "foreignField": "anime_uid",
            "as": "reviews",
        }
    },
    # Stage 3: keep the descriptive fields and add one average per score
    # category. Each "$reviews.scores.<Category>" path addresses that category
    # across all joined reviews (assumes reviews store their scores under
    # `scores.<Category>` -- confirm against the reviews schema).
    {
        "$project": {
            **dict.fromkeys(
                (
                    "uid",
                    "title",
                    "synopsis",
                    "genre",
                    "episodes",
                    "members",
                    "popularity",
                    "ranked",
                    "score",
                    "start_years",
                    "end_years",
                ),
                1,
            ),
            **{
                average_field: {"$avg": review_path}
                for average_field, review_path in (
                    ("average_story", "$reviews.scores.Story"),
                    ("average_animation", "$reviews.scores.Animation"),
                    ("average_sound", "$reviews.scores.Sound"),
                    ("average_character", "$reviews.scores.Character"),
                    ("average_enjoyment", "$reviews.scores.Enjoyment"),
                    ("average_score", "$reviews.scores.Overall"),
                )
            },
        }
    },
    # Stage 4: write the result into the optimized collection.
    {"$out": "animes_opt"},
]

# Plain copy pipelines for 'profiles' -> 'profiles_opt' and
# 'reviews' -> 'reviews_opt': exclude the source _id, then write everything
# to the target collection. Each pipeline gets its own stage dictionaries.
pipeline_profiles, pipeline_reviews = (
    [
        {"$project": {"_id": 0}},  # Exclude _id field from output
        {"$out": destination},  # Output to new collection
    ]
    for destination in ("profiles_opt", "reviews_opt")
)

# Run the three transfers in order: animes (with computed averages), then
# profiles, then reviews. allowDiskUse lets the server spill to disk.
for _source, _pipeline in (
    (animes, pipeline_animes),
    (profiles, pipeline_profiles),
    (reviews, pipeline_reviews),
):
    _source.aggregate(_pipeline, allowDiskUse=True)
del _source, _pipeline

# Only reached when none of the aggregations above raised.
print("Optimization process completed successfully.")

Optimization process completed successfully.
