# Задание 1

In [32]:
import json
import pandas as pd
from pymongo import MongoClient

In [33]:
def read_json(filepath:str):
    with open(filepath,'r',encoding='utf-8') as file:
        data = json.load(file)
    return data

In [42]:
def save_json(filepath: str, data: list):
    with open(filepath, mode='w', encoding='UTF-8') as file:
        file.write(data)

In [None]:
def create_collection_and_insert_data(client,db_name, collection_name, data):
    db = client[db_name]
    collection = db[collection_name]
    result = collection.insert_many(data)


In [36]:
def get_top_10_sorted_by_salary(collection):
    results = collection.find().sort('salary', -1).limit(10)
    return list(results)

In [37]:
def get_top_15_filtered_by_age(collection):
    results = collection.find({'age': {'$lt': 30}}).sort('salary', -1).limit(15)
    return list(results)

In [38]:
def get_top_10_filtered_by_complex_condition(city, professions,collection):
    results = collection.find({
        'city': city,
        'job': {'$in': professions}
    }).sort('age', 1).limit(10)
    return list(results)

In [39]:
def count_records_filtered_by_conditions(age_range, years,collection):
    results = collection.count_documents({
        'age': {'$gte': age_range[0], '$lte': age_range[1]},
        'year': {'$in': years},
        '$or': [
            {'salary': {'$gt': 50000, '$lte': 75000}},
            {'salary': {'$gt': 125000, '$lt': 150000}}
        ]
    })
    return results    

In [48]:
def first_task(insert_data:bool = False):
    client = MongoClient('mongodb://root:fskFAdsfgf2513ktbgds13515@80.87.107.237:27017')

    if insert_data:
        filepath = './55/task_1_item.json'
        data = read_json(filepath=filepath)
        create_collection_and_insert_data(client, 'practises', 'practise5', data)

    db = client['practises']
    collection = db['practise5']
    
    top_10 = get_top_10_sorted_by_salary(collection)
    json_output = json.dumps(top_10, default=str, ensure_ascii=False)
    print("Первые 10 записей по полю salary:", json_output)
 
    top_15_age = get_top_15_filtered_by_age(collection)
    json_output = json.dumps(top_15_age, default=str, ensure_ascii=False)
    print("Первые 15 записей по полю salary и предикату age<30:", json_output)

    city = 'Ереван'
    jobs = ['Программист','Инженер','Повар']
    top_10_complex = get_top_10_filtered_by_complex_condition(city, jobs,collection)
    json_output = json.dumps(top_10_complex, default=str, ensure_ascii=False)
    print("Первые 10 записей по полю salary:", json_output)

    age_range = (25, 60)
    years = [2019, 2022]
    count = count_records_filtered_by_conditions(age_range, years, collection)
    print("Count of records:", count)

In [49]:
if __name__ == '__main__':
    first_task()

Первые 10 записей по полю salary: [{"_id": "676124bd5582af2f49efefc3", "job": "Программист", "salary": 199877, "id": 1381398, "city": "Скопье", "year": 2021, "age": 54}, {"_id": "676124bd5582af2f49eff047", "job": "Оператор call-центра", "salary": 199756, "id": 3288285, "city": "Варшава", "year": 2013, "age": 48}, {"_id": "676124bd5582af2f49efeffc", "job": "Повар", "salary": 198990, "id": 8597716, "city": "Кишинев", "year": 2002, "age": 65}, {"_id": "676124bd5582af2f49eff043", "job": "Учитель", "salary": 198791, "id": 6653616, "city": "Кордова", "year": 2019, "age": 56}, {"_id": "676124bd5582af2f49efef7a", "job": "Бухгалтер", "salary": 198717, "id": 1348868, "city": "Аликанте", "year": 2019, "age": 18}, {"_id": "676124bd5582af2f49efeeaa", "job": "Водитель", "salary": 198442, "id": 6584214, "city": "Осера", "year": 2008, "age": 41}, {"_id": "676124bd5582af2f49eff016", "job": "Строитель", "salary": 198370, "id": 6818009, "city": "Санкт-Петербург", "year": 2022, "age": 43}, {"_id": "676124

# Задание 2

In [69]:
import json
import pandas as pd

In [70]:
def get_db_connection():
    client = MongoClient('mongodb://root:fskFAdsfgf2513ktbgds13515@80.87.107.237:27017')
    db = client['practises']
    return db['practise5'] 

In [71]:
def get_data(filepath):
    data = []
    current_dict = {}
    
    with open(filepath, mode='r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if line == '=====':
                if current_dict:
                    data.append(current_dict)
                    current_dict = {}
            else:
                try:
                    key, value = line.split('::', 1)
                    current_dict[key.strip()] = value.strip()
                except ValueError:
                    print('Ошибка при считывании линии:', line)

        if current_dict:
            data.append(current_dict)

    for item in data:
        if 'salary' in item:
            item['salary'] = float(item['salary']) if item['salary'] else None
        if 'age' in item:
            item['age'] = float(item['age']) if item['age'] else None
        if 'year' in item:
            item['year'] = float(item['year']) if item['year'] else None

    return json.dumps(data, ensure_ascii=False)

In [72]:
def insert_data_from_json(json_data):
    collection = get_db_connection()
    data = json.loads(json_data)
    result = collection.insert_many(data)
    return result.inserted_ids

In [73]:
def get_salary_stats(collection):
    pipeline = [
        {
            "$group": {
                "_id": None,
                "min_salary": {"$min": "$salary"},
                "avg_salary": {"$avg": "$salary"},
                "max_salary": {"$max": "$salary"}
            }
        }
    ]
    return list(collection.aggregate(pipeline))

In [74]:
def get_profession_counts(collection):
    pipeline = [
        {
            "$group": {
                "_id": "$profession",
                "count": {"$sum": 1}
            }
        }
    ]
    return list(collection.aggregate(pipeline))

In [75]:
def get_salary_stats_by_city(collection):
    pipeline = [
        {
            "$group": {
                "_id": "$city",
                "min_salary": {"$min": "$salary"},
                "avg_salary": {"$avg": "$salary"},
                "max_salary": {"$max": "$salary"}
            }
        }
    ]
    return list(collection.aggregate(pipeline))

In [76]:
def get_salary_stats_by_profession(collection):
    pipeline = [
        {
            "$group": {
                "_id": "$profession",
                "min_salary": {"$min": "$salary"},
                "avg_salary": {"$avg": "$salary"},
                "max_salary": {"$max": "$salary"}
            }
        }
    ]
    return list(collection.aggregate(pipeline))

In [77]:
def get_age_stats_by_city(collection):
    pipeline = [
        {
            "$group": {
                "_id": "$city",
                "min_age": {"$min": "$age"},
                "avg_age": {"$avg": "$age"},
                "max_age": {"$max": "$age"}
            }
        }
    ]
    return list(collection.aggregate(pipeline))


In [78]:
def get_age_stats_by_profession(collection):
    pipeline = [
        {
            "$group": {
                "_id": "$profession",
                "min_age": {"$min": "$age"},
                "avg_age": {"$avg": "$age"},
                "max_age": {"$max": "$age"}
            }
        }
    ]
    return list(collection.aggregate(pipeline))

In [79]:
def get_max_salary_at_min_age(collection):
    pipeline = [
        {
            "$group": {
                "_id": "$age",
                "max_salary": {"$max": "$salary"}
            }
        },
        {
            "$sort": {"_id": 1}
        },
        {
            "$group": {
                "_id": None,
                "max_salary": {"$max": "$max_salary"}
            }
        }
    ]
    return list(collection.aggregate(pipeline))

In [80]:
def get_min_salary_at_max_age(collection):
    pipeline = [
        {
            "$group": {
                "_id": "$age",
                "min_salary": {"$min": "$salary"}
            }
        },
        {
            "$sort": {"_id": -1}
        },
        {
            "$group": {
                "_id": None,
                "min_salary": {"$min": "$min_salary"}
            }
        }
    ]
    return list(collection.aggregate(pipeline))

In [81]:
def get_age_stats_by_city_with_salary_filter(collection):
    pipeline = [
        {
            "$match": {
                "salary": {"$gt": 50000}
            }
        },
        {
            "$group": {
                "_id": "$city",
                "min_age": {"$min": "$age"},
                "avg_age": {"$avg": "$age"},
                "max_age": {"$max": "$age"}
            }
        },
        {
            "$sort": {"avg_age": -1}
        }
    ]
    return list(collection.aggregate(pipeline))

In [82]:
def arbitrary_query(collection):
    pipeline = [
        {
            "$match": {
                "salary": {"$gt": 30000}
            }
        },
        {
            "$group": {
                "_id": "$profession",
                "total_salary": {"$sum": "$salary"},
                "count": {"$sum": 1}
            }
        },
        {
            "$sort": {"total_salary": -1}
        }
    ]
    return list(collection.aggregate(pipeline))

In [87]:
def second_task(insert_data:bool = False):
    if insert_data:
        filepath = './55/task_2_item.text'
        data = get_data(filepath=filepath)
        insert_data_from_json(data)
    collection = get_db_connection()

    print('Вывод минимальной, средней, максимальной salary.')
    print(get_salary_stats(collection),'\n')

    print('Вывод количества данных по представленным профессиям.')
    print(get_profession_counts(collection),'\n')

    print('Вывод минимальной, средней, максимальной salary по городу.')
    print(get_salary_stats_by_city(collection),'\n')

    print('Вывод минимальной, средней, максимальной salary по профессии.')
    print(get_salary_stats_by_profession(collection),'\n')

    print('Вывод минимального, среднего, максимального возраста по городу.')
    print(get_age_stats_by_city(collection),'\n')

    print('Вывод минимального, среднего, максимального возраста по профессии.')
    print(get_age_stats_by_profession(collection),'\n')

    print('Вывод максимальной заработной платы при минимальном возрасте.')
    print(get_max_salary_at_min_age(collection),'\n')

    print('Вывод минимальной заработной платы при максимальном возрасте.')
    print(get_min_salary_at_max_age(collection),'\n')

    print('Вывод минимального, среднего, максимального возраста по городу, при условии, что заработная плата больше 50 000.')
    print(get_age_stats_by_city_with_salary_filter(collection),'\n')

    print('Произвольный запрос с $match, $group, $sort.')
    print(arbitrary_query(collection),'\n')

In [88]:
if __name__ == '__main__':
    second_task()

Вывод минимальной, средней, максимальной salary.
[{'_id': None, 'min_salary': 20048.0, 'avg_salary': 112565.46126033057, 'max_salary': 199877}] 

Вывод количества данных по представленным профессиям.
[{'_id': None, 'count': 1936}] 

Вывод минимальной, средней, максимальной salary по городу.
[{'_id': 'Трухильо', 'min_salary': 47067, 'avg_salary': 122136.86956521739, 'max_salary': 175688.0}, {'_id': 'Кадакес', 'min_salary': 22159.0, 'avg_salary': 113175.11111111111, 'max_salary': 195195.0}, {'_id': 'Вильнюс', 'min_salary': 27872, 'avg_salary': 97170.63157894737, 'max_salary': 188083}, {'_id': 'Белград', 'min_salary': 29002, 'avg_salary': 111712.29166666667, 'max_salary': 189197}, {'_id': 'Алькала-де-Энарес', 'min_salary': 82881.0, 'avg_salary': 146192.05555555556, 'max_salary': 196132}, {'_id': 'Подгорица', 'min_salary': 29740, 'avg_salary': 109322.16666666667, 'max_salary': 188582.0}, {'_id': 'Мадрид', 'min_salary': 30417, 'avg_salary': 111220.43478260869, 'max_salary': 195031.0}, {'_id

# Задание 3

In [None]:
import json
import pandas as pd
import random
from pymongo import MongoClient

In [98]:
def get_data_from_csv(filepath:str):
    df = pd.read_csv(filepath,sep=';')
    json_string = df.to_json(orient='records', force_ascii=False)
    return json_string

In [99]:
def get_db_connection():
    client = MongoClient('mongodb://root:fskFAdsfgf2513ktbgds13515@80.87.107.237:27017')
    db = client['practises']
    return db['practise5'] 

In [None]:
def insert_data_from_json(json_data):
    collection = get_db_connection()
    data = json.loads(json_data)
    result = collection.insert_many(data)

In [101]:
def delete_documents_by_salary(collection):
    result = collection.delete_many({"$or": [{"salary": {"$lt": 25000}}, {"salary": {"$gt": 175000}}]})
    return result.deleted_count

In [102]:
def increment_age(collection):
    result = collection.update_many({}, {"$inc": {"age": 1}})
    return result.modified_count

In [103]:
def increase_salary_for_random_professions(collection, professions):
    selected_profession = random.choice(professions)
    result = collection.update_many({"profession": selected_profession}, {"$mul": {"salary": 1.05}})
    return result.modified_count

In [104]:
def increase_salary_for_random_cities(collection, cities):
    selected_city = random.choice(cities)
    result = collection.update_many({"city": selected_city}, {"$mul": {"salary": 1.07}})
    return result.modified_count

In [105]:
def increase_salary_by_complex_predicate(collection, city, professions, age_range):
    result = collection.update_many(
        {
            "city": city,
            "profession": {"$in": professions},
            "age": {"$gte": age_range[0], "$lte": age_range[1]}
        },
        {"$mul": {"salary": 1.10}}
    )
    return result.modified_count

In [106]:
def delete_documents_by_custom_predicate(collection, predicate):
    result = collection.delete_many(predicate)
    return result.deleted_count

In [None]:
def third_task(insert_data:bool = True):
    if insert_data:
        filepath = './55/task_3_item.csv'
        data = get_data_from_csv(filepath)
        insert_data_from_json(data)

    collection = get_db_connection()

    deleted_count = delete_documents_by_salary(collection)
    print(f"Удалить документы по предикату: {deleted_count}")

    incremented_count = increment_age(collection)
    print(f"Увеличить возраст: {incremented_count}")

    professions = ['IT-специалист', 'Врач']
    increased_salary_count = increase_salary_for_random_professions(collection, professions)
    print(f"Поднять зарплату на 5% для произвольно выбранных профессий: {increased_salary_count}")

    cities =['Сан-Себастьян', 'Афины']
    increased_salary_city_count = increase_salary_for_random_cities(collection, cities)
    print(f"Поднять зарплату на 7% для произвольно выбранных городов: {increased_salary_city_count}")

    city = 'Тбилиси' 
    professions = ['Инженер', 'Менеджер'] 
    age_range = (20, 60)
    increased_salary_complex_count = increase_salary_by_complex_predicate(collection, city, professions, age_range)
    print(f"Поднять зарплату на 10% для выборки по сложному предикату: {increased_salary_complex_count}")

    custom_predicate = {"age": {"$lt": 30}} 
    deleted_custom_count = delete_documents_by_custom_predicate(collection, custom_predicate)
    print(f"Удалить записи по произвольному предикату: {deleted_custom_count}")

In [112]:
if __name__ == '__main__':
    third_task()

Удалить документы по предикату: 123
Увеличить возраст: 2330
Поднять зарплату на 5% для произвольно выбранных профессий: 0
Поднять зарплату на 7% для произвольно выбранных городов: 23
Поднять зарплату на 10% для выборки по сложному предикату: 0
Удалить записи по произвольному предикату: 137


# Задание 4

*Предметная область: покемоны*

In [144]:
import json
import pandas as pd
import requests
from pymongo import MongoClient
from tqdm import tqdm 

In [145]:
def get_db_connection(collection_name:str, db_name:str='practises'):
    client = MongoClient('mongodb://root:fskFAdsfgf2513ktbgds13515@80.87.107.237:27017')
    db = client[db_name]
    return db[collection_name] 

In [146]:
def save_json(data, filepath):
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

In [147]:
def fetch_pokemon_data(limit=800):
    base_url = "https://pokeapi.co/api/v2/pokemon/"
    all_pokemon = []

    num_requests = limit // 20 + (1 if limit % 20 > 0 else 0)

    for offset in tqdm(range(0, limit, 20), total=num_requests, desc="Fetching Pokémon"):
        response = requests.get(f"{base_url}?limit=20&offset={offset}")
        
        if response.status_code == 200:
            data = response.json()
            for pokemon in data['results']:
                pokemon_response = requests.get(pokemon['url'])
                if pokemon_response.status_code == 200:
                    pokemon_data = pokemon_response.json()
                    combined_data = {
                        'name': pokemon_data['name'],
                        'id': pokemon_data['id'],
                        'height': pokemon_data['height'],
                        'weight': pokemon_data['weight'],
                        'types': [type_info['type']['name'] for type_info in pokemon_data['types']],
                        'abilities': [ability_info['ability']['name'] for ability_info in pokemon_data['abilities']],
                        'stats': {stat['stat']['name']: stat['base_stat'] for stat in pokemon_data['stats']}
                    }
                    all_pokemon.append(combined_data)
                else:
                    print(f"Failed to retrieve data for {pokemon['name']}: {pokemon_response.status_code}")
        else:
            print(f"Failed to retrieve data: {response.status_code}")
            break

    return all_pokemon

In [148]:
def load_pokemon_data_to_mongo(filepath):    
    with open(filepath, 'r', encoding='utf-8') as f:
        pokemons = json.load(f)
    return pokemons

In [149]:
def insert_data_from_json(json_data, collection_name):
    collection = get_db_connection(collection_name)
    result = collection.insert_many(json_data)

In [150]:
def get_first_10_pokemons(condition=None):
    collection = get_db_connection('pokemons')  
    if condition:
        return list(collection.find(condition).limit(10))
    return list(collection.find().limit(10))


In [151]:
def count_pokemons_by_conditions(conditions):
    collection = get_db_connection('pokemons')
    return collection.count_documents(conditions)

In [152]:
def get_statistical_parameters(field):
    collection = get_db_connection('pokemons') 
    max_value = collection.find_one(sort=[(field, -1)])
    min_value = collection.find_one(sort=[(field, 1)])
    avg_value = collection.aggregate([
        {"$group": {"_id": None, "average": {"$avg": f"${field}"}}}
    ])
    average = list(avg_value)[0]['average'] if avg_value else None

    return {
        "max": max_value[field] if max_value else None,
        "min": min_value[field] if min_value else None,
        "average": average
    }

In [153]:
def get_max_min_by_condition(condition, field):
    collection = get_db_connection('pokemons')
    max_value = collection.find_one(condition, sort=[(field, -1)])
    min_value = collection.find_one(condition, sort=[(field, 1)])

    return {
        "max": max_value[field] if max_value else None,
        "min": min_value [field] if min_value else None
    }

In [154]:
def delete_pokemons_by_condition(condition):
    collection = get_db_connection('pokemons')
    result = collection.delete_many(condition)
    return result.deleted_count

In [155]:
def update_pokemons_by_condition(condition, update_data):
    collection = get_db_connection('pokemons')
    result = collection.update_many(condition, {"$set": update_data})
    return result.modified_count

In [None]:
def fourth_task(insert_data: bool = False):
    if insert_data:
        data = fetch_pokemon_data()
        save_json(data=data, filepath='./Answers/pokemons.json')
        data = load_pokemon_data_to_mongo(filepath='./Answers/pokemons.json')
        insert_data_from_json(data, 'pokemons')
        
    print(get_first_10_pokemons())
    print(count_pokemons_by_conditions({"types": "fire"}))
    print(get_statistical_parameters("height"))
    print(get_max_min_by_condition({"types": "water"}, "weight"))
    print(delete_pokemons_by_condition({"name": "pikachu"}))
    print(update_pokemons_by_condition({"name": "bulbasaur"}, {"height": 7}))



In [157]:
if __name__ == "__main__":
    fourth_task(True)

Fetching Pokémon: 100%|██████████| 40/40 [05:18<00:00,  7.96s/it]


[{'_id': ObjectId('6761679a5582af2f49efff1d'), 'name': 'bulbasaur', 'id': 1, 'height': 7, 'weight': 69, 'types': ['grass', 'poison'], 'abilities': ['overgrow', 'chlorophyll'], 'stats': {'hp': 45, 'attack': 49, 'defense': 49, 'special-attack': 65, 'special-defense': 65, 'speed': 45}}, {'_id': ObjectId('6761679a5582af2f49efff1e'), 'name': 'ivysaur', 'id': 2, 'height': 10, 'weight': 130, 'types': ['grass', 'poison'], 'abilities': ['overgrow', 'chlorophyll'], 'stats': {'hp': 60, 'attack': 62, 'defense': 63, 'special-attack': 80, 'special-defense': 80, 'speed': 60}}, {'_id': ObjectId('6761679a5582af2f49efff1f'), 'name': 'venusaur', 'id': 3, 'height': 20, 'weight': 1000, 'types': ['grass', 'poison'], 'abilities': ['overgrow', 'chlorophyll'], 'stats': {'hp': 80, 'attack': 82, 'defense': 83, 'special-attack': 100, 'special-defense': 100, 'speed': 80}}, {'_id': ObjectId('6761679a5582af2f49efff20'), 'name': 'charmander', 'id': 4, 'height': 6, 'weight': 85, 'types': ['fire'], 'abilities': ['blaze