# Задание 1

In [88]:
import json
import pandas as pd

In [89]:
def read_json(filepath:str):
    """Load and return the JSON document stored at ``filepath``.

    The file is opened as UTF-8 text and decoded with ``json.load``; the
    decoded Python object is returned unchanged.
    """
    with open(filepath, mode='r', encoding='utf-8') as source:
        return json.load(source)

In [90]:
def save_json(filepath: str, df: pd.DataFrame):
    """Write the rows of ``df`` to ``filepath`` as a JSON array of objects.

    Each row becomes one JSON object keyed by column name. Non-ASCII
    characters are written as-is (not escaped).
    """
    records = df.to_dict(orient='records')
    # Serialize first so the target file is only opened once the payload exists.
    payload = json.dumps(records, ensure_ascii=False)
    with open(filepath, mode='w', encoding='UTF-8') as target:
        target.write(payload)

In [91]:
def create_df(data: object) -> pd.DataFrame:
    """Build a DataFrame from already-decoded JSON data.

    Args:
        data: Anything ``pd.DataFrame`` accepts; the callers in this file pass
            the object returned by ``read_json``.

    Returns:
        A new ``pd.DataFrame`` constructed from ``data``.
    """
    # The parameter used to be annotated with the ``json`` module, which is
    # not a type; ``object`` states the actual contract.
    return pd.DataFrame(data)

In [92]:
def sort_data(df:pd.DataFrame):
    """Return the rows of ``df`` ordered by ``salary``, highest first.

    The input frame is not modified; a sorted copy is returned.
    """
    return df.sort_values('salary', ascending=False)

In [93]:
def filter_data(df:pd.DataFrame):
    """Return only the rows whose ``age`` is strictly below 30."""
    is_under_30 = df['age'].lt(30)
    return df[is_under_30]

In [94]:
def get_ten_values(df: pd.DataFrame, jobs=None, city: str = 'Ереван', limit: int = 10):
    """Return the youngest rows for one city and a set of jobs.

    Args:
        df: Frame with ``city``, ``job`` and ``age`` columns.
        jobs: Job titles to keep. Defaults to the original fixed selection
            when omitted.
        city: City to keep. Defaults to the originally hard-coded city.
        limit: Maximum number of rows to return (10 by default).

    Returns:
        At most ``limit`` matching rows, ordered by ascending ``age``.
    """
    if jobs is None:
        # Built inside the function to avoid a shared mutable default.
        jobs = ['Программист', 'Инженер', 'Повар']

    matches = df[(df['city'] == city) & (df['job'].isin(jobs))]
    return matches.sort_values(by='age').head(limit)


In [95]:
def difficult_filter(df:pd.DataFrame):
    """Select rows by combined age, year and salary conditions.

    A row is kept when all of the following hold:
      * ``age`` is within 25..60 (inclusive);
      * ``year`` is within 2019..2022 (inclusive);
      * ``salary`` is in (50000, 75000] or in (125000, 150000).

    Returns:
        The matching subset of ``df``.
    """
    age_min, age_max = 25, 60
    year_min, year_max = 2019, 2022

    age_ok = df['age'].between(age_min, age_max)
    # The bounds describe a range; the earlier ``isin([2019, 2022])`` matched
    # only the two endpoint years and dropped 2020 and 2021.
    year_ok = df['year'].between(year_min, year_max)

    salary = df['salary']
    # Explicit parentheses: the result no longer relies on ``&`` binding
    # tighter than ``|``.
    salary_ok = (
        ((salary > 50000) & (salary <= 75000))
        | ((salary > 125000) & (salary < 150000))
    )
    return df[age_ok & year_ok & salary_ok]

In [96]:
def first_task():
    """Run task 1: load the JSON data set and save four derived JSON files.

    Written to ``./Answers``:
      * ``sorted_data.json`` - all rows, as ordered by ``sort_data``;
      * ``filtred_data.json`` - the sorted rows after ``filter_data``;
      * ``ten_values_data.json`` - the result of ``get_ten_values``;
      * ``difficult_filtred_data.json`` - the result of ``difficult_filter``.
    """
    # Local import so this block does not depend on a file-level ``import os``.
    import os

    filepath = './55/task_1_item.json'
    # Create the output folder up front; ``open(..., 'w')`` does not create
    # missing directories and would fail on a fresh checkout.
    os.makedirs('./Answers', exist_ok=True)

    data = read_json(filepath=filepath)
    df = create_df(data)

    sorted_df = sort_data(df)
    save_json('./Answers/sorted_data.json', sorted_df)

    # The simple filter runs on the sorted frame, so its output stays sorted.
    filtered_df = filter_data(sorted_df)
    save_json('./Answers/filtred_data.json', filtered_df)

    ten_values_df = get_ten_values(df)
    save_json('./Answers/ten_values_data.json', ten_values_df)

    difficult_filtered_df = difficult_filter(df)
    save_json('./Answers/difficult_filtred_data.json', difficult_filtered_df)

    print('All answers saved in folder ./Answers/')

In [97]:
# Run the first task only when this code is executed as the main module.
if __name__ == '__main__':
    first_task()

All answers saved in folder ./Answers/


# Задание 2

In [139]:
import json
import pandas as pd

In [140]:
def get_data(filepath):
    """Parse a ``key::value`` text file into a DataFrame.

    The file is read as UTF-8. Each non-empty line is either a record
    separator (exactly ``=====``) or a ``key::value`` pair belonging to the
    current record. Keys and values are stripped of surrounding whitespace.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A ``pd.DataFrame`` with one row per record. The ``salary``, ``age``
        and ``year`` columns, when present, are converted to numbers; values
        that cannot be parsed become NaN.
    """
    records = []
    current = {}

    with open(filepath, mode='r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                # Blank lines carry no data; skip them instead of reporting
                # them as malformed.
                continue
            if line == '=====':
                if current:
                    records.append(current)
                    current = {}
                continue

            key, separator, value = line.partition('::')
            if not separator:
                print('Error processing line:', line)
                continue
            current[key.strip()] = value.strip()

    # The last record may not be followed by a separator line.
    if current:
        records.append(current)

    df = pd.DataFrame(records)
    # Convert each numeric column from its own values. ``year`` used to be
    # filled from ``age``, which silently replaced every year with an age.
    for column in ('salary', 'age', 'year'):
        if column in df.columns:
            df[column] = pd.to_numeric(df[column], errors='coerce')
    return df

In [141]:
def create_df(data: object) -> pd.DataFrame:
    """Wrap already-decoded JSON data in a DataFrame.

    Args:
        data: Anything ``pd.DataFrame`` accepts; ``second_task`` passes the
            object returned by ``read_json``.

    Returns:
        A new ``pd.DataFrame`` constructed from ``data``.
    """
    # ``json`` is a module, not a type, so it was a misleading annotation.
    return pd.DataFrame(data)

In [142]:
def read_json(filepath:str):
    """Return the decoded contents of the UTF-8 JSON file at ``filepath``."""
    with open(filepath, mode='r', encoding='utf-8') as source:
        parsed = json.load(source)
        return parsed

In [161]:
def min_avg_max_salary(df):
    """Return ``(min, mean, max)`` of the ``salary`` column as plain floats."""
    salaries = df['salary']
    lowest, average, highest = salaries.min(), salaries.mean(), salaries.max()
    return float(lowest), float(average), float(highest)

In [144]:
def count_jobs(df):
    """Return how many rows exist for each distinct value of ``job``."""
    job_column = df['job']
    return job_column.value_counts()

In [145]:
def min_avg_max_salary_by_city(df):
    """Return min / mean / max of ``salary`` for every ``city``.

    The result is indexed by city and has the columns ``min``, ``mean``
    and ``max``.
    """
    statistics = ['min', 'mean', 'max']
    salary_per_city = df.groupby('city')['salary']
    return salary_per_city.agg(statistics)

In [146]:
def min_avg_max_salary_by_job(df):
    """Return min / mean / max of ``salary`` for every ``job``.

    The result is indexed by job and has the columns ``min``, ``mean``
    and ``max``.
    """
    statistics = ['min', 'mean', 'max']
    salary_per_job = df.groupby('job')['salary']
    return salary_per_job.agg(statistics)

In [147]:
def min_avg_max_age_by_city(df):
    """Return min / mean / max of ``age`` for every ``city``.

    The result is indexed by city and has the columns ``min``, ``mean``
    and ``max``.
    """
    statistics = ['min', 'mean', 'max']
    age_per_city = df.groupby('city')['age']
    return age_per_city.agg(statistics)

In [148]:
def min_avg_max_age_by_job(df):
    """Return min / mean / max of ``age`` for every ``job``.

    The result is indexed by job and has the columns ``min``, ``mean``
    and ``max``.
    """
    statistics = ['min', 'mean', 'max']
    age_per_job = df.groupby('job')['age']
    return age_per_job.agg(statistics)

In [149]:
def max_salary_at_min_age(df):
    """Return the highest ``salary`` among the rows with the lowest ``age``."""
    ages = df['age']
    is_youngest = ages.eq(ages.min())
    return df.loc[is_youngest, 'salary'].max()

In [150]:
def min_salary_at_max_age(df):
    """Return the lowest ``salary`` among the rows with the highest ``age``."""
    ages = df['age']
    is_oldest = ages.eq(ages.max())
    return df.loc[is_oldest, 'salary'].min()

In [151]:
def min_avg_max_age_by_city_with_salary_condition(df):
    """Return per-city age statistics for rows with ``salary`` above 50000.

    The result has the columns ``min``, ``mean`` and ``max`` and is ordered
    by ``mean`` age, highest first.
    """
    well_paid = df[df['salary'].gt(50000)]
    age_stats = well_paid.groupby('city')['age'].agg(['min', 'mean', 'max'])
    return age_stats.sort_values(by='mean', ascending=False)

In [152]:
def min_avg_max_salary_in_ranges(df, age_range1=(18, 25), age_range2=(50, 65)):
    """Return salary statistics per (city, job) for two age ranges.

    A row is used when its ``age`` falls inside ``age_range1`` or
    ``age_range2``; each range is unpacked into ``Series.between``. The
    result is indexed by ``city`` and ``job`` and has the columns ``min``,
    ``mean`` and ``max``.
    """
    ages = df['age']
    in_first_range = ages.between(*age_range1)
    in_second_range = ages.between(*age_range2)
    selected = df[in_first_range | in_second_range]
    salary_groups = selected.groupby(['city', 'job'])['salary']
    return salary_groups.agg(['min', 'mean', 'max'])

In [153]:
def arbitrary_query(df, match_condition, group_by, sort_by):
    """Filter, group, aggregate and sort ``df`` in one call.

    Args:
        df: Source frame.
        match_condition: Expression passed to ``DataFrame.query`` to select
            rows.
        group_by: Column name (or list of names) to group by.
        sort_by: Column of the aggregated frame to sort by, for example
            ``('salary', 'mean')``.

    Returns:
        A frame with ``count``, ``mean`` and ``sum`` for every numeric
        column other than the grouping keys, sorted by ``sort_by`` in
        descending order.
    """
    filtered_df = df.query(match_condition)

    if isinstance(group_by, (list, tuple)):
        group_keys = list(group_by)
    else:
        group_keys = [group_by]

    # ``mean`` is undefined for text columns and recent pandas versions raise
    # on them, so only numeric columns are aggregated.
    numeric_columns = [
        column
        for column in filtered_df.select_dtypes(include='number').columns
        if column not in group_keys
    ]

    grouped_df = filtered_df.groupby(group_by)[numeric_columns].agg(['count', 'mean', 'sum'])
    return grouped_df.sort_values(by=sort_by, ascending=False)

In [None]:
def second_task():
    """Run task 2: merge the JSON and text data sets and print statistics.

    Both sources are loaded, stacked row-wise into one frame, dumped to
    ``text.xlsx`` and then summarised with the aggregation helpers; every
    result is printed.
    """
    filepath1 = './55/task_1_item.json'
    filepath2 = './55/task_2_item.text'
    data1 = read_json(filepath1)
    df1 = create_df(data1)
    df2 = get_data(filepath2)
    # Renumber the rows: without ``ignore_index`` each source keeps its own
    # 0..n labels, so the combined frame (and the Excel dump below) contains
    # duplicate row labels.
    df = pd.concat([df1, df2], axis=0, ignore_index=True)
    # Snapshot of the merged data. NOTE(review): writing .xlsx needs an Excel
    # writer engine to be installed - confirm it is available.
    df.to_excel('text.xlsx')
    print("Минимальная, средняя, максимальная зарплата:", min_avg_max_salary(df))
    print("\nКоличество данных по профессиям:\n", count_jobs(df))
    print("\nМинимальная, средняя, максимальная зарплата по городам:\n", min_avg_max_salary_by_city(df))
    print("\nМинимальная, средняя, максимальная зарплата по профессиям:\n", min_avg_max_salary_by_job(df))
    print("\nМинимальный, средний, максимальный возраст по городам:\n", min_avg_max_age_by_city(df))
    print("\nМинимальный, средний, максимальный возраст по профессиям:\n", min_avg_max_age_by_job(df))
    print("\nМаксимальная зарплата при минимальном возрасте:", max_salary_at_min_age(df))
    print("\nМинимальная зарплата при максимальном возрасте:", min_salary_at_max_age(df))
    print("\nМинимальный, средний, максимальный возраст по городам с зарплатой > 50000:\n", min_avg_max_age_by_city_with_salary_condition(df))
    print("\nМинимальная, средняя, максимальная зарплата в диапазонах:\n", min_avg_max_salary_in_ranges(df))

    # Example of a free-form query: well-paid rows, grouped by job and
    # ordered by mean salary.
    match_condition = "salary > 50000"
    group_by = 'job'
    sort_by = ('salary', 'mean')

    result = arbitrary_query(df, match_condition, group_by, sort_by)
    print("\nРезультаты произвольного запроса:\n", result)


In [None]:
# Run the second task only when this code is executed as the main module.
if __name__ == '__main__':
    second_task()

# Задание 3

In [183]:
import json
import pandas as pd

In [184]:
def get_data_from_json(filepath:str):
    """Decode the UTF-8 JSON file at ``filepath`` and return its contents."""
    with open(filepath, mode='r', encoding='utf-8') as source:
        return json.load(source)

In [185]:
def get_data_from_csv(filepath:str):
    """Read the semicolon-separated file at ``filepath`` into a DataFrame."""
    return pd.read_csv(filepath, sep=';')

In [186]:
def get_data_from_txt(filepath):
    """Parse a ``key::value`` text file into a DataFrame.

    The file is read as UTF-8. A line equal to ``=====`` closes the current
    record; every other non-empty line is split once on ``::`` into a key
    and a value, both stripped of surrounding whitespace.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A ``pd.DataFrame`` with one row per record. The ``salary``, ``age``
        and ``year`` columns, when present, are converted to numbers; values
        that cannot be parsed become NaN.
    """
    records = []
    current = {}

    with open(filepath, mode='r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                # Empty lines are not data and not worth an error message.
                continue
            if line == '=====':
                if current:
                    records.append(current)
                    current = {}
                continue

            key, separator, value = line.partition('::')
            if not separator:
                print('Error processing line:', line)
                continue
            current[key.strip()] = value.strip()

    # Keep a trailing record that has no closing separator.
    if current:
        records.append(current)

    df = pd.DataFrame(records)
    # Each column is parsed from itself; ``year`` was previously derived from
    # ``age``, which overwrote all years with ages.
    for column in ('salary', 'age', 'year'):
        if column in df.columns:
            df[column] = pd.to_numeric(df[column], errors='coerce')
    return df

In [187]:
def remove_values(df:pd.DataFrame):
    """Keep only the rows whose ``salary`` lies in 25000..175000 inclusive."""
    salary_in_bounds = df['salary'].between(25000, 175000)
    return df[salary_in_bounds]

In [188]:
def increase_age(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with every ``age`` increased by one.

    The input frame is left untouched. Working on a copy also avoids
    pandas' chained-assignment problems when ``df`` is a filtered slice of
    another frame.
    """
    result = df.copy()
    result['age'] = result['age'] + 1
    return result

In [189]:
def salary_by_job(df: pd.DataFrame, jobs: list) -> pd.DataFrame:
    """Return a copy of ``df`` with a 5% raise for the given jobs.

    Args:
        df: Frame with ``job`` and ``salary`` columns.
        jobs: Job titles whose rows receive the raise.

    Returns:
        A new frame; ``df`` itself is not modified.
    """
    result = df.copy()
    selected = result['job'].isin(jobs)
    if selected.any():
        # Raised salaries are fractional, so make the column float first
        # instead of writing floats into an integer column in place.
        result['salary'] = result['salary'].astype(float)
        result.loc[selected, 'salary'] *= 1.05
    return result

In [190]:
def salary_by_city(df: pd.DataFrame, cities: list) -> pd.DataFrame:
    """Return a copy of ``df`` with a 7% raise for the given cities.

    Args:
        df: Frame with ``city`` and ``salary`` columns.
        cities: Cities whose rows receive the raise.

    Returns:
        A new frame; ``df`` itself is not modified.
    """
    result = df.copy()
    selected = result['city'].isin(cities)
    if selected.any():
        # Raised salaries are fractional, so make the column float first
        # instead of writing floats into an integer column in place.
        result['salary'] = result['salary'].astype(float)
        result.loc[selected, 'salary'] *= 1.07
    return result

In [191]:
def salary_by_condition(df: pd.DataFrame, city: str, jobs: list, age_range: tuple) -> pd.DataFrame:
    """Return a copy of ``df`` with a 10% raise for rows matching all filters.

    Args:
        df: Frame with ``city``, ``job``, ``age`` and ``salary`` columns.
        city: Exact city a row must have.
        jobs: Job titles a row may have.
        age_range: Arguments unpacked into ``Series.between``; the caller in
            this file passes a ``(low, high)`` pair.

    Returns:
        A new frame; ``df`` itself is not modified.
    """
    result = df.copy()
    selected = (
        (result['city'] == city)
        & result['job'].isin(jobs)
        & result['age'].between(*age_range)
    )
    if selected.any():
        # Raised salaries are fractional, so make the column float first
        # instead of writing floats into an integer column in place.
        result['salary'] = result['salary'].astype(float)
        result.loc[selected, 'salary'] *= 1.10
    return result

In [192]:
def remove_by_predicate(df: pd.DataFrame, predicate: str) -> pd.DataFrame:
    """Return ``df`` without the rows that satisfy ``predicate``.

    Args:
        df: Source frame.
        predicate: Boolean expression over the columns of ``df``, in the
            syntax accepted by ``DataFrame.eval`` / ``DataFrame.query``
            (for example ``"age < 30"``).

    Returns:
        The rows for which ``predicate`` is false.
    """
    # ``df.query(predicate)`` keeps the matching rows, which is the opposite
    # of removing them. Evaluate the predicate and invert the mask instead.
    matches = df.eval(predicate)
    return df[~matches]

In [196]:
def third_task():
    """Run task 3: merge three data sources and apply a series of updates.

    The CSV, JSON and text data sets are stacked row-wise into one frame,
    which is then passed through the helper functions in the order below.
    The final frame is printed.
    """
    filepath = './55/task_3_item.csv'
    filepath2 = './55/task_1_item.json'
    filepath3 = './55/task_2_item.text'

    df1 = get_data_from_csv(filepath)
    df2 = pd.DataFrame(get_data_from_json(filepath2))
    df3 = get_data_from_txt(filepath3)

    # Renumber the rows: without ``ignore_index`` each source keeps its own
    # 0..n labels and the combined frame has duplicate row labels.
    df = pd.concat([df1, df2, df3], axis=0, ignore_index=True)

    # Drop rows by the fixed salary predicate.
    df = remove_values(df)

    # Increase every age by 1.
    df = increase_age(df)

    # Raise salary by 5% for the selected jobs.
    selected_jobs = ['IT-специалист', 'Врач']
    df = salary_by_job(df, selected_jobs)

    # Raise salary by 7% for the selected cities.
    selected_cities = ['Сан-Себастьян', 'Афины']
    df = salary_by_city(df, selected_cities)

    # Raise salary by 10% for rows matching the combined predicate.
    complex_city = 'Тбилиси'
    complex_jobs = ['Инженер', 'Менеджер']
    age_range = (30, 50)
    df = salary_by_condition(df, complex_city, complex_jobs, age_range)

    # Apply the free-form predicate step; replace the expression as needed.
    custom_predicate = "age < 30"
    df = remove_by_predicate(df, custom_predicate)

    # Show the result.
    print(df)

In [None]:
# Run the third task only when this code is executed as the main module.
if __name__ == '__main__':
    third_task()

# Задание 4

In [None]:
import json
import pandas as pd