In [42]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm

In [43]:
# Read the three source tables (average age, living wage, retirement age)
# from the local data/ directory, in that order.
age, wage, retirement = map(
    pd.read_csv,
    (
        "data/average_age.csv",
        "data/living_wage.csv",
        "data/retirement_age.csv",
    ),
)

In [44]:
# Give every table a datetime "date" column, parsed with the same
# %Y-%m-%d format.
# NOTE(review): the retirement table is parsed from its "year" column using a
# full-date format (an earlier draft used format="%Y") -- confirm that the
# column really holds YYYY-MM-DD strings.
for _table, _source_column in ((age, "date"), (wage, "date"), (retirement, "year")):
    _table["date"] = pd.to_datetime(_table[_source_column], format="%Y-%m-%d")

In [45]:
# Keep only rows inside the half-open analysis window [start_date, end_date).
start_date, end_date = "2000-01-01", "2024-01-01"


def _in_window(frame):
    """Return a boolean mask of rows whose ``date`` falls inside the window."""
    return (frame["date"] >= start_date) & (frame["date"] < end_date)


age = age[_in_window(age)]
wage = wage[_in_window(wage)]
retirement = retirement[_in_window(retirement)]

In [46]:
# Draw average age and retirement age on one set of axes, write the figure
# to disk, and close it so nothing is rendered inline.
fig, ax = plt.subplots(figsize=(16, 8))

sns.lineplot(
    data=age,
    x="date",
    y="value",
    label="Средний возраст",
    linestyle="-",
    marker="o",
    ax=ax,
)
sns.lineplot(
    data=retirement,
    x="date",
    y="value",
    label="Пенсионный возраст",
    linestyle="-",
    marker="o",
    ax=ax,
)

ax.set_title("Изменение среднего возраста и пенсионного возраста в России")
ax.set_xlabel("Дата")
ax.set_ylabel("Возраст")
ax.grid(True)
ax.legend()

fig.savefig("results/age_and_retirement.png")

plt.close(fig)

In [47]:
# Draw the living-wage series, write the figure to disk, and close it so
# nothing is rendered inline.
fig, ax = plt.subplots(figsize=(16, 8))

sns.lineplot(
    data=wage,
    x="date",
    y="value",
    label="Прожиточный минимум",
    linestyle="-",
    marker="o",
    ax=ax,
)

ax.set_title("Изменение прожиточного минимума в России")
ax.set_xlabel("Дата")
ax.set_ylabel("Прожиточный минимум")
ax.grid(True)
ax.legend()

fig.savefig("results/wage.png")

plt.close(fig)

## Синтез данных

In [None]:
def generate_retirement_age(mean_age, scale=1, random_state=None) -> int:
    """Draw one random retirement age from a normal distribution.

    Args:
        mean_age: Mean (``loc``) of the normal distribution to sample from.
        scale: Standard deviation of the distribution. Defaults to 1, the
            value that used to be hard-coded.
        random_state: Optional seed or random generator forwarded to
            ``norm.rvs`` so that a draw can be reproduced. With the default
            ``None`` SciPy falls back to its default random state, exactly as
            before this parameter existed.

    Returns:
        The sampled value converted with ``int``.
    """
    sample = norm.rvs(loc=mean_age, scale=scale, random_state=random_state)
    # int() truncates toward zero: the fractional part is dropped, not rounded.
    return int(sample)


def get_minimum_wage(year):
    """Return the living-wage value stored for the given date key.

    Looks in the notebook-level ``wage`` frame for rows whose ``date`` column
    equals ``year`` and returns the ``value`` of the first such row.

    Args:
        year: Key compared for equality against ``wage["date"]``. Despite the
            name, it has to equal a full entry of that column.
            NOTE(review): presumably a date string or Timestamp -- confirm
            against the callers.

    Returns:
        The ``value`` entry of the first matching row.

    Raises:
        IndexError: If no row of ``wage`` matches ``year``.
    """
    matches = wage.loc[wage["date"] == year, "value"]
    if matches.empty:
        # Same exception type that indexing the empty result would raise,
        # but with a message that names the key that was not found.
        raise IndexError(f"no living-wage record for date {year!r}")
    return matches.values[0]