In [3]:
# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings as wr
from pathlib import Path
from scipy.stats import pearsonr
# Suppress every warning for the rest of the session so library notices do
# not clutter the output.
wr.filterwarnings(action='ignore')

# The input workbook is looked up in a 'Work' folder located two levels above
# the current working directory.
work_dir = Path.cwd().parent.parent.joinpath('Work')
df = pd.read_excel(work_dir.joinpath("summary_tests.xlsx"))
#print(df)

In [None]:
# Age groups. With right=False every bin is half-open, [lower, upper), so the
# edges below yield [0, 30), [30, 56) and [56, inf). That matches the labels:
# age 55 belongs to '30-55', and ages of 100 or more still fall into '56+'
# instead of becoming NaN.
bins = [0, 30, 56, np.inf]
labels = ['<30', '30-55', '56+']
df['age_group'] = pd.cut(df['age'], bins=bins, labels=labels, right=False)

#df.to_excel("D:/Work/summary_tests_with_age_group.xlsx", index=False)
# Identifier / grouping columns that must not be plotted as features.
delete_columns = ['id_sample', 'age', 'sex', 'Unnamed: 0', 'age_group']

# Every remaining column is treated as a feature to plot.
features = [column for column in df.columns if column not in delete_columns]

# Recode the numeric sex codes (0 -> 'М', 1 -> 'Ж') in one pass. Assigning a
# new Series avoids writing strings into a numeric column through .loc, which
# pandas deprecates as an incompatible-dtype setitem. Values other than 0 and
# 1 are left untouched, exactly as before.
df['sex'] = df['sex'].replace({0: 'М', 1: 'Ж'})

# Output folder for the generated box plots; created on demand.
save_dir = work_dir / 'Box_plots_for_features'
save_dir.mkdir(parents=True, exist_ok=True)

def is_dir_not_empty(dir):
    """Tell whether a directory contains at least one entry.

    Args:
        dir: A ``pathlib.Path``-like object exposing ``iterdir()``.

    Returns:
        ``True`` if iterating the directory yields any entry (file or
        sub-directory), ``False`` if it yields nothing.
    """
    # Seeing a single entry is enough; stop at the first one.
    for _ in dir.iterdir():
        return True
    return False

def delete_files_in_dir(dir):
    """Remove every regular file directly inside a directory.

    Sub-directories and their contents are left in place; only entries
    matched by ``dir.glob('*')`` for which ``is_file()`` is true are unlinked.

    Args:
        dir: A ``pathlib.Path``-like directory object.
    """
    for entry in dir.glob('*'):
        if not entry.is_file():
            # Not a file (e.g. a sub-directory): keep it.
            continue
        entry.unlink()

def get_quantile(dataFrame, column, percent):
    """Return a quantile of one column of a data frame.

    Args:
        dataFrame: Object indexable by ``column`` whose result exposes
            ``quantile`` (e.g. a pandas DataFrame).
        column: Name of the column to evaluate.
        percent: Quantile level, forwarded unchanged to ``quantile``; the
            callers in this file pass fractions such as 0.995, not
            percentages.

    Returns:
        Whatever ``quantile`` returns for the selected column.
    """
    selected = dataFrame[column]
    return selected.quantile(q=percent)

def BoxPlotsForFeatures(data, feature, feature_name, path_to_file):
    """Draw a box plot of one feature by age group and sex, and save it as PNG.

    Args:
        data: Data frame to plot. It must contain the columns ``'age_group'``
            (x axis), ``'sex'`` (hue, with values ``'М'`` / ``'Ж'``) and
            ``feature`` (y axis).
        feature: Column name of the feature; also used as the output file
            name (``<feature>.png``).
        feature_name: Human-readable feature name used in the title and the
            y-axis label.
        path_to_file: Directory (``pathlib.Path``) the image is written to.

    Returns:
        None. The figure is written to disk and then closed.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # Draw on the axes created above rather than on whatever happens to
        # be pyplot's "current" axes.
        sns.boxplot(
            data=data,
            x='age_group',
            y=feature,
            hue='sex',
            palette={'М': "skyblue", 'Ж': "lightcoral"},
            hue_order=['М', 'Ж'],
            ax=ax,
        )

        # Clip the y axis to the 0.1%-99.5% quantile range so that extreme
        # outliers do not squash the boxes. The quantiles come from the frame
        # that is actually plotted (``data``), not from a module-level global.
        y_max = get_quantile(data, feature, 0.995)
        y_min = get_quantile(data, feature, 0.001)
        ax.set_ylim(bottom=y_min, top=y_max)

        ax.set_title(f'Диаграмма размаха для признака: {feature_name}', fontsize=14)
        ax.set_xlabel('Возраст')
        ax.set_ylabel(feature_name)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        ax.legend(title='Пол', loc='upper right')

        path = path_to_file / f'{feature}.png'
        plt.savefig(path, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed, so
        # figures do not accumulate while many features are processed.
        plt.close(fig)


# Start from a clean output folder: drop plots left over from earlier runs.
if is_dir_not_empty(save_dir):
    delete_files_in_dir(save_dir)

# One box plot per feature column.
for feature in features:
    # Readable label: only the first two underscores become spaces; any later
    # underscores stay as they are.
    feature_name = feature.replace('_', ' ', 2)
    BoxPlotsForFeatures(df, feature, feature_name, save_dir)