In [1]:
# Importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings as wr
from pathlib import Path
from scipy.stats import pearsonr
from statsmodels.nonparametric.smoothers_lowess import lowess
# NOTE(review): this suppresses every Python warning for the rest of the session,
# including numerical warnings raised by scipy/pandas in later cells. Consider
# narrowing the filter to specific categories once the noisy source is known.
wr.filterwarnings('ignore')

# The input workbook is read from a 'Work' folder located two levels above the
# current working directory, so results depend on where the kernel was started.
work_dir = Path.cwd().parent.parent / 'Work'
df = pd.read_excel(work_dir / "summary_tests.xlsx")

#print(df)

In [2]:
# Decade-wide age bands with edges 0, 10, ..., 90. Intervals are closed on the
# left (right=False) and each band is labelled with its upper edge.
bins = list(range(0, 100, 10))
labels = [str(upper_edge) for upper_edge in bins[1:]]
df['age_group'] = pd.cut(df['age'], bins=bins, labels=labels, right=False)

# Columns that are never plotted as features; everything else is a feature.
delete_columns = ['id_sample', 'age', 'sex', 'Unnamed: 0', 'age_group']
features = [name for name in df.columns if name not in delete_columns]

# Sex-specific subsets: rows with sex == 0 go to `males`, sex == 1 to `females`.
males = df.loc[df['sex'] == 0]
females = df.loc[df['sex'] == 1]

# Root folder for all generated scatter plots (created on demand).
save_dir = work_dir.joinpath('Scatter_plots_for_features')
save_dir.mkdir(parents=True, exist_ok=True)

def is_dir_not_empty(dir):
    """Return True when the directory `dir` contains at least one entry.

    `dir` must provide `iterdir()` (e.g. a `pathlib.Path`). Only the first
    entry is fetched, so the check does not walk the whole directory.
    """
    first_entry = next(dir.iterdir(), None)
    return first_entry is not None

def delete_files_in_dir(dir):
    """Remove every regular file directly inside `dir`.

    Sub-directories and their contents are left untouched; only entries for
    which `is_file()` is true are unlinked.
    """
    for entry in dir.glob('*'):
        if not entry.is_file():
            # Directories (and anything else that is not a file) are kept.
            continue
        entry.unlink()

def get_quantile(dataFrame, column, percent):
    """Return the `percent` quantile of `dataFrame[column]`.

    Despite its name, `percent` is passed straight to `Series.quantile`, so it
    is a fraction between 0 and 1 (e.g. 0.995), not a value from 0 to 100.
    """
    values = dataFrame[column]
    return values.quantile(percent)

def ScatterPlotsForFeatures(dataFrame, feature, feature_name, path_to_file, lable, color, ylim_data=None):
    """Save a scatter plot of one feature against age, with a LOWESS trend, as a PNG.

    The figure is written to ``path_to_file / lable / f'{feature}.png'``; the
    sub-folder is created when it does not exist yet.

    Args:
        dataFrame: Frame holding an 'age' column and the ``feature`` column.
        feature: Column plotted on the y axis; also used as the y-axis label
            and as the output file stem.
        feature_name: Human-readable feature name shown in the legend title.
        path_to_file: Root output directory; must support ``/`` and ``mkdir``
            (e.g. a ``pathlib.Path``).
        lable: Name of the sub-folder under ``path_to_file`` (spelling kept
            for existing callers).
        color: Colour of the scatter markers.
        ylim_data: Frame whose 0.5 % and 99.5 % quantiles of ``feature`` bound
            the y axis. When omitted, the notebook-level ``df`` is used, which
            is what this function has always done, so plots of different
            subsets share one y range.

    Returns:
        None. The only result is the PNG file on disk.
    """
    path = path_to_file / lable
    path.mkdir(parents=True, exist_ok=True)

    # pearsonr does not skip missing values, so restrict everything to rows
    # where both the feature and the age are present.
    valid = dataFrame.dropna(subset=[feature, 'age'])
    has_enough_points = len(valid) >= 2

    # Pearson correlation needs at least two observations.
    if has_enough_points:
        r, _ = pearsonr(valid[feature], valid['age'])
    else:
        r = float('nan')

    fig, ax = plt.subplots(figsize=(12, 8), dpi=300)
    try:
        sns.scatterplot(data=valid,
                        x='age',
                        y=feature,
                        color=color,
                        ax=ax
        )

        # LOWESS trend of the feature over age (columns: sorted age, fitted value).
        if has_enough_points:
            lowess_fit = lowess(valid[feature], valid['age'])
            ax.plot(lowess_fit[:, 0], lowess_fit[:, 1], color='orange', linewidth=2)

        # No artist carries a label here: the legend is used only as a text box
        # showing the feature name and the correlation in its title.
        ax.legend(title=f'Признак: {feature_name}\nКорреляция Пирсона R = {round(r, 3)}', loc='upper left', title_fontsize=14)
        ax.set_xlabel('Возраст')
        ax.set_ylabel(feature)
        # Set the tick label size on the axes itself rather than through the
        # pyplot "current axes" state.
        ax.tick_params(axis='both', labelsize=12)

        # Clip the y axis to the central 99 % of the reference data so that
        # extreme outliers do not flatten the plot.
        limits_source = df if ylim_data is None else ylim_data
        y_min = get_quantile(limits_source, feature, 0.005)
        y_max = get_quantile(limits_source, feature, 0.995)
        # Skip NaN limits (would raise) and degenerate ranges (constant feature).
        if pd.notna(y_min) and pd.notna(y_max) and y_min < y_max:
            ax.set_ylim(bottom=y_min, top=y_max)

        file_path = path / f'{feature}.png'
        fig.savefig(file_path, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed;
        # otherwise 300-dpi figures accumulate in memory across the loop.
        plt.close(fig)



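# Disabled cleanup step: when all three output folders already contain files,
# delete the previously generated plots before the loop below recreates them.
# if (is_dir_not_empty(save_dir / 'All') and is_dir_not_empty(save_dir / 'Males') and is_dir_not_empty(save_dir / 'Females')):
#     delete_files_in_dir(save_dir / 'All')
#     delete_files_in_dir(save_dir / 'Males')
#     delete_files_in_dir(save_dir / 'Females')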

# For every feature, render the same plot for the full sample and for each sex.
# Each entry: (data subset, output sub-folder, marker colour).
plot_variants = (
    (df, 'All', 'green'),
    (males, 'Males', 'skyblue'),
    (females, 'Females', 'lightcoral'),
)

for feature in features:
    # Only the first two underscores become spaces; later ones are kept.
    feature_name = feature.replace('_', ' ', 2)

    for subset, group_label, point_color in plot_variants:
        ScatterPlotsForFeatures(subset, feature, feature_name, save_dir, lable=group_label, color=point_color)
