In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import datetime as dt

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.colors as clrs

from sklearn import preprocessing #ajout du numéro 

import time
import os

%matplotlib inline

# Permet de comparer la complétion de deux dataframes.

In [1]:
def df_completion(df_1, df_2):
    return (pd.concat([(df_1
                        .notna()
                        .sum()
                        .mul(100/len(df_1))),
                       (df_2
                        .notna()
                        .sum()
                        .mul(100/len(df_2)))], axis=1)
            .rename({0: "ath1", 1:"ath2"}, axis=1)
            .style
            # .bar(cmap='RdYlGn')
            .background_gradient(cmap="RdYlGn")
            .format("{:.1f}%")
            .set_properties(**{'width':'100px', 'text-align':'center'})
            .set_table_styles([ dict(selector='th', props=[('text-align','center')]) ]) )

# Filtrer un dataframe

In [2]:
def filter_df(df_tmp, qtty='time_only', filtre={'kind':'include', 'list':["2021-06-09"]}):
    """Keep or drop the rows of a dataframe whose ``qtty`` column matches given values.

    Parameters
    ----------
    df_tmp : pandas.DataFrame
        Dataframe to filter. It is never modified in place.
    qtty : str
        Name of the column the filter is applied to. The name ``"date"`` is
        special-cased: string values are parsed as ``"%Y-%m-%d"`` and turned
        into ``datetime.date`` objects before comparison (this presumably
        expects the column to hold ``datetime.date`` objects - confirm against
        the caller).
    filtre : dict
        ``{'kind': 'include' | 'exclude', 'list': [values...]}``. All values
        must share a single type:

        * numbers  -> exact match (``isin``);
        * strings  -> the values are joined with ``|`` and used as a regular
          expression for ``str.contains``; missing cells never match;
        * dates (only when ``qtty == "date"``) -> exact match.

        The default dict is only read, never mutated.

    Returns
    -------
    pandas.DataFrame
        The filtered dataframe. ``df_tmp`` itself is returned (after printing a
        hint) when the list is empty or mixes several types, and silently when
        ``kind`` or the value type is not supported.

    Notes
    -----
    For backward compatibility, the ``include`` filter on dates resets the
    index and returns rows grouped in the order of the requested dates. Every
    other branch preserves the original index and row order.
    """
    val_list = filtre['list']
    val_type = {type(val) for val in val_list}

    if len(val_type) == 0:
        print("You didn't give any value to filter, so you get the initial dataframe you sent in.")
        return df_tmp
    if len(val_type) > 1:
        print("Better strategy : you should give only one type of values to filter and use the function multiple times.")
        return df_tmp

    if qtty == "date" and isinstance(val_list[0], str):
        val_list = [dt.datetime.strptime(x, "%Y-%m-%d").date() for x in val_list]

    kind = filtre['kind']
    if kind not in ('include', 'exclude'):
        return df_tmp

    first_val = val_list[0]
    if isinstance(first_val, (int, float, complex)):
        mask = df_tmp[qtty].isin(val_list)
    elif isinstance(first_val, str):
        # na=False: a missing cell does not contain the pattern. Without it the
        # mask holds NaN and can neither index the dataframe nor be negated.
        mask = df_tmp[qtty].str.contains('|'.join(val_list), na=False)
    elif qtty == "date":
        if kind == 'include':
            # Historical behaviour, kept as is: positional selection on a
            # reset index, rows ordered date by date.
            df_tmp = df_tmp.reset_index(drop=True)
            list_index = []
            for date in val_list:
                list_index += list(df_tmp[df_tmp[qtty] == date].index)
            return df_tmp.iloc[list_index]
        # The exclude case used to select the matching rows (and to feed index
        # labels to iloc); it must drop them instead.
        mask = df_tmp[qtty].isin(val_list)
    else:
        # Unsupported value type for this column: nothing to filter on.
        return df_tmp

    return df_tmp[mask] if kind == 'include' else df_tmp[~mask]

# Voir une journée d'entraînement : heures de début et de fin, phase du cycle

In [3]:
def focus_on_training(df_tmp, list_dates):
    """Print a one-line summary of the data recorded on each requested date.

    For every date the summary shows the distinct ``training_counter`` and
    ``type_ent`` values, the number of rows, the first and last ``Timestamp``
    with the elapsed time between them, and the distinct ``phase`` values.

    Parameters
    ----------
    df_tmp : pandas.DataFrame
        Must contain the columns ``date``, ``Timestamp``, ``training_counter``,
        ``type_ent`` and ``phase``. ``Timestamp`` is assumed to be
        datetime-like, since ``.time()`` is called on its min and max.
    list_dates : iterable
        Dates to look up; each one is compared with ``==`` against the
        ``date`` column, so it must be of the same type as that column.

    Returns
    -------
    None
        The function only prints.
    """
    for date in list_dates :
        df_loc = df_tmp[df_tmp['date'] == date]

        if df_loc.empty :
            # With no rows the min/max of a datetime column would be NaT, on
            # which .time() fails: report the date and move on to the next one.
            print(f"Aucun entraînement trouvé pour le {date}.")
            continue

        heure_debut, heure_fin = df_loc["Timestamp"].min(), df_loc["Timestamp"].max()
        duree = heure_fin - heure_debut
        print(f"Entraînement {df_loc['training_counter'].unique()}:{df_loc['type_ent'].unique()}, le {date} avec {len(df_loc)} points, de {heure_debut.time()} à {heure_fin.time()}, durée = {duree} - phase du cycle: {df_loc['phase'].unique().tolist()}")

---

# Quelques plots

In [None]:
def plot_df(df_tmp, qtty_x='tps', qtty_y='heart_rate', groupby='date', figtitle="", tuple_figsize=(12.,12.)):
    """Overlay one ``qtty_y`` versus ``qtty_x`` curve per group on a single figure.

    Parameters
    ----------
    df_tmp : pandas.DataFrame
        Data to plot; must contain ``qtty_x``, ``qtty_y`` and ``groupby``.
    qtty_x, qtty_y : str
        Columns used for the x and y axes (also used as axis labels).
    groupby : str
        Column whose distinct values define the curves.
    figtitle : str
        Title of the figure.
    tuple_figsize : tuple of float
        Figure size passed to ``plt.subplots``.

    Returns
    -------
    None
        The figure is drawn on a freshly created matplotlib figure.
    """
    figure, axis = plt.subplots(figsize=tuple_figsize)

    group_key = df_tmp[groupby]
    for _, subset in df_tmp.groupby(group_key):
        drawn = subset.plot(ax=axis, x=qtty_x, y=qtty_y, title=figtitle)
        drawn.set_xlabel(qtty_x, fontsize=14)
        drawn.set_ylabel(qtty_y, fontsize=14)

    # An empty label list with no frame: the per-group legend is not shown.
    plt.legend('', frameon=False)


def plot_df_2criteria(df_tmp, qtty_x='tps', qtty_y='heart_rate', criteria1='date', criteria2='phase',
                      criteria2_color={'Unknown':'yellow', 'Folliculaire':'blue', 'Lutéale':'green', 'Menstruations':'red'}, figtitle="", tuple_figsize=(12.,12.)):
    """Overlay one curve per (``criteria1``, ``criteria2``) group, coloured by ``criteria2``.

    Parameters
    ----------
    df_tmp : pandas.DataFrame
        Data to plot; must contain ``qtty_x``, ``qtty_y``, ``criteria1`` and
        ``criteria2``.
    qtty_x, qtty_y : str
        Columns used for the x and y axes (also used as axis labels).
    criteria1 : str
        Outer grouping column; it splits the curves but does not affect colour.
    criteria2 : str
        Inner grouping column; its value labels each curve and selects the colour.
    criteria2_color : dict
        Maps a ``criteria2`` value to a matplotlib colour. Values missing from
        the mapping are drawn with matplotlib's default colour instead of
        raising ``KeyError``. The default dict is only read, never mutated.
    figtitle : str
        Title of the figure.
    tuple_figsize : tuple of float
        Figure size passed to ``plt.subplots``.

    Returns
    -------
    None
        The figure is drawn on a freshly created matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=tuple_figsize)

    for _, df1 in df_tmp.groupby(df_tmp[criteria1]):
        for group2, df2 in df1.groupby(df1[criteria2]):
            # Pass a colour only when one is configured for this group, so an
            # unexpected group value still gets plotted (default colour cycle).
            color_kwargs = {'color': criteria2_color[group2]} if group2 in criteria2_color else {}
            g = df2.plot(ax=ax, x=qtty_x, y=qtty_y, label=group2, title=figtitle, **color_kwargs)
            g.set_xlabel(qtty_x, fontsize=14)
            g.set_ylabel(qtty_y, fontsize=14)




def plot_df_1day_time_series(df_loc, x_qtty="Timestamp", qtties_plot=["heart_rate"], tuple_figsize=(22.,12.), shade_area=False):
    """Plot several quantities against a common x axis, one stacked subplot per quantity.

    Parameters
    ----------
    df_loc : pandas.DataFrame
        Data to plot; must contain ``x_qtty`` and every column in ``qtties_plot``.
    x_qtty : str
        Column used for the shared x axis.
    qtties_plot : list of str
        Columns to plot, one subplot row each (top to bottom). The default
        list is only read, never mutated.
    tuple_figsize : tuple of float
        Figure size passed to ``plt.subplots``.
    shade_area : bool
        When True, the area under each curve is also filled with the curve's
        colour at 20% opacity.

    Returns
    -------
    None
        The figure is drawn on a freshly created matplotlib figure.
    """
    list_of_colors = ['#33FFFF', '#00FF00', '#CCCC00', '#FF9966', '#CC3366', '#99CCCC', '#CC66CC', '#66FF99', '#FFFF66', '#6600FF', '#006699', '#CC9966', '#3366FF', '#FF3366', '#000000']

    # squeeze=False always returns a 2-D array of Axes. Without it a single
    # quantity (the default) yields a bare Axes object that cannot be indexed.
    fig, axes = plt.subplots(nrows=len(qtties_plot), figsize=tuple_figsize, sharex=True, squeeze=False)

    for ctr, y_qtty in enumerate(qtties_plot):
        current_ax = axes[ctr, 0]
        # Cycle through the palette so quantities beyond its length are still drawn.
        color = list_of_colors[ctr % len(list_of_colors)]

        df_loc.plot(x=x_qtty, y=y_qtty, color=color, ax=current_ax)
        current_ax.set_ylabel(y_qtty)
        if shade_area :
            df_loc.plot.area(x=x_qtty, y=y_qtty, color=color, alpha=0.2, ax=current_ax)

    # Nearly no vertical gap, so the stacked subplots read as one panel.
    fig.subplots_adjust(hspace=.001)

# Extraire un dataframe formaté pour les séries temporelles

In [None]:
def make_df_timeseries(df_tmp, qtties=['altitude']):
    """Reshape long-format training data into one row per (training, quantity) time series.

    The input is grouped by ``date`` then ``training_index``. For each group
    and each requested quantity, a row is produced with the metadata columns
    ``numero_entrainement``, ``type_entrainement``, ``grandeur`` and ``length``
    followed by one column per ``tps`` value holding the quantity at that time.

    Parameters
    ----------
    df_tmp : pandas.DataFrame
        Must contain ``date``, ``training_index``, ``training_counter``,
        ``type_ent``, ``tps`` and every column listed in ``qtties``.
    qtties : list of str
        Quantities to extract. The default list is only read, never mutated.

    Returns
    -------
    pandas.DataFrame
        One row per time series. Series shorter than the widest one hold NaN in
        the ``tps`` columns they do not cover.

    Notes
    -----
    A group holding several ``training_counter`` or ``type_ent`` values cannot
    be labelled unambiguously. It is reported and skipped rather than being
    labelled with a value left over from a previous group.
    """
    t_start = time.time()

    listou = []
    for date, d1 in df_tmp.groupby("date"):
        for train, d2 in d1.groupby("training_index"):
            counters = d2["training_counter"].unique()
            types = d2["type_ent"].unique()

            consistent = True
            if len(counters) > 1 :
                print(f"Julian fucked up... (date={date}, training_index={train}: training_counter values {counters.tolist()}; group skipped)")
                consistent = False
            if len(types) > 1 :
                print(f"Julian fucked up really bad... (date={date}, training_index={train}: type_ent values {types.tolist()}; group skipped)")
                consistent = False
            if not consistent :
                continue

            numero_entrainement = counters[0]
            type_entrainement = types[0]
            # The time axis is the same for every quantity of this group.
            ts_tps = d2['tps'].tolist()

            for qtty in qtties :
                tmp_dict = {"numero_entrainement":numero_entrainement, "type_entrainement":type_entrainement, "grandeur":qtty}

                loc_dict = dict(zip(ts_tps, d2[qtty].tolist()))
                tmp_dict['length'] = len(loc_dict)

                tmp_dict.update(loc_dict)
                listou.append(tmp_dict)

    df_ts = pd.DataFrame(listou)
    # shape[1] would also count the four metadata columns; the longest series
    # is given by the 'length' column.
    longest = int(df_ts['length'].max()) if len(df_ts) else 0
    print(f"Le dataframe contient {df_ts.shape[0]} séries temporelles différentes, avec {longest} points pour la plus longue.")
    print(f"Time taken to create the time series: {time.time()-t_start}")
    return df_ts