# Stats Processing

In [16]:
import pandas as pd
import os, json
from joblib import Parallel, delayed, cpu_count
from tqdm import tqdm
# Root folder of the raw export; its entries are listed and scanned for JSON files below.
DATASET = "./GarminRawData"

In [17]:
# files = []
# for dirName in os.listdir( DATASET ):

#     basedir = os.path.join( DATASET, dirName )

#     if os.path.isdir( basedir ):
#         for fName in os.listdir( basedir ):
#             if fName[-5:] == ".json":
#                 fName = os.path.join( basedir, fName )
#                 print( f"""Found file: {fName}""" )
#                 files.append( fName )
                
def getData(dirName):
    """Return the path of one JSON file found inside a dataset entry.

    Parameters
    ----------
    dirName : str
        Name of an entry inside ``DATASET`` (a bare name, not a full path).

    Returns
    -------
    str or None
        ``DATASET/dirName/<name>.json`` for the alphabetically first file
        whose name ends in ``.json``; ``None`` when the entry is not a
        directory or contains no such file.
    """
    basedir = os.path.join(DATASET, dirName)
    if not os.path.isdir(basedir):
        return None

    # os.listdir yields names in arbitrary order; sorting makes the selected
    # file the same on every run and on every machine.
    for fName in sorted(os.listdir(basedir)):
        if fName.endswith(".json"):
            fName = os.path.join(basedir, fName)
            print( f"""Found file: {fName}""" )
            # NOTE(review): only one file per directory is returned; any other
            # .json file in the same directory is ignored -- confirm intended.
            return fName

    return None
# Scan every entry of DATASET in parallel; each job yields a JSON path or None.
dataset_entries = os.listdir(DATASET)
scan_jobs = (delayed(getData)(entry) for entry in tqdm(dataset_entries))
files = Parallel(n_jobs=cpu_count())(scan_jobs)

100%|██████████| 10/10 [00:00<00:00, 19222.29it/s]


In [18]:
# data = []

# for fName in files:
#     print(fName)
#     try:
#         f=json.load( open( fName, 'rt') )
#         data.append(f)
#     except json.JSONDecodeError:
#         print(f"{fName} vuoto")


def getFile(fName):
    """Load one JSON file of records into a DataFrame.

    Parameters
    ----------
    fName : str or None
        Path of a JSON file containing a list of records. ``None`` is
        accepted so that entries for which no file was found can be passed
        straight through.

    Returns
    -------
    pandas.DataFrame
        One row per record. An empty DataFrame is returned when ``fName`` is
        ``None``, when the file is not valid JSON (for example an empty
        file), or when loading raises ``TypeError``.
    """
    if fName is None:
        return pd.DataFrame()
    try:
        # The context manager closes the handle even if parsing fails.
        with open(fName, 'rt') as handle:
            records = json.load(handle)
        return pd.DataFrame.from_records(records)
    except (json.JSONDecodeError, TypeError):
        return pd.DataFrame()
    
# Parse every discovered file in parallel; each job yields one DataFrame.
load_jobs = (delayed(getFile)(path) for path in tqdm(files))
data = Parallel(n_jobs=cpu_count())(load_jobs)

100%|██████████| 10/10 [00:00<00:00, 21399.51it/s]


In [20]:
# Stack the per-file frames into one table. ignore_index=True gives every row
# a unique 0..n-1 label; without it each file's labels restart at 0 and repeat.
stat_df = pd.concat(data, ignore_index=True)
stat_df.to_csv( "GarminStats.csv", index=False )

## Filtering
### By duration

In [9]:
# Minimum recording length, in seconds, for a recording to be kept.
tresh_sec = 1800
longer_than_threshold = stat_df['duration_sec'] > tresh_sec
# Preview: per-day count of non-null values among the long recordings.
stat_df.loc[longer_than_threshold].groupby('day').count()

Unnamed: 0_level_0,data_filename,t_start,t_stop,duration_sec,serial_number,time_created,unknown_7,manufacturer,garmin_product,number,type
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-04-16,1,1,1,1,1,1,0,1,1,0,1
2024-04-17,1,1,1,1,1,1,0,1,1,0,1
2024-05-04,5,5,5,5,5,5,0,5,5,0,5
2024-05-05,6,6,6,6,6,6,0,6,6,0,6
2024-05-06,3,3,3,3,3,3,0,3,3,0,3
2024-05-07,5,5,5,5,5,5,0,5,5,0,5
2024-05-08,3,3,3,3,3,3,0,3,3,0,3
2024-05-09,2,2,2,2,2,2,0,2,2,0,2
2024-05-10,1,1,1,1,1,1,0,1,1,0,1
2024-05-11,2,2,2,2,2,2,0,2,2,0,2


In [10]:
# Keep only recordings longer than the threshold for the rest of the notebook.
is_long_enough = stat_df['duration_sec'] > tresh_sec
stat_df = stat_df.loc[is_long_enough]
stat_df.shape

(89, 12)

### By t_start

In [11]:
# Keep only recordings whose start hour (UTC) lies between 8 and 11 inclusive.
# Frames built from JSON records hold timestamps as text, so parse before using
# `.dt`; for a column that is already timezone-aware UTC this changes nothing.
# NOTE(review): assumes t_start holds ISO-style timestamps -- confirm against the JSON.
start_hour = pd.to_datetime(stat_df['t_start'], utc=True).dt.hour
stat_df = stat_df.loc[(start_hour >= 8) & (start_hour <= 11)]
print(stat_df.shape)
stat_df.groupby('day').count()

(72, 12)


Unnamed: 0_level_0,data_filename,t_start,t_stop,duration_sec,serial_number,time_created,unknown_7,manufacturer,garmin_product,number,type
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-05-04,4,4,4,4,4,4,0,4,4,0,4
2024-05-05,5,5,5,5,5,5,0,5,5,0,5
2024-05-06,3,3,3,3,3,3,0,3,3,0,3
2024-05-09,2,2,2,2,2,2,0,2,2,0,2
2024-05-11,2,2,2,2,2,2,0,2,2,0,2
2024-05-12,3,3,3,3,3,3,0,3,3,0,3
2024-05-14,2,2,2,2,2,2,0,2,2,0,2
2024-05-15,6,6,6,6,6,6,0,6,6,0,6
2024-05-16,2,2,2,2,2,2,0,2,2,0,2
2024-05-17,6,6,6,6,6,6,0,6,6,0,6


In [12]:
# Inspect the recordings of one specific day that exceed the duration threshold.
on_target_day = stat_df['day'] == '2024-05-23'
over_threshold = stat_df['duration_sec'] > tresh_sec
stat_df.loc[on_target_day & over_threshold]

Unnamed: 0,data_filename,t_start,t_stop,duration_sec,serial_number,time_created,unknown_7,manufacturer,garmin_product,number,type,day
1,GarminRawData/4141/2024-05-23-10-48-39.fit.json,2024-05-23 08:48:39+00:00,2024-05-23 16:04:51+00:00,26172.0,3470534141,2024-05-23 08:48:39+00:00,,garmin,3905,,activity,2024-05-23
33,GarminRawData/4253/2024-05-23-10-50-13.fit.json,2024-05-23 08:50:13+00:00,2024-05-24 08:33:45+00:00,85412.0,3470534253,2024-05-23 08:50:14+00:00,,garmin,3905,,activity,2024-05-23
54,GarminRawData/4493/2024-05-23-10-49-44.fit.json,2024-05-23 08:49:44+00:00,2024-05-24 08:33:24+00:00,85420.0,3470534493,2024-05-23 08:49:43+00:00,,garmin,3905,,activity,2024-05-23
100,GarminRawData/4530/2024-05-23-10-50-03.fit.json,2024-05-23 08:50:03+00:00,2024-05-23 19:28:57+00:00,38334.0,3470534530,2024-05-23 08:50:03+00:00,,garmin,3905,,activity,2024-05-23
120,GarminRawData/4526/2024-05-23-10-50-09.fit.json,2024-05-23 08:50:09+00:00,2024-05-24 08:34:24+00:00,85455.0,3470534526,2024-05-23 08:50:09+00:00,,garmin,3905,,activity,2024-05-23
161,GarminRawData/4595/2024-05-23-10-48-47.fit.json,2024-05-23 08:48:47+00:00,2024-05-24 08:34:47+00:00,85560.0,3470534595,2024-05-23 08:48:47+00:00,,garmin,3905,,activity,2024-05-23


In [14]:
# Write the filtered table to CSV, then re-read that CSV and export it to Excel.
# NOTE(review): presumably the round-trip turns timezone-aware timestamps into
# plain text so that the Excel writer accepts them -- confirm.
stats_csv_path = 'stats.csv'
stat_df.to_csv(stats_csv_path)
stats_reloaded = pd.read_csv(stats_csv_path)
stats_reloaded.to_excel('stats.xlsx')

In [19]:
# Per-watch, per-day count of non-null values in each column, saved to Excel.
counts_by_watch_day = stat_df.groupby(['serial_number', 'day']).count()
counts_by_watch_day.to_excel('watchbyday.xlsx')

In [36]:
# Summed duration per day (rows) and watch serial number (columns);
# day/watch combinations with no recording are filled with 0.
duration_columns = stat_df[['serial_number', 'day', 'duration_sec']]
duration_by_day_watch = duration_columns.pivot_table(
    columns='serial_number',
    index='day',
    aggfunc='sum',
).fillna(0)
duration_by_day_watch.to_excel('duration_day_watch.xlsx')