# Merge Garmin .FIT and Lyfta CSV

This notebook extracts the heart-rate time series from a Garmin `.fit` file and merges it, set by set, with a Lyfta CSV export (one row per set, with `Title`, `Date`, `Exercise`, `Weight`, `Reps` and, optionally, `Duration` columns). It covers parsing, visualization, and HR-peak-based set detection, and it writes a merged CSV with per-set HR metrics.

**How to use**:
1. Upload your `Lyfta` CSV and the Garmin `.fit` file into the environment or set their paths in the **Run the merge** cell.
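2. Run the cells sequentially. If the detected sets look wrong, tune the peak-detection parameters (`min_peak_distance_seconds`, `min_peak_prominence`) in the **Run the merge** cell and re-run it. When the number of detected windows does not match the number of Lyfta sets, the notebook falls back to splitting the activity into equal-length windows.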

**Notes**:
- This notebook expects Python packages `fitparse`, `pandas`, `numpy`, `scipy`, and `matplotlib`. If they're missing, run the install cell below.
- The FIT parsing uses `fitparse`; if you prefer `fitdecode` you can adapt the code.


In [2]:
%%bash
# Report the Python version seen by the shell.
python3 -V
# Install required packages (uncomment to run in your environment)
# pip install fitparse pandas numpy scipy matplotlib

# If pip is restricted or fails with a permission error (e.g. a system-wide
# Python), add `--user` to the command above, install in a venv, or run locally.

Python 3.8.10


In [5]:
import os
from datetime import datetime, timedelta
import json

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

!pip install fitparse

# FIT parsing
from fitparse import FitFile
from scipy.signal import find_peaks

print('imports ok')

Collecting fitparse
  Downloading fitparse-1.2.0.tar.gz (65 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 65.7/65.7 kB 2.7 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: fitparse
  Building wheel for fitparse (setup.py) ... done
  Created wheel for fitparse: filename=fitparse-1.2.0-py3-none-any.whl size=68199 sha256=f3804a36c104aae74c08a5f8c8b35e750578057c37668c97e77c5521f126aba8
  Stored in directory: /home/ainesh/.cache/pip/wheels/49/df/4b/3fa676f0a6dd88074d9e09dbaa428e32f43a59a3c01bb95fe8
Successfully built fitparse
Installing collected packages: fitparse
ERROR: Could not install packages due to an OSError: [Errno 13] Permission denied: '/usr/local/bin/fitdump'
Consider using the `--user` option or check the permissions.


[notice] A new release of pip is available: 23.0.1

In [None]:
def parse_lyfta_csv(path):
    """Read a Lyfta CSV export into workout metadata plus one record per set.

    Every CSV row is treated as one set. Workout-level fields (title, start
    time, duration text) are taken from the first row only.

    Parameters
    ----------
    path : str or file-like
        Anything ``pandas.read_csv`` accepts.

    Returns
    -------
    tuple(dict, list[dict])
        ``workout_info`` with keys ``title``, ``start_time`` (a ``datetime``),
        ``duration_text`` and ``n_sets``; and ``sets``, a list of dicts with
        keys ``set_index``, ``exercise``, ``weight`` and ``reps``. Optional
        columns that are absent from the CSV yield ``None``.

    Raises
    ------
    ValueError
        If the CSV has no rows. Date-parsing errors from
        ``pandas.to_datetime`` propagate unchanged.
    KeyError
        If the CSV has no ``Date`` column.
    """
    df = pd.read_csv(path)
    if df.empty:
        raise ValueError('Lyfta CSV empty')
    if 'Date' not in df.columns:
        raise KeyError("Lyfta CSV has no 'Date' column")

    def _first_value(column):
        # Workout-level fields are optional: a missing column maps to None.
        return df[column].iloc[0] if column in df.columns else None

    # A single parse is enough; retrying with `infer_datetime_format` cannot
    # succeed where the default parser failed, and the flag is deprecated.
    workout_start = pd.to_datetime(df['Date'].iloc[0])

    workout_info = {
        'title': _first_value('Title'),
        'start_time': workout_start.to_pydatetime(),
        'duration_text': _first_value('Duration'),
        'n_sets': len(df)
    }
    sets = [
        {
            'set_index': position,
            'exercise': row.get('Exercise'),
            'weight': row.get('Weight'),
            'reps': row.get('Reps')
        }
        for position, (_, row) in enumerate(df.iterrows())
    ]
    return workout_info, sets


def parse_fit_file(fit_path):
    """Extract the heart-rate series and basic activity timing from a FIT file.

    The file is decoded once. ``file_id``, ``session``, ``lap`` and ``record``
    messages are inspected; everything else is ignored.

    Parameters
    ----------
    fit_path : str
        Path handed to ``fitparse.FitFile``.

    Returns
    -------
    dict
        ``activity_start`` (``datetime``): the session ``start_time`` if
        present, else the ``file_id`` ``time_created``, else the first record
        timestamp.
        ``total_elapsed`` (``float`` seconds): the session
        ``total_elapsed_time`` if present, else ``total_timer_time``, else
        the span between first and last record.
        ``hr_df`` (``DataFrame``): columns ``timestamp`` and ``heart_rate``,
        sorted by time, de-duplicated, with gaps forward/back-filled.
        ``garmin_sets`` (``list[dict]``): ``start``/``end`` of every lap
        message that carries both. NOTE(review): whether laps correspond to
        strength sets depends on how the activity was recorded.

    Raises
    ------
    ValueError
        If the file has no timestamped record messages, or none of them
        carries a heart-rate value.
    """
    fitfile = FitFile(fit_path)
    records = []
    file_created = None
    session_start = None
    session_durations = {}
    garmin_sets = []

    for msg in fitfile.get_messages():
        name = msg.name
        if name == 'file_id':
            for d in msg:
                if d.name == 'time_created' and d.value and file_created is None:
                    file_created = d.value
        elif name == 'session':
            for d in msg:
                if d.name == 'start_time' and d.value:
                    session_start = d.value
                elif d.name in ('total_elapsed_time', 'total_timer_time') and d.value:
                    # Keep both so the choice below does not depend on field order.
                    session_durations[d.name] = float(d.value)
        elif name == 'lap':
            lap_start = None
            lap_end = None
            for d in msg:
                if d.name == 'start_time':
                    lap_start = d.value
                if d.name == 'timestamp':
                    lap_end = d.value
            if lap_start and lap_end:
                garmin_sets.append({'start': lap_start, 'end': lap_end})
        elif name == 'record':
            rec = {d.name: d.value for d in msg}
            if 'timestamp' in rec:
                records.append({'timestamp': rec['timestamp'], 'heart_rate': rec.get('heart_rate', None)})

    if not records:
        # Without this guard the DataFrame has no 'timestamp' column and the
        # dropna() below fails with an unhelpful KeyError.
        raise ValueError('FIT file contains no timestamped record messages.')

    hr_df = pd.DataFrame(records)
    hr_df = hr_df.dropna(subset=['timestamp']).drop_duplicates(subset=['timestamp'])
    hr_df['timestamp'] = pd.to_datetime(hr_df['timestamp'])
    hr_df = hr_df.sort_values('timestamp').reset_index(drop=True)
    if hr_df['heart_rate'].isna().all():
        raise ValueError('FIT HR records exist but heart_rate values are all missing.')
    hr_df['heart_rate'] = hr_df['heart_rate'].ffill().bfill().astype(float)

    # The session start wins over the file creation time; records are the last resort.
    activity_start = session_start if session_start is not None else file_created
    if activity_start is None:
        activity_start = hr_df['timestamp'].iloc[0]

    # Prefer elapsed time over timer time when the session carries both.
    total_elapsed = session_durations.get('total_elapsed_time', session_durations.get('total_timer_time'))
    if total_elapsed is None:
        total_elapsed = (hr_df['timestamp'].iloc[-1] - hr_df['timestamp'].iloc[0]).total_seconds()

    # Message timestamps may be plain `datetime` objects (no .to_pydatetime()),
    # while the record fallback is a pandas Timestamp; normalise both here.
    activity_start = pd.Timestamp(activity_start).to_pydatetime()

    return {'activity_start': activity_start, 'total_elapsed': float(total_elapsed), 'hr_df': hr_df, 'garmin_sets': garmin_sets}


def detect_sets_from_hr(hr_df, expected_n_sets, min_peak_distance_seconds=30, min_peak_prominence=6):
    """Estimate one time window per set from peaks in the heart-rate curve.

    The HR series is resampled to one sample per second, peaks are located
    with ``scipy.signal.find_peaks``, and the ``expected_n_sets`` most
    prominent peaks are kept. Each kept peak is widened left and right while
    HR stays above ``baseline + 0.33 * prominence``, where ``baseline`` is the
    median of the first 30 resampled samples. A window never extends past the
    lowest-HR sample between its peak and the neighbouring kept peak, so
    windows of different sets can touch but not overlap.

    Parameters
    ----------
    hr_df : DataFrame
        Must have a datetime ``timestamp`` column and a ``heart_rate`` column.
    expected_n_sets : int
        Maximum number of windows to return.
    min_peak_distance_seconds : int, optional
        Minimum spacing between peaks, in resampled (1 s) samples.
    min_peak_prominence : float, optional
        Minimum peak prominence passed to ``find_peaks``.

    Returns
    -------
    list[dict]
        Chronologically ordered dicts with ``start``, ``end`` and ``peak``
        (``datetime``). Empty if there is too little data, no peak is found,
        or ``expected_n_sets`` is not positive.
    """
    if hr_df is None or len(hr_df) == 0:
        return []
    if expected_n_sets is not None and expected_n_sets <= 0:
        return []

    # Lower-case '1s' is accepted by old and new pandas; '1S' is deprecated.
    series = hr_df.set_index('timestamp')['heart_rate'].resample('1s').mean()
    # interpolate() leaves leading gaps untouched, so back-fill those.
    series = series.interpolate().bfill()
    hr = series.values
    times = series.index.to_pydatetime()
    if len(hr) < 5:
        return []

    baseline = np.median(hr[:30])
    # find_peaks rejects a distance below 1.
    distance = max(1, int(min_peak_distance_seconds))
    peaks, props = find_peaks(hr, distance=distance, prominence=min_peak_prominence)
    if len(peaks) == 0:
        return []

    prominences = props.get('prominences', np.ones(len(peaks)))
    # Peak index -> prominence, so the loop below needs no linear search.
    prominence_of = dict(zip(peaks.tolist(), np.asarray(prominences).tolist()))
    ranked = sorted(prominence_of.items(), key=lambda item: -item[1])
    selected_peaks = sorted(p for p, _ in ranked[:expected_n_sets])

    # Hard limits for each window: the valley (minimum HR) between consecutive
    # kept peaks, plus the two ends of the series.
    limits = [0]
    for earlier, later in zip(selected_peaks, selected_peaks[1:]):
        limits.append(earlier + int(np.argmin(hr[earlier:later + 1])))
    limits.append(len(hr) - 1)

    windows = []
    for position, p in enumerate(selected_peaks):
        lowest, highest = limits[position], limits[position + 1]
        thr = baseline + 0.33 * prominence_of[p]
        left = p
        while left > lowest and hr[left] > thr:
            left -= 1
        right = p
        while right < highest and hr[right] > thr:
            right += 1
        windows.append({'start': times[left], 'end': times[right], 'peak': times[p]})
    return windows


def equal_split_windows(activity_start, activity_end, n):
    """Cut the span from ``activity_start`` to ``activity_end`` into ``n`` equal windows.

    Returns a list of ``n`` dicts with ``start``, ``end`` and ``peak``, where
    ``peak`` is the midpoint of the window. Consecutive windows share their
    boundary. A non-positive ``n`` yields an empty list.
    """
    span_seconds = (activity_end - activity_start).total_seconds()

    def _boundary(k):
        # Instant at which the k-th equal slice begins.
        return activity_start + timedelta(seconds=(k * span_seconds / n))

    result = []
    for k in range(n):
        begin = _boundary(k)
        finish = _boundary(k + 1)
        midpoint = begin + (finish - begin) / 2
        result.append({'start': begin, 'end': finish, 'peak': midpoint})
    return result


def compute_metrics_for_window(hr_df, window):
    """Summarise heart rate inside one set window.

    Samples with ``start <= timestamp <= end`` are used. If the window
    contains no sample, the single sample closest to ``start`` stands in.

    Parameters
    ----------
    hr_df : DataFrame
        Must have ``timestamp`` and ``heart_rate`` columns.
    window : dict
        Must have ``start`` and ``end`` values accepted by ``pandas.to_datetime``.

    Returns
    -------
    dict
        ``avg_hr`` and ``max_hr`` (``float``), ``duration`` (window length in
        seconds, independent of how many samples it holds) and ``hr_curve``
        (list of ``{'ts': str, 'hr': float}``).

    Raises
    ------
    ValueError
        If ``hr_df`` has no rows.
    """
    start = pd.to_datetime(window['start'])
    end = pd.to_datetime(window['end'])
    if hr_df.empty:
        raise ValueError('hr_df has no samples to compute metrics from')

    stamps = hr_df['timestamp']
    samples = hr_df.loc[(stamps >= start) & (stamps <= end)]
    if samples.empty:
        # No sample falls inside the window: use the one nearest to its start.
        nearest_idx = (stamps - start).abs().idxmin()
        samples = hr_df.loc[[nearest_idx]]

    rates = samples['heart_rate']
    return {
        'avg_hr': float(rates.mean()),
        'max_hr': float(rates.max()),
        'duration': (end - start).total_seconds(),
        # float() keeps every 'hr' a plain Python float in both paths.
        'hr_curve': [{'ts': str(ts), 'hr': float(bpm)} for ts, bpm in zip(samples['timestamp'], rates)],
    }


In [None]:
# Example: plot HR curve and mark detected peaks/windows
def plot_hr_and_windows(hr_df, windows=None, title='HR'):
    """Draw the heart-rate curve and, optionally, shade each set window.

    ``hr_df`` supplies the ``timestamp`` (x) and ``heart_rate`` (y) columns.
    Every entry of ``windows`` is shaded from its ``start`` to its ``end`` and
    gets a dashed vertical line at its ``peak``. The figure is shown
    immediately; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(hr_df['timestamp'], hr_df['heart_rate'], label='HR')
    for set_no, win in enumerate(windows or []):
        span_start = pd.to_datetime(win['start'])
        span_end = pd.to_datetime(win['end'])
        ax.axvspan(span_start, span_end, alpha=0.2, label=f'set {set_no}')
        ax.axvline(pd.to_datetime(win['peak']), linestyle='--', alpha=0.6)
    ax.set_xlabel('time')
    ax.set_ylabel('bpm')
    ax.set_title(title)
    ax.legend()
    fig.tight_layout()
    plt.show()

print('plot helper ready')

In [None]:
# Run the merge on your files - set paths here
LYFTA_CSV = 'lyfta_sample.csv'  # <-- change to your Lyfta CSV path
FIT_PATH = '/mnt/data/21159045593_ACTIVITY.fit'  # <-- change if needed
OUT_CSV = 'merged_workout.csv'

# Quick existence checks: report, then stop before parsing if an input is
# missing, so the failure names the file instead of surfacing inside a parser.
print('Files exist?', os.path.exists(LYFTA_CSV), os.path.exists(FIT_PATH))
missing_inputs = [p for p in (LYFTA_CSV, FIT_PATH) if not os.path.exists(p)]
if missing_inputs:
    raise FileNotFoundError(f'Input file(s) not found: {missing_inputs}')

workout_info, lyfta_sets = parse_lyfta_csv(LYFTA_CSV)
fit_parsed = parse_fit_file(FIT_PATH)
print('Lyfta start:', workout_info['start_time'], 'Sets:', workout_info['n_sets'])
print('Garmin start:', fit_parsed['activity_start'], 'elapsed(s):', fit_parsed['total_elapsed'])

# Attempt detection; the HR-based windows are only used when there is exactly
# one per Lyfta set, otherwise the activity is split into equal slices.
windows = detect_sets_from_hr(fit_parsed['hr_df'], expected_n_sets=workout_info['n_sets'], min_peak_distance_seconds=20, min_peak_prominence=4)
if not windows or len(windows) != workout_info['n_sets']:
    print('Detected windows:', len(windows), 'expected:', workout_info['n_sets'], '-> falling back to equal split')
    activity_end = fit_parsed['activity_start'] + timedelta(seconds=fit_parsed['total_elapsed'])
    windows = equal_split_windows(fit_parsed['activity_start'], activity_end, workout_info['n_sets'])

# Compute metrics: the i-th Lyfta set is paired with the i-th window in time order.
merged_rows = []
hr_df = fit_parsed['hr_df']
for idx, (set_rec, w) in enumerate(zip(lyfta_sets, windows)):
    metrics = compute_metrics_for_window(hr_df, w)
    merged_rows.append({
        'set_index': idx,
        'exercise': set_rec['exercise'],
        'weight': set_rec['weight'],
        'reps': set_rec['reps'],
        'set_start': w['start'],
        'set_end': w['end'],
        'set_duration_s': metrics['duration'],
        'avg_hr': metrics['avg_hr'],
        'max_hr': metrics['max_hr']
    })

out_df = pd.DataFrame(merged_rows)
out_df.to_csv(OUT_CSV, index=False)
print('Saved merged to', OUT_CSV)

# Plot for visual check
plot_hr_and_windows(hr_df, windows, title=f"Workout {workout_info['title']} - HR with detected sets")

# Display summary
print('\nWorkout summary:')
print('Total sets:', len(out_df))
print('Avg HR across sets:', out_df['avg_hr'].mean())
print('Max HR across sets:', out_df['max_hr'].max())

out_df.head(20)
