# Plotting Samsung Health Data

## Setup and Function Definition

The first row of each exported CSV file is always a metadata line.
The actual column header appears in the second row.

Each data row may start with an empty leading column (a leading comma).
When it does, pandas assumes that this first column is the DataFrame index,
shifting all columns left so that values end up under misaligned headers.

Curiously, this happens even when the header provides a name for that column, which is why the reader below passes `index_col=False`.

In [1]:
# Use the Matplotlib inline magic command (if not already set)
%matplotlib inline
import os
import pandas as pd
import matplotlib.pyplot as plt

# Directory that holds the unpacked Samsung Health export (CSV files)
samsung_dump_dir = 'samsunghealth_data'


def sam_readcsv(x):
    """Read one Samsung Health CSV export into a DataFrame.

    Args:
        x: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        A ``pandas.DataFrame`` with a default integer index.

    The reader is called with two specific arguments:
      ``skiprows=1``: ignore the first (metadata) line so that the second
        line is used as the header.
      ``index_col=False``: never promote the first column to the index,
        even when data rows hold more fields than the header does.
    """
    return pd.read_csv(x, skiprows=1, index_col=False)

## Heart rate

### Plotting

In [None]:
# Columns of interest in the heart-rate tracker export
hr_column_name = 'com.samsung.health.heart_rate.heart_rate'
time_column_name = 'com.samsung.health.heart_rate.end_time'

# Location of the heart-rate export inside the dump directory
heart_rate_tracker = os.path.join(
    samsung_dump_dir,
    'com.samsung.shealth.tracker.heart_rate.20251018112612.csv',
)

# Load the file, parse the timestamp column into datetime objects
# and promote it to the DataFrame index, all in one chain
heart_rate_df = (
    sam_readcsv(heart_rate_tracker)
    .assign(**{time_column_name: lambda df: pd.to_datetime(df[time_column_name])})
    .set_index(time_column_name)
)

# Open the figure that the plot is drawn into
plt.figure(figsize=(12, 6))

# Draw the heart-rate series with the pandas plot method
# (which uses Matplotlib internally)
heart_rate_df[hr_column_name].plot(title='Heart Rate Over Time', grid=True)

# Label both axes of the current plot
plt.gca().set(xlabel="Time", ylabel="Heart Rate (BPM)")

# Render the figure
plt.show()

### Data export with European formatting

In [None]:
# Export the heart-rate measurements as a CSV file using European
# conventions: DD/MM/YYYY timestamps, decimal commas, ';' as delimiter.
# Requires `heart_rate_df` (datetime-indexed) and `hr_column_name` from the
# heart-rate plotting cell.
output_file_name = 'heart_rate_european_format.csv'

# Work on a copy of the prepared heart-rate DataFrame
hr_export_df = heart_rate_df.copy()

# Format Datetime to European (DD/MM/YYYY HH.MM.SS)
time_format_col = 'Timestamp (European Format)'
hr_export_df[time_format_col] = hr_export_df.index.strftime('%d/%m/%Y %H.%M.%S')

# Drop the datetime index; the formatted timestamp column replaces it
hr_export_df = hr_export_df.reset_index(drop=True)

# Convert numerics to string and replace the decimal point with a comma
new_hr_col = 'Heart Rate (bpm)'
hr_export_df[new_hr_col] = hr_export_df[hr_column_name].astype(str).str.replace('.', ',', regex=False)

# Select and reorder the final columns
final_cols = [time_format_col, new_hr_col]
br_final_df = hr_export_df[final_cols]

# Use a semicolon (;) as the delimiter for European format compatibility
br_final_df.to_csv(output_file_name, index=False, sep=';')

print(f"Successfully exported data to: {output_file_name}")
print("First few rows of the exported CSV (note the comma decimals):")
# Show only the head so the output matches the message printed above
display(br_final_df.head())


## Systolic (SYS) and diastolic (DIA) blood pressure

### Plotting

In [None]:
# Load the blood-pressure export and plot systolic, diastolic and pulse
# readings over time.
blood_pressure_tracker = os.path.join(samsung_dump_dir, 'com.samsung.shealth.blood_pressure.20251018112612.csv')
blood_pressure_df = sam_readcsv(blood_pressure_tracker)

sys_column_name = 'com.samsung.health.blood_pressure.systolic'
dia_column_name = 'com.samsung.health.blood_pressure.diastolic'
pulse_column_name = 'com.samsung.health.blood_pressure.pulse'
time_column_name = 'com.samsung.health.blood_pressure.update_time'

# Convert the time column to datetime objects
blood_pressure_df[time_column_name] = pd.to_datetime(blood_pressure_df[time_column_name])

# Set the datetime column as the DataFrame index
blood_pressure_df = blood_pressure_df.set_index(time_column_name)

# Select only the blood pressure and pulse columns for plotting
bp_data = blood_pressure_df[[sys_column_name, dia_column_name, pulse_column_name]].copy()

# Create the figure and axes explicitly and hand the axes to pandas.
# DataFrame.plot() opens a figure of its own when no `ax` is passed, so a
# bare plt.figure() call beforehand would leave an empty extra figure behind.
fig, ax = plt.subplots(figsize=(14, 7))

# Plot all three columns on the prepared axes
bp_data.plot(
    ax=ax,
    title='Blood Pressure and Pulse Over Time (Systolic, Diastolic, Heart Rate)',
    grid=True,
)

# Rename the legend labels for clarity (same order as the plotted columns)
ax.legend(['Systolic (SYS)', 'Diastolic (DIA)', 'Heart Rate (bpm)'])

# Label the axes; the y-axis carries both pressure and pulse values
ax.set_xlabel("Time")
ax.set_ylabel("Blood Pressure (mmHg) / Heart Rate (bpm)")

plt.show()


### Data export with European formatting

In [None]:
# Export the blood-pressure measurements as a CSV file using European
# conventions: DD/MM/YYYY timestamps, decimal commas, ';' as delimiter.
# Requires `blood_pressure_df` (datetime-indexed) and the *_column_name
# variables from the blood-pressure plotting cell.
output_file_name = 'blood_pressure_european_format.csv'

# Work on a copy of the prepared blood pressure DataFrame
bp_export_df = blood_pressure_df.copy()

# Format Datetime to European (DD/MM/YYYY HH.MM.SS)
time_format_col = 'Timestamp (European Format)'
bp_export_df[time_format_col] = bp_export_df.index.strftime('%d/%m/%Y %H.%M.%S')

# Drop the datetime index; the formatted timestamp column replaces it
bp_export_df = bp_export_df.reset_index(drop=True)

# Convert numerics to string and replace the decimal point with a comma
new_sys_col = 'Systolic (mmHg)'
new_dia_col = 'Diastolic (mmHg)'
new_pulse_col = 'Heart Rate (bpm)'
for source_col, export_col in (
    (sys_column_name, new_sys_col),
    (dia_column_name, new_dia_col),
    (pulse_column_name, new_pulse_col),
):
    bp_export_df[export_col] = bp_export_df[source_col].astype(str).str.replace('.', ',', regex=False)

# Select and reorder the final columns
final_cols = [time_format_col, new_sys_col, new_dia_col, new_pulse_col]
bp_final_df = bp_export_df[final_cols]

# Use a semicolon (;) as the delimiter for European format compatibility
bp_final_df.to_csv(output_file_name, index=False, sep=';')

print(f"Successfully exported data to: {output_file_name}")
print("First few rows of the exported CSV (note the comma decimals):")
# Show only the head so the output matches the message printed above
display(bp_final_df.head())
