###  Convert the longitudinal blood pressure measurements into a proper time series

In [10]:
import pandas as pd 

# Load the tab-separated longitudinal visit records.
df = pd.read_csv("patient_longitudinal.csv", sep='\t')

# Parse `visit_date` into datetimes and use it as the index so the frame
# behaves as a time series.
df['visit_date'] = pd.to_datetime(df['visit_date'], format='%Y-%m-%d')
df.set_index('visit_date', inplace=True)

# Collapse repeated visit dates into a single row per date by averaging.
# The rows must NOT be filtered with `index.duplicated(keep='first')` first:
# that leaves exactly one row per date, so the mean becomes a no-op and every
# duplicate measurement after the first is silently discarded.
# `numeric_only=True` restricts the mean to numeric columns so that any
# text column does not make the aggregation raise.
# groupby sorts the group keys by default, so the result is in date order.
df = df.groupby(level=0).mean(numeric_only=True)

# Fill missing systolic readings by interpolating between neighbouring visits.
# NOTE(review): the default method is 'linear', which treats rows as equally
# spaced and ignores the gaps between visit dates; consider method='time' if
# the irregular spacing should be taken into account.
df['bp_systolic'] = df['bp_systolic'].interpolate()

# Extract the single time-indexed Series of systolic blood pressure.
blood_pressure_series = df['bp_systolic']

# Result: the blood pressure measurements as a time-indexed pandas Series.
print(blood_pressure_series)


visit_date
2023-01-17    108.6
2023-02-22    104.8
2023-03-23    106.3
2023-04-16    110.8
2023-06-05    117.7
2023-06-16    118.0
2023-06-21    119.9
2023-07-03    115.5
2023-08-23    119.2
2023-10-13    121.2
2023-10-20    120.0
2023-12-13    123.1
2024-01-17    122.4
2024-03-01    121.3
2024-03-14    122.4
2024-04-04    119.8
2024-04-17    122.5
2024-05-19    119.7
2024-05-25    114.8
2024-07-03    117.1
2024-10-05    115.2
2024-10-21    116.7
2024-11-14    111.8
2024-12-07    110.2
2025-01-19    110.3
2025-02-20    107.3
2025-02-25    104.4
2025-03-25    109.7
2025-04-03    107.5
2025-06-17    105.8
2025-06-23    106.3
2025-07-31    106.7
2025-08-26    104.7
2025-11-12    108.7
2025-12-04    104.8
2025-12-08    107.9
2025-12-22    108.0
2026-01-26    106.5
2026-02-14    107.8
2026-04-28    111.8
2026-05-24    112.1
2026-05-30    107.0
2026-06-30    110.9
2026-07-22    110.3
2026-08-15    111.0
2026-09-19    108.9
2026-09-30    109.6
2026-10-26    106.5
2027-01-07    108.0
2027-02-1