In [36]:
import pandas as pd

def load_and_preprocess(file_path, skip_rows) -> pd.DataFrame:
    """Load one glucose export and return a time-indexed frame of sensor readings.

    Parameters
    ----------
    file_path : str or path-like
        CSV file to read.
    skip_rows : int
        Passed to ``pd.read_csv(skiprows=...)``; the first line that is not
        skipped is used as the header row.

    Returns
    -------
    pd.DataFrame
        A single float column ``'Glucose'`` indexed by ``'Timestamp'``, sorted
        in ascending time order. Rows whose date/time cannot be parsed, or
        whose glucose value is missing or non-numeric, are dropped.

    Raises
    ------
    KeyError
        If the file lacks a 'Date', 'Time' or 'Sensor Glucose (mmol/L)' column.
    """
    glucose_col = 'Sensor Glucose (mmol/L)'

    raw = pd.read_csv(file_path, skiprows=skip_rows, low_memory=False)

    # Work on an explicit copy so the column assignments below never write
    # into a view of `raw` (avoids SettingWithCopyWarning).
    readings = raw[['Date', 'Time', glucose_col]].copy()

    # Dates are interpreted day-first; unparseable values become NaT and are dropped below.
    readings['Timestamp'] = pd.to_datetime(
        readings['Date'] + ' ' + readings['Time'], errors='coerce', dayfirst=True
    )

    # Any non-numeric cell makes read_csv return the whole column as strings;
    # coerce to float so downstream arithmetic (e.g. averaging) works.
    readings['Glucose'] = pd.to_numeric(readings[glucose_col], errors='coerce')

    readings = readings.dropna(subset=['Timestamp', 'Glucose'])
    return readings.set_index('Timestamp').sort_index()[['Glucose']]

# Each export is paired with the number of rows to skip at the top of that file
glucose_sources = [('BG.csv', 731), ('BG-1.csv', 3323), ('BG-2.csv', 4719)]
df_glucose1, df_glucose2, df_glucose3 = (
    load_and_preprocess(path, skip_rows=rows) for path, rows in glucose_sources
)

# Stack the three frames and put the readings in chronological order
df_combined = pd.concat([df_glucose1, df_glucose2, df_glucose3]).sort_index()

# Readings that share a timestamp are collapsed into their mean glucose value
df_combined = df_combined.groupby(level=0).mean()

# Persist the merged series (the Timestamp index is written as the first column)
df_combined.to_csv('UoM2308.csv')

# Show the merged frame as the cell output
df_combined

Unnamed: 0,Timestamp,Glucose
0,28/11/2023 16:05:21,15.6
1,28/11/2023 16:10:21,14.6
2,28/11/2023 16:15:21,13.5
3,28/11/2023 16:20:21,12.4
4,28/11/2023 16:25:21,11.0
...,...,...
49050,27/05/2024 22:20:26,6.9
49051,27/05/2024 22:25:26,7.2
49052,27/05/2024 22:30:26,7.5
49053,27/05/2024 22:35:26,7.8


In [37]:
# Select every row whose index label occurs more than once
# (keep=False flags all occurrences, not just the repeats).
# NOTE(review): the recorded output below lists Timestamp as a column rather
# than the index — confirm this check ran against the timestamp-indexed frame.
repeated_mask = df_combined.index.duplicated(keep=False)
duplicates = df_combined.loc[repeated_mask]
print(duplicates)

Empty DataFrame
Columns: [Timestamp, Glucose]
Index: []
