# Import Libraries

In [3]:
import pandas as pd
import numpy as np
import os
import csv
import xml.etree.ElementTree as ET
import glob

# Ohio1TD Dataset

In [18]:
# Define the folder containing the XML files
input_folder = 'OhioT1D'
output_folder = 'Processed_Data/T1D_Data'
os.makedirs(output_folder, exist_ok=True)  # Create output folder if it doesn't exist

# Loop through each XML file in the folder
for filename in os.listdir(input_folder):
    if filename.endswith('.xml'):  # Process only XML files
        input_file_path = os.path.join(input_folder, filename)
        output_file_path = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}_processed.csv")

        # Load and parse the XML file
        tree = ET.parse(input_file_path)
        root = tree.getroot()

        # Extract glucose level data and format it
        glucose_data = []
        for event in root.find('glucose_level').findall('event'):
            timestamp = event.get('ts')  # Get the timestamp

            # Convert glucose value and round to 2 decimal places
            value = round(float(event.get('value')) / 18, 2)

            time_only = timestamp.split(' ')[1]  # Extract time (HH:MM:SS)
            glucose_data.append((time_only, value))

        # Write to a CSV file
        with open(output_file_path, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Time', 'Glucose'])  # Write header
            writer.writerows(glucose_data)  # Write data rows

        # Create extra time and classification attributes
        df = pd.read_csv(output_file_path)
        df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S')
        df['Hour'] = df['Time'].dt.hour
        df['Minute'] = df['Time'].dt.minute
        df["Hour_sin"] = np.sin(2 * np.pi * df["Hour"] / 24)
        df["Hour_cos"] = np.cos(2 * np.pi * df["Hour"] / 24)
        df["Minute_sin"] = np.sin(2 * np.pi * df["Minute"] / 60)
        df["Minute_cos"] = np.cos(2 * np.pi * df["Minute"] / 60)

        # Drop null values from 'Glucose' column
        df['Glucose'].replace('', pd.NA)
        df = df.dropna(subset=['Glucose'])

        # Define glucose level thresholds
        bins = [-float('inf'), 2.5, 5, 8, 10, float('inf')]
        labels = ['Very Low', 'Low', 'Normal', 'High', 'Very High']
        df['Glucose_Level_Range'] = pd.cut(df['Glucose'], bins=bins, labels=labels)

        # Create attributes and values for future time-prediction modelling
        # Rolling window features
        df["Glucose_roll_mean"] = df["Glucose"].rolling(window=5, min_periods=1).mean()

        # Previous glucose values
        df["Glucose_lag1"] = df["Glucose"].shift(1)
        df["Glucose_lag2"] = df["Glucose"].shift(2)
        df["Glucose_lag3"] = df["Glucose"].shift(3)
        df["Glucose_lag4"] = df["Glucose"].shift(4)

        # Drop NaN values from feature generation
        df.dropna(inplace=True)

        # Future glucose values
        df["Glucose_15min"] = df["Glucose"].shift(-3)  # 15 minutes ahead
        df["Glucose_30min"] = df["Glucose"].shift(-6)  # 30 minutes ahead
        df["Glucose_45min"] = df["Glucose"].shift(-9)  # 45 minutes ahead
        df["Glucose_60min"] = df["Glucose"].shift(-12) # 60 minutes ahead

        # Drop NaN values after shifting
        df.dropna(inplace=True)

        # Shape and missing values
        print(df.shape)
        print("Missing values:")
        print(df.isna().sum())

        df.to_csv(output_file_path, index=False)
        print(f"\nProcessed {filename} and saved to {output_file_path}")


(10780, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_Level_Range    0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_30min          0
Glucose_45min          0
Glucose_60min          0
dtype: int64

Processed 559-ws-training.xml and saved to Processed_Data/T1D_Data/559-ws-training_processed.csv
(2498, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_Level_Range    0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_3

# ShanghaiT1DM Dataset

In [13]:
# Define the folder containing the Excel files
input_folder = 'Shanghai_T1DM'
output_folder = 'Processed_Data/T1D_Data'
os.makedirs(output_folder, exist_ok=True)  # Create output folder if it doesn't exist

# Loop through each Excel file in the folder
for filename in os.listdir(input_folder):
    if filename.endswith(('.xlsx', '.xls')):  # Process only Excel files
        input_file_path = os.path.join(input_folder, filename)
        output_file_path = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}_processed.csv")

        # Load the Excel file
        try:
            df = pd.read_excel(input_file_path)

            # Extract time from the timestamp and rename columns
            df['Time'] = pd.to_datetime(df['Date']).dt.time  # Extract time
            df['Glucose'] = (df['CGM (mg / dl)'] / 18).round(2)  # Scale and round Glucose values

            # Drop null values from 'Glucose' column
            df['Glucose'].replace('', pd.NA)
            df = df.dropna(subset=['Glucose'])

            # Create extra time and classification attributes
            df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S')
            df['Hour'] = df['Time'].dt.hour
            df['Minute'] = df['Time'].dt.minute
            df["Hour_sin"] = np.sin(2 * np.pi * df["Hour"] / 24)
            df["Hour_cos"] = np.cos(2 * np.pi * df["Hour"] / 24)
            df["Minute_sin"] = np.sin(2 * np.pi * df["Minute"] / 60)
            df["Minute_cos"] = np.cos(2 * np.pi * df["Minute"] / 60)

            # Define glucose level thresholds
            bins = [-float('inf'), 2.5, 5, 8, 10, float('inf')]
            labels = ['Very Low', 'Low', 'Normal', 'High', 'Very High']
            df['Glucose_Level_Range'] = pd.cut(df['Glucose'], bins=bins, labels=labels)

            # Create attributes and values for future time-prediction modelling
            # Rolling window features
            df["Glucose_roll_mean"] = df["Glucose"].rolling(window=5, min_periods=1).mean()

            # Previous glucose values
            df["Glucose_lag1"] = df["Glucose"].shift(1)
            df["Glucose_lag2"] = df["Glucose"].shift(2)
            df["Glucose_lag3"] = df["Glucose"].shift(3)
            df["Glucose_lag4"] = df["Glucose"].shift(4)

            # Future glucose values
            df["Glucose_15min"] = df["Glucose"].shift(-1)  # 15 minutes ahead
            df["Glucose_30min"] = df["Glucose"].shift(-2)  # 30 minutes ahead
            df["Glucose_45min"] = df["Glucose"].shift(-3)  # 45 minutes ahead
            df["Glucose_60min"] = df["Glucose"].shift(-4) # 60 minutes ahead

            # Create a new DataFrame with only the desired columns
            new_df = df[['Time', 'Glucose', 'Hour', 'Minute', 'Glucose_Level_Range', 'Hour_sin', 'Hour_cos',
                         'Minute_sin', 'Minute_cos','Glucose_roll_mean','Glucose_lag1', 'Glucose_lag2', 'Glucose_lag3', 'Glucose_lag4',
                         'Glucose_15min', 'Glucose_30min', 'Glucose_45min', 'Glucose_60min']]

            # Write to a CSV file
            new_df.to_csv(output_file_path, index=False)
            created_df = pd.read_csv(output_file_path)

            #Drop null values
            created_df.dropna(inplace=True)
            created_df.to_csv(output_file_path, index=False)

            # Shape and missing values
            print(created_df.shape)
            print("Missing values:")
            print(created_df.isna().sum())

            print(f"\nProcessed {filename} and saved to {output_file_path}")
        except Exception as e:
            print(f"Failed to process {filename}: {e}")

(1331, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Glucose_Level_Range    0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_30min          0
Glucose_45min          0
Glucose_60min          0
dtype: int64

Processed 1012_0_20210923.xls and saved to Processed_Data/T1D_Data/1012_0_20210923_processed.csv
(1331, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Glucose_Level_Range    0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_30

# ShanghaiT2DM Dataset

In [14]:
# Define the folder containing the Excel files
input_folder = 'Shanghai_T2DM'
output_folder = 'Processed_Data/T2D_Data'
os.makedirs(output_folder, exist_ok=True)  # Create output folder if it doesn't exist

# Loop through each Excel file in the folder
for filename in os.listdir(input_folder):
    if filename.endswith(('.xlsx', '.xls')):  # Process only Excel files
        input_file_path = os.path.join(input_folder, filename)
        output_file_path = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}_processed.csv")

        # Load the Excel file
        try:
            df = pd.read_excel(input_file_path)

            # Extract time from the timestamp and rename columns
            df['Date'] = df['Date'].astype(str)
            df['Time'] = df['Date'].str.split('.').str[0]  # Remove fractional seconds
            df['Time'] = pd.to_datetime(df['Time']).dt.time  # Extract time

            # List of column names that contain glucose values
            glucose_columns = ['CGM (mg / dl)', 'CGM ']

            # Loop through the list of possible column names
            for col in glucose_columns:
              if col in df.columns:  # Check if column exists
                df['Glucose'] = (df[col] / 18).round(2)  # Convert and round
                break  # Stop once we find and process the correct column

            # Drop null values from 'Glucose' column
            df['Glucose'].replace('', pd.NA)
            df = df.dropna(subset=['Glucose'])

            # Create extra time and classification attributes
            df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S')
            df['Hour'] = df['Time'].dt.hour
            df['Minute'] = df['Time'].dt.minute
            df["Hour_sin"] = np.sin(2 * np.pi * df["Hour"] / 24)
            df["Hour_cos"] = np.cos(2 * np.pi * df["Hour"] / 24)
            df["Minute_sin"] = np.sin(2 * np.pi * df["Minute"] / 60)
            df["Minute_cos"] = np.cos(2 * np.pi * df["Minute"] / 60)

            # Define glucose level thresholds
            bins = [-float('inf'), 2.5, 5, 8, 10, float('inf')]
            labels = ['Very Low', 'Low', 'Normal', 'High', 'Very High']
            df['Glucose_Level_Range'] = pd.cut(df['Glucose'], bins=bins, labels=labels)

            #Create attributes and values for future time-prediction modelling
            # Rolling window features
            df["Glucose_roll_mean"] = df["Glucose"].rolling(window=5, min_periods=1).mean()

            # Previous glucose values
            df["Glucose_lag1"] = df["Glucose"].shift(1)
            df["Glucose_lag2"] = df["Glucose"].shift(2)
            df["Glucose_lag3"] = df["Glucose"].shift(3)
            df["Glucose_lag4"] = df["Glucose"].shift(4)


            # Future glucose values
            df["Glucose_15min"] = df["Glucose"].shift(-1)  # 15 minutes ahead
            df["Glucose_30min"] = df["Glucose"].shift(-2)  # 30 minutes ahead
            df["Glucose_45min"] = df["Glucose"].shift(-3)  # 45 minutes ahead
            df["Glucose_60min"] = df["Glucose"].shift(-4) # 60 minutes ahead

            # Create a new DataFrame with only the desired columns
            new_df = df[['Time', 'Glucose', 'Hour', 'Minute', 'Glucose_Level_Range', 'Hour_sin', 'Hour_cos',
                         'Minute_sin', 'Minute_cos','Glucose_roll_mean','Glucose_lag1', 'Glucose_lag2', 'Glucose_lag3', 'Glucose_lag4',
                         'Glucose_15min', 'Glucose_30min', 'Glucose_45min', 'Glucose_60min']]

            # Write to a CSV file
            new_df.to_csv(output_file_path, index=False)
            created_df = pd.read_csv(output_file_path)

            #Drop null values
            created_df.dropna(inplace=True)
            created_df.to_csv(output_file_path, index=False)

            # Shape and missing values
            print(created_df.shape)
            print("Missing values:")
            print(created_df.isna().sum())

            print(f"\nProcessed {filename} and saved to {output_file_path}")
        except Exception as e:
            print(f"Failed to process {filename}: {e}")

(1331, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Glucose_Level_Range    0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_30min          0
Glucose_45min          0
Glucose_60min          0
dtype: int64

Processed 2001_1_20201117.xlsx and saved to Processed_Data/T2D_Data/2001_1_20201117_processed.csv
(1137, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Glucose_Level_Range    0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_3

# Participant's Data

In [15]:
output_folder = 'Processed_Data/T2D_Data'
filename = 'participant_data.csv'
output_file_path = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}_processed.csv")

# Load the CSV file
try:
  df = pd.read_csv('participant_data.csv', skiprows=range(0, 1))
  df['Historic Glucose mmol/L'].replace('', pd.NA)
  df.dropna(subset=['Historic Glucose mmol/L'])

  # Extract time from the timestamp and rename columns
  df['Time'] = pd.to_datetime(df['Device Timestamp'], dayfirst=True).dt.time  # Extract time
  df['Glucose'] = df['Historic Glucose mmol/L']  # Rename column

  # Create extra time and classification attributes
  df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S')
  df['Hour'] = df['Time'].dt.hour
  df['Minute'] = df['Time'].dt.minute
  df["Hour_sin"] = np.sin(2 * np.pi * df["Hour"] / 24)
  df["Hour_cos"] = np.cos(2 * np.pi * df["Hour"] / 24)
  df["Minute_sin"] = np.sin(2 * np.pi * df["Minute"] / 60)
  df["Minute_cos"] = np.cos(2 * np.pi * df["Minute"] / 60)

  # Define glucose level thresholds
  bins = [-float('inf'), 2.5, 5, 8, 10, float('inf')]
  labels = ['Very Low', 'Low', 'Normal', 'High', 'Very High']
  df['Glucose_Level_Range'] = pd.cut(df['Glucose'], bins=bins, labels=labels)

  #Create attributes and values for future time-prediction modelling
  # Rolling window features
  df["Glucose_roll_mean"] = df["Glucose"].rolling(window=5, min_periods=1).mean()

  # Previous glucose values
  df["Glucose_lag1"] = df["Glucose"].shift(1)
  df["Glucose_lag2"] = df["Glucose"].shift(2)
  df["Glucose_lag3"] = df["Glucose"].shift(3)
  df["Glucose_lag4"] = df["Glucose"].shift(4)

  # Future glucose values
  df["Glucose_15min"] = df["Glucose"].shift(-1)  # 15 minutes ahead
  df["Glucose_30min"] = df["Glucose"].shift(-2)  # 30 minutes ahead
  df["Glucose_45min"] = df["Glucose"].shift(-3)  # 45 minutes ahead
  df["Glucose_60min"] = df["Glucose"].shift(-4) # 60 minutes ahead

  # Create a new DataFrame with only the desired columns
  new_df = df[['Time', 'Glucose', 'Hour', 'Minute', 'Glucose_Level_Range', 'Hour_sin', 'Hour_cos',
                         'Minute_sin', 'Minute_cos','Glucose_roll_mean','Glucose_lag1', 'Glucose_lag2', 'Glucose_lag3', 'Glucose_lag4',
                         'Glucose_15min', 'Glucose_30min', 'Glucose_45min', 'Glucose_60min']]

  # Get rid of null values
  new_df['Glucose'].replace('', pd.NA)
  new_df = new_df.dropna(subset=['Glucose'])

  # Write to a CSV file
  new_df.to_csv(output_file_path, index=False)
  created_df = pd.read_csv(output_file_path)

  #Drop null values
  created_df.dropna(inplace=True)
  created_df.to_csv(output_file_path, index=False)

  # Shape and missing values
  print(created_df.shape)
  print("Missing values:")
  print(created_df.isna().sum())

  print(f"\nProcessed {filename} and saved to {output_file_path}")
except Exception as e:
  print(f"Failed to process {filename}: {e}")

(21993, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Glucose_Level_Range    0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_30min          0
Glucose_45min          0
Glucose_60min          0
dtype: int64

Processed participant_data.csv and saved to Processed_Data/T2D_Data/participant_data_processed.csv


# Create Concatenated CSV Files

In [19]:
def concatenate_csv_files(input_folder, output_file):
  # Get a list of all CSV files in the folder
  csv_files = glob.glob(os.path.join(input_folder, "*.csv"))

  # Initialize an empty list to store DataFrames
  df_list = []

  # Loop through each file and append its contents to the list
  for file in csv_files:
    df = pd.read_csv(file)  # Read CSV file
    df_list.append(df)  # Append DataFrame to list

  # Concatenate all DataFrames into one
  final_df = pd.concat(df_list, ignore_index=True)

  # Get rid of null values
  final_df['Glucose'].replace('', pd.NA)
  final_df = final_df.dropna(subset=['Glucose'])

  # Save the combined DataFrame to a new CSV file
  final_df.to_csv(output_file, index=False)

  # Shape and missing values
  print(final_df.shape)
  print("Missing values:")
  print(final_df.isna().sum())

  print(f"\nSuccessfully merged {len(csv_files)} files into", output_file)

concatenate_csv_files('Processed_Data/T1D_Data', 'Processed_Data/T1D_Data/combined_T1D_data.csv')
concatenate_csv_files('Processed_Data/T2D_Data', 'Processed_Data/T2D_Data/combined_T2D_data.csv')

(89233, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Glucose_Level_Range    0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_30min          0
Glucose_45min          0
Glucose_60min          0
dtype: int64

Successfully merged 17 files into Processed_Data/T1D_Data/combined_T1D_data.csv
(128597, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Glucose_Level_Range    0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_30min          0

### Concatenate Ohio Files

In [20]:
# Define folder containing CSV files
folder_path = 'Processed_Data/T1D_Data'

# Get a list of all CSV files in the folder
csv_files = glob.glob(os.path.join(folder_path, "*.csv"))

# Initialize an empty list to store DataFrames
df_list = []
count = 0

# Loop through each file and append its contents to the list
for file in csv_files:
  if '-ws-' in file:
    count += 1
    df = pd.read_csv(file)  # Read CSV file
    df_list.append(df)  # Append DataFrame to list

# Concatenate all DataFrames into one
final_df = pd.concat(df_list, ignore_index=True)

# Get rid of null values
final_df['Glucose'].replace('', pd.NA)
final_df = final_df.dropna(subset=['Glucose'])

# Save the combined DataFrame to a new CSV file
final_df.to_csv('Processed_Data/T1D_Data/combined_ohio_data.csv', index=False)

# Shape and missing values
print(final_df.shape)
print("Missing values:")
print(final_df.isna().sum())

print(f"\nSuccessfully merged {count} files into 'combined_ohio_data.csv'")

(85033, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_Level_Range    0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_30min          0
Glucose_45min          0
Glucose_60min          0
dtype: int64

Successfully merged 12 files into 'combined_ohio_data.csv'


### Concatenate ShanghaiT1D Files

In [21]:
# Define folder containing CSV files
folder_path = 'Processed_Data/T1D_Data'

# Get a list of all CSV files in the folder
csv_files = glob.glob(os.path.join(folder_path, "*.csv"))

# Initialize an empty list to store DataFrames
df_list = []
count = 0

# Loop through each file and append its contents to the list
for file in csv_files:
  if '10' in file:
    count += 1
    df = pd.read_csv(file)  # Read CSV file
    df_list.append(df)  # Append DataFrame to list

# Concatenate all DataFrames into one
final_df = pd.concat(df_list, ignore_index=True)

# Get rid of null values
final_df['Glucose'].replace('', pd.NA)
final_df = final_df.dropna(subset=['Glucose'])

# Save the combined DataFrame to a new CSV file
final_df.to_csv('Processed_Data/T1D_Data/combined_shanghait1d_data.csv', index=False)

# Shape and missing values
print(final_df.shape)
print("Missing values:")
print(final_df.isna().sum())

print(f"\nSuccessfully merged {count} files into 'combined_shanghait1d_data.csv'")

(4200, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Glucose_Level_Range    0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_30min          0
Glucose_45min          0
Glucose_60min          0
dtype: int64

Successfully merged 5 files into 'combined_shanghait1d_data.csv'


### Concatenate ShanghaiT2D Files

In [22]:
# Define folder containing CSV files
folder_path = 'Processed_Data/T2D_Data'

# Get a list of all CSV files in the folder
csv_files = glob.glob(os.path.join(folder_path, "*.csv"))

# Initialize an empty list to store DataFrames
df_list = []
count = 0

# Loop through each file and append its contents to the list
for file in csv_files:
  if '20' in file:
    count += 1
    df = pd.read_csv(file)  # Read CSV file
    df_list.append(df)  # Append DataFrame to list

# Concatenate all DataFrames into one
final_df = pd.concat(df_list, ignore_index=True)

# Get rid of null values
final_df['Glucose'].replace('', pd.NA)
final_df = final_df.dropna(subset=['Glucose'])

# Save the combined DataFrame to a new CSV file
final_df.to_csv('Processed_Data/T2D_Data/combined_shanghait2d_data.csv', index=False)

# Shape and missing values
print(final_df.shape)
print("Missing values:")
print(final_df.isna().sum())

print(f"\nSuccessfully merged {count} files into 'combined_shanghait2d_data.csv'")

(106604, 18)
Missing values:
Time                   0
Glucose                0
Hour                   0
Minute                 0
Glucose_Level_Range    0
Hour_sin               0
Hour_cos               0
Minute_sin             0
Minute_cos             0
Glucose_roll_mean      0
Glucose_lag1           0
Glucose_lag2           0
Glucose_lag3           0
Glucose_lag4           0
Glucose_15min          0
Glucose_30min          0
Glucose_45min          0
Glucose_60min          0
dtype: int64

Successfully merged 105 files into 'combined_shanghait2d_data.csv'
