In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.dates import DateFormatter

In [38]:
# Read the participant demographics table and preview up to its first 16 rows.
dg_df = pd.read_csv(
    '../big-ideas-lab-glycemic-variability-and-wearable-device-data-1.1.2/Demographics.csv'
)
dg_df.head(16)

Unnamed: 0,ID,Gender,HbA1c
0,13,MALE,5.7
1,1,FEMALE,5.5
2,3,FEMALE,5.9
3,4,FEMALE,6.4
4,5,FEMALE,5.7
5,2,MALE,5.6
6,6,FEMALE,5.8
7,7,FEMALE,5.3
8,8,FEMALE,5.6
9,10,FEMALE,6.0


In [39]:
# Compute the overall start and end time across all sensor data
def get_full_range(acc_df, eda_df, hr_df, bvp_df, temp_df):
    """Return the earliest and latest ``datetime`` found across five sensor frames.

    Parameters
    ----------
    acc_df, eda_df, hr_df, bvp_df, temp_df : pandas.DataFrame
        Sensor tables; each must have a ``datetime`` column.

    Returns
    -------
    tuple
        ``(full_range_start, full_range_end)``: the minimum of the per-frame
        minima and the maximum of the per-frame maxima.  Frames whose
        ``datetime`` column is empty (min/max is ``NaT``) are ignored; the
        result is ``NaT`` only when every frame is empty.
    """
    sensor_frames = (acc_df, eda_df, hr_df, bvp_df, temp_df)

    # Reduce through a Series instead of the built-in min()/max(): every
    # comparison against NaT is False, so the built-ins return an
    # order-dependent result (e.g. NaT whenever the first frame is empty),
    # whereas Series.min()/max() skip missing values.
    per_frame_starts = pd.Series([frame['datetime'].min() for frame in sensor_frames])
    per_frame_ends = pd.Series([frame['datetime'].max() for frame in sensor_frames])

    full_range_start = per_frame_starts.min()
    full_range_end = per_frame_ends.max()
    return full_range_start, full_range_end

In [40]:
# Plot the missing-data ranges of every sensor for the selected id and save the figure
def plot_missing_ranges_for_id(selected_id, full_range_start, full_range_end,
                               acc_missing_df, eda_missing_df, hr_missing_df,
                               bvp_missing_df, temp_missing_df):
    """Draw one horizontal lane per sensor showing its missing ranges and save it as a PNG.

    Parameters
    ----------
    selected_id : int
        Id used to filter the ``id`` column of every table, shown in the
        title, and zero-padded to three digits in the output file name.
    full_range_start, full_range_end
        Bounds of the shaded "Full Data Range" rectangle.  They must support
        subtraction and ``strftime`` (e.g. ``pandas.Timestamp``).
    acc_missing_df, eda_missing_df, hr_missing_df, bvp_missing_df, temp_missing_df : pandas.DataFrame
        Per-sensor tables with ``id``, ``start`` and ``end`` columns; each row
        is drawn as one segment from ``start`` to ``end``.

    Side effects
    ------------
    Writes ``./total_img/{selected_id:03}_missing_ranges.png`` (creating the
    directory if needed) and closes the figure.  The input tables are not
    modified.
    """
    import os  # function-scope import: the notebook's import cell does not provide os

    # Keep only the rows of the selected id, per sensor
    selected_dfs = {
        'ACC': acc_missing_df[acc_missing_df['id'] == selected_id],
        'EDA': eda_missing_df[eda_missing_df['id'] == selected_id],
        'HR': hr_missing_df[hr_missing_df['id'] == selected_id],
        'BVP': bvp_missing_df[bvp_missing_df['id'] == selected_id],
        'TEMP': temp_missing_df[temp_missing_df['id'] == selected_id]
    }
    colors = {'ACC': 'b', 'EDA': 'g', 'HR': 'r', 'BVP': 'm', 'TEMP': 'c'}
    y_positions = {'ACC': 1.1, 'EDA': 1.2, 'HR': 1.3, 'BVP': 1.4, 'TEMP': 1.5}

    # A short figure keeps the five sensor lanes close together
    fig, ax = plt.subplots(figsize=(12, 4))

    # Light-gray rectangle marking the full data range behind all lanes
    full_range_patch = patches.Rectangle((full_range_start, 1.05), full_range_end - full_range_start, 0.5,
                                         color='lightgray', alpha=0.5, label='Full Data Range')
    ax.add_patch(full_range_patch)

    # Draw each sensor's ranges as horizontal segments on that sensor's lane
    for sensor, df in selected_dfs.items():
        # Convert into new Series rather than assigning back into the filtered
        # slice: avoids SettingWithCopyWarning and leaves the inputs untouched.
        starts = pd.to_datetime(df['start'])
        ends = pd.to_datetime(df['end'])
        lane_y = y_positions[sensor]
        for position, (start, end) in enumerate(zip(starts, ends)):
            # Label only the first segment so the legend has one entry per
            # sensor.  The position within the filtered rows is used, not the
            # DataFrame index label, which is rarely 0 after filtering by id.
            ax.plot([start, end], [lane_y, lane_y], marker='o', markersize=2, color=colors[sensor],
                    label=sensor if position == 0 else "")

    # Axis setup: one tick per sensor lane, y-range trimmed to the lanes
    ax.set_yticks(list(y_positions.values()))
    ax.set_yticklabels(list(y_positions.keys()))
    ax.set_ylim(1.05, 1.55)

    ax.set_xlabel('Time')
    ax.set_title(f'Time Ranges for ID {selected_id} with Full Data Range')

    # Date formatting; tick_params also applies to ticks created after this call
    date_format = DateFormatter("%Y-%m-%d %H:%M")
    ax.xaxis.set_major_formatter(date_format)
    ax.tick_params(axis='x', labelrotation=45, labelsize=10)

    # Print the range start and end below the axis, rotated like the tick labels
    for boundary in (full_range_start, full_range_end):
        ax.annotate(boundary.strftime('%Y-%m-%d %H:%M'),
                    xy=(boundary, 0), xycoords=('data', 'axes fraction'),
                    xytext=(0, -44), textcoords='offset points',
                    ha='center', va='center', fontsize=10, rotation=45, color='gray')

    ax.legend()
    fig.tight_layout()

    # Save the figure as an image, creating the output directory when absent
    os.makedirs('./total_img', exist_ok=True)
    fig.savefig(f'./total_img/{selected_id:03}_missing_ranges.png')

    # Close the figure so repeated calls do not accumulate open figures
    plt.close(fig)

In [41]:
# Load the per-sensor missing-range tables.
acc_missing = pd.read_csv('./acc_missing_ranges_with_duration.csv')
eda_missing = pd.read_csv('./eda_missing_ranges_with_duration.csv')
hr_missing = pd.read_csv('./hr_missing_ranges_with_duration.csv')
bvp_missing = pd.read_csv('./bvp_missing_ranges_with_duration.csv')
temp_missing = pd.read_csv('./temp_missing_ranges_with_duration.csv')

# Parse the range boundaries of every table into datetimes.
for missing_table in (acc_missing, eda_missing, hr_missing, bvp_missing, temp_missing):
    for boundary_column in ('start', 'end'):
        missing_table[boundary_column] = pd.to_datetime(missing_table[boundary_column])

In [42]:
# Build and save the missing-range figure for every participant in the demographics table.
DATASET_DIR = '../big-ideas-lab-glycemic-variability-and-wearable-device-data-1.1.2'
SENSOR_NAMES = ('ACC', 'EDA', 'HR', 'BVP', 'TEMP')
SKIPPED_IDS = {15}  # ID 15 is skipped; NOTE(review): the reason is not shown in this notebook — confirm

# The loop variable is `subject_id`, not `id`, so the built-in id() is not
# shadowed for the rest of the kernel session.
for subject_id in range(1, len(dg_df) + 1):
    if subject_id in SKIPPED_IDS:
        continue

    subject_dir = f'{DATASET_DIR}/{subject_id:03}'
    sensor_frames = {}
    for sensor_name in SENSOR_NAMES:
        # Only the timestamps are used below, so only that column is read.
        sensor_frame = pd.read_csv(f'{subject_dir}/{sensor_name}_{subject_id:03}.csv',
                                   usecols=['datetime'])
        sensor_frame['datetime'] = pd.to_datetime(sensor_frame['datetime'])
        sensor_frames[sensor_name] = sensor_frame

    full_range_start, full_range_end = get_full_range(
        sensor_frames['ACC'], sensor_frames['EDA'], sensor_frames['HR'],
        sensor_frames['BVP'], sensor_frames['TEMP'],
    )

    # plot_missing_ranges_for_id filters every table by id itself, so the
    # full missing-range tables are passed without pre-filtering here.
    plot_missing_ranges_for_id(subject_id, full_range_start, full_range_end,
                               acc_missing, eda_missing, hr_missing, bvp_missing, temp_missing)
    print(f'ID : {subject_id:03} saved')

ID : 001 saved
ID : 002 saved
ID : 003 saved
ID : 004 saved
ID : 005 saved
ID : 006 saved
ID : 007 saved
ID : 008 saved
ID : 009 saved
ID : 010 saved
ID : 011 saved
ID : 012 saved
ID : 013 saved
ID : 014 saved
ID : 016 saved
