In [1]:
import mojimoji
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re

# Reading Data

In [2]:
# Source CSV with the raw blood-pressure records (Shift-JIS encoded).
bp_csv_path = r'/home/hiroshi_koshimizu/work/dataset/P005_BpData_180424.csv'

# The unnamed first CSV column is used as the row index.
df_bp = pd.read_csv(bp_csv_path, index_col = 'Unnamed: 0', encoding = 'shift-jis', engine = 'python')

# Convert String

In [3]:
def _to_halfwidth_name(raw_name):
    """Apply mojimoji.zen_to_han to the name, then replace '･' with '/'."""
    return mojimoji.zen_to_han(raw_name).replace('･', '/')


# Normalise every name in a single pass.
df_bp['Name'] = df_bp['Name'].map(_to_halfwidth_name)

# Parse the date column, then derive month and week numbers per timestamp.
df_bp['Date'] = pd.DatetimeIndex(df_bp['Date'])
df_bp['Month'] = df_bp['Date'].map(lambda stamp: stamp.month)
df_bp['Week'] = df_bp['Date'].map(lambda stamp: stamp.week)

# Delete useless columns

In [4]:
# List the available columns before deciding which ones to drop.
df_bp.columns

Index(['Name', 'Birth', 'Age', 'Sex', 'Date', 'Time', 'Type', 'SBP', 'DBP',
       'PUL', 'Fit', 'Move', 'Temp', 'Irr.PUL', 'Check', 'Meas_Time', 'Month',
       'Week'],
      dtype='object')

In [5]:
# Columns that are not used by any later step of this notebook.
unused_columns = [
    'Birth', 'Age', 'Sex', 'Fit', 'Move',
    'Check', 'Irr.PUL', 'Meas_Time', 'Week', 'Month',
]
df_bp = df_bp.drop(columns = unused_columns)

# Extract Type

In [6]:
# Keep only the rows whose Type is 'M', and renumber the index from 0.
type_m_mask = df_bp['Type'].isin(['M'])
df_bp = df_bp.loc[type_m_mask].reset_index(drop = True)

# Group by

In [7]:
# Length of one sample window, in days.
day_param = 14

# Average all readings that share the same (Name, Date) pair.
# numeric_only=True is required because df_bp still carries non-numeric
# columns (e.g. the string column 'Type'): older pandas dropped them silently,
# while pandas >= 2.0 raises a TypeError when asked for their mean.
df_bp_gr = (
    df_bp
    .groupby(by = ['Name', 'Date'], as_index = False, observed = True)
    .mean(numeric_only = True)
    .set_index('Date')
)

# For each person, collect the daily values of every column into one list per
# `day_param`-day window. The `_7d` suffix is historical; the window length is
# whatever `day_param` says.
# NOTE(review): the next cell expects a 'Name' column in this result -- confirm
# that groupby().resample().apply(list) still keeps it on your pandas version.
df_bp_gr_7d = (
    df_bp_gr
    .groupby(['Name'], as_index = False)
    .resample(str(day_param) + 'D', label = 'left')
    .apply(list)
    .reset_index(drop = True)
)

In [8]:
# Expand every list-valued column into `day_param` scalar columns named
# <column>1 .. <column><day_param>, one row per window.
window_frames = [
    pd.DataFrame(
        df_bp_gr_7d[col_name].tolist(),
        columns = [col_name + str(day + 1) for day in range(day_param)],
    )
    for col_name in df_bp_gr_7d.drop('Name', axis = 1).columns
]

# Place the per-column frames side by side and discard every row that
# contains a NaN (for example, windows with fewer than `day_param` values).
target_df = pd.concat(window_frames, axis = 1).dropna()

# Determine labels

In [9]:
# Name the per-day columns explicitly, using the same <column><day> scheme
# that built target_df. An open-ended slice such as `:'SBP14'` would silently
# average any column that happens to precede the SBP block, and a hard-coded
# 14 stops matching as soon as `day_param` changes.
sbp_cols = ['SBP' + str(day + 1) for day in range(day_param)]
dbp_cols = ['DBP' + str(day + 1) for day in range(day_param)]

# Per-window mean of the SBP and DBP values.
target_df['msbp'] = target_df[sbp_cols].mean(axis = 1)
target_df['mdbp'] = target_df[dbp_cols].mean(axis = 1)

# dropna() left gaps in the index; renumber the rows from 0.
target_df = target_df.reset_index(drop = True)

In [10]:
# Flags for windows whose mean SBP / mean DBP reach the cut-offs used here
# (135 and 85; presumably hypertension thresholds -- confirm the source).
s_hyflag = target_df['msbp'] >= 135
d_hyflag = target_df['mdbp'] >= 85
sd_hyflag = s_hyflag & d_hyflag

# Label encoding: 0 = neither flag, 1 = SBP flag only, 2 = DBP flag only,
# 3 = both flags. np.select takes the first matching condition, so the
# "both" case has to be listed first.
target_df['labels'] = np.select(
    [sd_hyflag.values, d_hyflag.values, s_hyflag.values],
    [3, 2, 1],
    default = 0,
).astype('int64')


In [11]:
# Everything except the last three columns (msbp, mdbp, labels) is a feature.
feature_frame = target_df.iloc[:, :-3]
n_features = len(feature_frame.columns) // day_param

# Columns are grouped per feature, so reshape to (rows, features, days) first
# and then swap the last two axes to get (rows, days, features).
target_df_arr = (
    feature_frame.values
    .reshape(len(target_df), n_features, day_param)
    .transpose(0, 2, 1)
)

In [12]:
# Sanity check: the shape should read (rows, day_param, number of features).
target_df_arr.shape

(8219, 14, 3)

# Write Data

In [13]:
# Store the feature array as `x` and the label vector as `y` in one .npz file.
output_path = '/home/hiroshi_koshimizu/work/RCGAN/inputs/bp_data.npz'
np.savez(
    output_path,
    x = target_df_arr,
    y = target_df['labels'].values,
)

In [14]:
# Count how many rows carry each label value.
target_df['labels'].value_counts()

0    5960
1    1241
3     514
2     504
Name: labels, dtype: int64

In [15]:
# Show the array contents (NumPy abbreviates large arrays with '...').
target_df_arr

array([[[155.5,  85. ,  69. ],
        [159. ,  90.5,  74. ],
        [150.5,  87. ,  70.5],
        ...,
        [131.5,  80.5,  63. ],
        [156. ,  81.5,  67.5],
        [153. ,  85.5,  67. ]],

       [[153.5,  84. ,  74. ],
        [135. ,  79.5,  71. ],
        [130. ,  79.5,  68. ],
        ...,
        [146. ,  83. ,  65.5],
        [148. ,  83. ,  62.5],
        [142.5,  83. ,  67.5]],

       [[144.5,  77.5,  65.5],
        [152.5,  80. ,  65.5],
        [173. ,  89. ,  67.5],
        ...,
        [134.5,  78.5,  69.5],
        [144.5,  78. ,  72.5],
        [152. ,  80.5,  69. ]],

       ...,

       [[141. ,  99.5,  47.5],
        [145.5, 101. ,  54.5],
        [129.5,  91.5,  49. ],
        ...,
        [133. ,  94.5,  56.5],
        [126.5,  90.5,  53.5],
        [137.5,  91. ,  48. ]],

       [[120. ,  85.5,  49. ],
        [121. ,  86.5,  49. ],
        [131. ,  91. ,  50. ],
        ...,
        [124. ,  92.5,  53. ],
        [117. ,  83. ,  45.5],
        [130. ,