In [None]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from os import path
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Folder holding the simulated CGM training data (relative to the working directory).
train_data_path = 'gdrive/My Drive/Summer Research/Simulated CGM Data/'
# Folder holding the glucose CSVs that removeManualGlucoseMeasurements reads.
test_data_path = 'gdrive/My Drive/Summer Research/Glucose/Diabetes/'
# Sub-folder of test_data_path that receives the cleaned copies of those CSVs.
directory = 'Cleaned Data/'

Check how much training data got uploaded

In [None]:
# For each adult 1-10, report how many of the 187 expected segment files exist in Extracted/
for subject in range(1, 11):
  expected_files = (f'adult#{subject:03d}_{segment:03d}.csv' for segment in range(1, 188))
  found = sum(path.exists(train_data_path+'Extracted/'+name) for name in expected_files)
  print(f'{subject:03d}:{found:03d}')

001:187
002:187
003:000
004:187
005:187
006:187
007:187
008:187
009:000
010:187


Remove manual glucose measurements

In [None]:
def removeManualGlucoseMeasurements(filename):
  """Write a copy of a glucose CSV with the manually entered readings removed.

  Reads ``test_data_path + filename``, drops every row whose fourth column
  (positional index 3) equals the string 'manual', and saves the remaining
  rows, without the index column, to ``test_data_path + directory + filename``.

  Args:
    filename: Name of the CSV file inside ``test_data_path``.
  """
  df = pd.read_csv(test_data_path+filename)

  # A single vectorised comparison replaces the per-row df.values[i][3]
  # lookup, which rebuilt the whole value array on every iteration.
  is_manual = df.iloc[:, 3] == 'manual'

  df[~is_manual].to_csv(test_data_path+directory+filename, index=False)

Split the training data into CSV files of 512 samples each

In [None]:
def extractCGMDays(filename):
  """Split one simulated CGM recording into consecutive 512-sample CSV files.

  Reads the third column (positional index 2) of ``train_data_path + filename``,
  discarding the file's header row, and writes each complete block of 512
  samples to ``train_data_path + 'Extracted/' + <stem>_<NNN>.csv``, where NNN
  is the 1-based, zero-padded block number. The output files contain one
  value per line and no header, so ``np.loadtxt`` can read them directly.

  The source file is parsed once and sliced in memory. The number of blocks
  is derived from the data actually read; a trailing partial block is not
  written.

  Args:
    filename: Name of the source CSV inside ``train_data_path``; its last four
      characters (the '.csv' extension) are stripped to form the output stem.
  """
  samples_per_file = 512
  # header=None with skiprows=1 drops the original header line and keeps every
  # sample as data, so all 512 values of a block are written the same way.
  readings = pd.read_csv(train_data_path+filename, usecols=[2], header=None, skiprows=1)

  for days in range(1, len(readings)//samples_per_file+1):
    block = readings.iloc[samples_per_file*(days-1):samples_per_file*days]
    num = f'{days:03d}'
    block.to_csv(train_data_path+'Extracted/'+filename[:-4]+"_"+num+".csv", index=False, header=False)

def rowrange(index, days):
  """Tell pandas whether to skip a CSV row when extracting one 512-row block.

  Intended as a ``skiprows`` callable: returns True (skip) for row 0 and for
  every row outside block number ``days``, and False (keep) for rows
  512*(days-1)+1 through 512*days inclusive.

  Args:
    index: Zero-based row number in the file.
    days: One-based block number.
  """
  kept_rows = range(512*(days-1)+1, 512*days+1)
  return index == 0 or index not in kept_rows

In [None]:
# Strip manual readings from 'glucose (1).csv' through 'glucose (9).csv'
for file_number in range(1, 10):
  removeManualGlucoseMeasurements(f'glucose ({file_number}).csv')

# Split each simulated adult recording into segments; adults 3 and 9 are skipped
for adult_id in range(1, 11):
  if adult_id not in (3, 9):
    extractCGMDays(f'adult#{adult_id:03d}.csv')

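Wavelet transformation. The MATLAB code below builds the n-by-n transform matrix T from four filter banks; `fourWTM` in the next cell is its Python port (we will specify which value of n to use):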

function[] = Wavelet_Transform(filename, n)
    % Builds the n-by-n matrix T whose rows are shifted copies of four
    % 8-coefficient filters (k = n/4 rows per filter).
    % NOTE(review): filename is not used in the portion shown here, and T is
    % neither returned nor applied - presumably the rest of the function was
    % omitted; confirm against the original MATLAB source.
    %Filter banks
    h0 = [0.2697890;0.3947890;0.5197890;0.6447890;
        0.2302110;0.1052110;-0.0197890;-0.1447890];
    h1 = [-0.2825435;0.5553379;0.2385187;-0.0783004;
        -0.5834819;-0.2666627;0.0501564;0.3669755];
    h2 = [0.4125840;-0.6279376;0.3727824;0.1487574;
        -0.4125840;-0.1885590;0.0354659;0.2594909];
    h3 = [0.2382055;0.1088646;-0.7275830;0.5572896;
        -0.2382055;-0.1088646;0.0204763;0.1498171];
    %Matrix of filter banks created for convenience
    % Each row of h is one filter (the column vectors above, transposed).
    h = [h0';h1';h2';h3'];

    k = n/4;
    T = zeros(n);
    % j selects the filter, i selects the row within that filter's group of k rows.
    for j = 1:1:4
        for i = 1:1:k
            % Row i starts its coefficients at column 4*(i-1)+1.
            if 4*(i-1)+8 > 4*k
                % The 8 coefficients would run past column 4*k: keep the first
                % four in place and write the last four to columns 1-4.
                T(k*(j-1)+i,(4*(i-1)+1):1:(4*(i-1)+4)) = h(j,1:1:4);
                T(k*(j-1)+i,1:1:4) = h(j,5:1:8);
            else
                T(k*(j-1)+i,(4*(i-1)+1):1:(4*(i-1)+8)) = h(j,:);
            end
        end
    end

In [None]:
def fourWTM(n):
  """Build the n-by-n transform matrix from four 8-coefficient filter banks.

  The matrix consists of four stacked groups of k = int(n/4) rows, one group
  per filter. Row i of a group holds that filter's coefficients starting at
  column 4*i; when they would extend past column 4*k, the first four stay in
  place and the last four are written to columns 0-3 instead.

  Args:
    n: Size of the square matrix.

  Returns:
    An (n, n) NumPy float array.
  """
  # Filter banks, stored one filter per row
  filters = np.array([
    [0.2697890, 0.3947890, 0.5197890, 0.6447890, 0.2302110, 0.1052110, -0.0197890, -0.1447890],
    [-0.2825435, 0.5553379, 0.2385187, -0.0783004, -0.5834819, -0.2666627, 0.0501564, 0.3669755],
    [0.4125840, -0.6279376, 0.3727824, 0.1487574, -0.4125840, -0.1885590, 0.0354659, 0.2594909],
    [0.2382055, 0.1088646, -0.7275830, 0.5572896, -0.2382055, -0.1088646, 0.0204763, 0.1498171],
  ])

  rows_per_filter = int(n/4)
  T = np.zeros((n, n))
  for band, taps in enumerate(filters):
    for shift in range(rows_per_filter):
      row = rows_per_filter*band + shift
      start = 4*shift
      if start+8 > 4*rows_per_filter:
        # Last row of the group: wrap the trailing four coefficients to the front
        T[row, start:start+4] = taps[:4]
        T[row, :4] = taps[4:]
      else:
        T[row, start:start+8] = taps
  return T

In [None]:
def four_Wavelet_Transform(filename,n):
  """Threshold the wavelet coefficients of one extracted segment and save the result.

  Loads ``train_data_path + 'Extracted/' + filename``, multiplies it by the
  ``fourWTM(n)`` matrix, and, in each of the last three quarters of the
  coefficient vector, zeroes every coefficient whose magnitude is below
  ``std(quarter) * sqrt(2 * log(n/4))``. The first quarter is left untouched.
  The thresholded coefficients are multiplied by the transposed matrix and
  written to ``train_data_path + 'Wavelet Transformed Data/' + filename``.

  Args:
    filename: Name of the segment file inside the 'Extracted/' folder.
    n: Size of the transform matrix; must equal the number of samples in the
      file for the matrix product to be defined.
  """
  cgm = np.loadtxt(train_data_path+'Extracted/'+filename, delimiter=',')
  # Build the matrix once; it is needed for both the forward and inverse step.
  T = fourWTM(n)
  ts = np.matmul(T,cgm)
  for j in range(1,4):
    # Basic slicing returns a view, so zeroing entries of `band` edits ts in place.
    band = ts[int(j*len(cgm)/4):int((j+1)*len(cgm)/4)]
    # The threshold is computed from the band before any coefficient is zeroed.
    lbda = np.std(band)*math.sqrt(2*math.log(n/4))
    band[np.abs(band) < lbda] = 0
  cleanedCGM = np.matmul(np.transpose(T),ts)
  np.savetxt(train_data_path+'Wavelet Transformed Data/'+filename, cleanedCGM, delimiter=',', fmt='%f')

In [None]:
# Run the wavelet thresholding on every extracted segment that exists
for adult_id in range(1, 11):
  for segment in range(1, 188):
    filename = f'adult#{adult_id:03d}_{segment:03d}.csv'
    if not path.exists(train_data_path+'Extracted/'+filename):
      continue
    four_Wavelet_Transform(filename,512)

In [None]:
# For each adult 1-10, report how many segment files exist in 'Wavelet Transformed Data/'
denoised_dir = train_data_path+'Wavelet Transformed Data/'
for adult_id in range(1, 11):
  written = 0
  for segment in range(1, 188):
    if path.exists(f'{denoised_dir}adult#{adult_id:03d}_{segment:03d}.csv'):
      written += 1
  print(f'{adult_id:03d}:{written:03d}')

001:187
002:187
003:000
004:187
005:187
006:187
007:187
008:187
009:000
010:187


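Decompose the CGM data into four components A1, D1, D2, and D3, each rebuilt from one quarter of the wavelet coefficients and having the same length as the input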

In [None]:
def four_Wavelet_Transform_Decomp(filename,n):
  """Split one extracted segment into four components, one per coefficient band.

  Loads ``train_data_path + 'Extracted/' + filename`` as an (n, 1) column,
  multiplies it by the ``fourWTM(n)`` matrix, and for each quarter of the
  resulting coefficients forms the sum of (coefficient * corresponding matrix
  row). The four length-n components are saved under
  ``train_data_path + 'Wavelet Transformed Data/'`` in the sub-folders A1
  (first quarter), D1, D2 and D3 (fourth quarter), using the same filename.

  The band size is derived from ``n`` rather than fixed at 128 rows / 512
  samples, so the function works for any ``n`` that is a multiple of 4.

  Args:
    filename: Name of the segment file inside the 'Extracted/' folder.
    n: Number of samples in the file and size of the transform matrix.
  """
  cgm = np.loadtxt(train_data_path+'Extracted/'+filename, delimiter=',').reshape(n,1)
  t = fourWTM(n)
  ts = np.matmul(t,cgm)

  dim = n//4
  out_dir = train_data_path+'Wavelet Transformed Data/'
  for band, label in enumerate(('A1', 'D1', 'D2', 'D3')):
    rows = slice(band*dim, (band+1)*dim)
    # (n, dim) @ (dim, 1): the sum over this band of coefficient * matrix row,
    # returned as a column vector.
    component = np.matmul(np.transpose(t[rows]), ts[rows])
    np.savetxt(out_dir+label+'/'+filename, component, delimiter=',', fmt='%f')

  #plt.plot(cgm, label='CGM')
  #plt.plot(A1, label='A1')
  #plt.plot(D1, label='D1')
  #plt.plot(D2, label='D2')
  #plt.plot(D3, label='D3')
  #plt.xlabel('Time (Every 3 minutes)')
  #plt.ylabel('CGM')
  #plt.legend()
  #plt.show()
  #test = np.zeros((512, 1))
  #test = A1+D1+D2+D3
  #print(np.linalg.norm(cgm-test))

In [None]:
# Write the A1/D1/D2/D3 components for every extracted segment that exists
for adult_id in range(1, 11):
  segment_names = [f'adult#{adult_id:03d}_{segment:03d}.csv' for segment in range(1, 188)]
  for filename in segment_names:
    if path.exists(train_data_path+'Extracted/'+filename):
      four_Wavelet_Transform_Decomp(filename,512)

In [None]:
# For each adult 1-10, report how many segment files exist in the D1 output folder
for adult_id in range(1, 11):
  present = [
    path.exists(train_data_path+'Wavelet Transformed Data/D1/'+f'adult#{adult_id:03d}_{segment:03d}.csv')
    for segment in range(1, 188)
  ]
  print(f'{adult_id:03d}:{present.count(True):03d}')

001:187
002:187
003:000
004:187
005:187
006:187
007:187
008:187
009:000
010:187
