In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

  from pandas.core.computation.check import NUMEXPR_INSTALLED


To mark out activity bouts, we follow these steps:
- Fourier-transform the data into the frequency domain
- Filter out the noise
- Apply the trained model from improved_classification_model.ipynb


In [None]:
# We first want to find continuous intervals with data
def interval_grabber(df):
  """Split a recording into runs of consecutive non-missing samples, per axis.

  For each of ``acc_x``, ``acc_y`` and ``acc_z`` every row is labelled with a
  run id that increases each time that column switches between "missing" and
  "present". Runs in which the axis has at least one value are kept; runs made
  only of missing values are discarded.

  Parameters
  ----------
  df : pandas.DataFrame
    Frame with exactly four columns. The columns are renamed in place to
    ``['datetime', 'acc_x', 'acc_y', 'acc_z']``. No other change is made to
    ``df`` (no scratch column is left behind).

  Returns
  -------
  list of dict
    Three dictionaries in the order x, y, z. Each maps a run id to an
    independent copy of the rows of that run (all four columns, original
    index preserved).
  """
  df.columns = ['datetime', 'acc_x', 'acc_y', 'acc_z']

  interval_dfs = []
  for axis in ('acc_x', 'acc_y', 'acc_z'):
    has_data = df[axis].notna()
    # The id grows by one at every missing <-> present transition, so each
    # id identifies one contiguous run of rows.
    run_id = (has_data != has_data.shift()).cumsum().rename('interval')

    axis_intervals = {}
    for interval_id, interval_group in df.groupby(run_id):
      # Keep only runs where *this* axis has data. The x pass previously
      # tested acc_z here, which kept x-gaps and dropped valid x-runs
      # whenever the two axes disagreed.
      if interval_group[axis].notna().any():
        # Copy so later in-place edits of an interval never touch df.
        axis_intervals[interval_id] = interval_group.copy()
    interval_dfs.append(axis_intervals)

  return interval_dfs

We now apply the Fourier transform and filter out noise.

In [None]:
def denoise(df, n_components=17):
  """Replace each accelerometer axis with a frequency-filtered version.

  Every axis is transformed with the FFT, only its strongest frequency bins
  are kept, and the inverse transform's real part is stored in a new
  ``denoised_<axis>`` column. The raw ``acc_x``, ``acc_y`` and ``acc_z``
  columns are then dropped.

  Parameters
  ----------
  df : pandas.DataFrame
    Frame containing ``acc_x``, ``acc_y`` and ``acc_z``. It is modified in
    place and also returned.
  n_components : int, default 17
    Number of strongest frequency bins to keep per axis. Signals shorter
    than this keep all of their bins instead of raising.

  Returns
  -------
  pandas.DataFrame
    The same ``df`` object, with the three raw axes replaced by
    ``denoised_acc_x``, ``denoised_acc_y`` and ``denoised_acc_z``.

  Raises
  ------
  ValueError
    If ``n_components`` is smaller than 1.
  """
  if n_components < 1:
    raise ValueError('n_components must be at least 1')

  for axis in ('acc_x', 'acc_y', 'acc_z'):
    df[f'denoised_{axis}'] = _keep_strongest_frequencies(df[axis], n_components)

  df.drop(columns=['acc_x', 'acc_y', 'acc_z'], inplace=True)
  return df


def _keep_strongest_frequencies(signal, n_components):
  """Return `signal` rebuilt from its `n_components` most powerful FFT bins."""
  samples = np.asarray(signal)
  n = len(samples)
  if n == 0:
    # np.fft.fft rejects empty input; an empty signal has nothing to filter.
    return np.zeros(0)

  f_hat = np.fft.fft(samples)
  # Power spectrum; f_hat * conj(f_hat) has zero imaginary part, so keep
  # only the real part and compare plain floats.
  psd = (f_hat * np.conj(f_hat)).real / n

  # np.partition raises when asked for more elements than exist, so clamp
  # the count to the signal length (short intervals keep every bin).
  k = min(n_components, n)
  min_amplitude = np.partition(psd, -k)[-k]

  # ">=" keeps every bin tied with the threshold, so slightly more than k
  # bins can survive when powers are equal.
  keep = psd >= min_amplitude
  return np.fft.ifft(f_hat * keep).real

In [None]:
# df = pd.read_csv('/content/drive/MyDrive/ResearchProject/datafiles/001/ACC_001.csv')
# df.head()

In [None]:
#df.columns = ['datetime', 'acc_x', 'acc_y', 'acc_z']

In [None]:
import os

In [None]:
def denoising_intervals_old(ind):
  """Denoise every segmented CSV of one participant (older Drive layout).

  Lists the ``.csv`` files in the participant's ``segmented`` folder, creates
  the sibling ``denoised_interval`` folder if needed, and for each file
  writes one denoised CSV per interval returned by ``interval_grabber``.

  Parameters
  ----------
  ind : str
    Participant id as used in the folder and file names.

  Returns
  -------
  None
    The results are written to disk.
  """
  segmented_dir = f'/content/drive/MyDrive/ResearchProject/datafiles/{ind}/segmented'
  csv_names = list(filter(lambda name: name.endswith('.csv'), os.listdir(segmented_dir)))

  output_dir = os.path.join(f'/content/drive/MyDrive/ResearchProject/datafiles/{ind}', "denoised_interval")
  os.makedirs(output_dir, exist_ok=True)

  for csv_name in csv_names:
    segment = pd.read_csv(os.path.join(segmented_dir, csv_name))
    # The day label is taken from the first row of the file.
    # NOTE(review): interval_grabber renames the frame to exactly four
    # columns; if these files carry 'day' as a fifth column that rename
    # would fail -- confirm the segmented file layout.
    day = segment['day'].iloc[0]
    for component_intervals in interval_grabber(segment):
      # NOTE(review): interval ids are numbered separately for each
      # component, and the file name contains only the id, so intervals
      # from different components that share an id are written to the
      # same path and the later write replaces the earlier one.
      for interval_id, interval_df in component_intervals.items():
        denoised = denoise(interval_df)
        denoised.to_csv(f'/content/drive/MyDrive/ResearchProject/datafiles/{ind}/denoised_interval/denoised_ACC_{ind}_day{day}_{interval_id}.csv')

def denoising_intervals(ind, data_root='/home/d_nguyen11/Documents/reu_stats_2024/datafiles'):
  """Denoise one participant's accelerometer recording, interval by interval.

  Reads ``<data_root>/<ind>/ACC_<ind>.csv``, splits it into intervals with
  ``interval_grabber``, denoises each interval with ``denoise`` and writes
  the result to ``<data_root>/<ind>/denoised_ACC_<ind>_<interval_id>.csv``.

  Parameters
  ----------
  ind : str
    Participant id as used in the folder and file names (e.g. ``'008'``).
  data_root : str, optional
    Folder holding one sub-folder per participant. Defaults to the
    location this notebook was written against, so existing calls are
    unaffected; pass another folder to run the notebook elsewhere.

  Returns
  -------
  None
    The results are written to disk.
  """
  folder_path = os.path.join(data_root, str(ind))
  df = pd.read_csv(os.path.join(folder_path, f'ACC_{ind}.csv'))

  for interval_dfs in interval_grabber(df):
    # NOTE(review): interval ids are numbered separately for each component,
    # and the file name contains only the id, so intervals from different
    # components that share an id are written to the same path and the later
    # write replaces the earlier one. File names are left unchanged here so
    # that anything reading these files keeps working.
    for interval_id, interval_df in interval_dfs.items():
      denoised = denoise(interval_df)
      denoised.to_csv(os.path.join(folder_path, f'denoised_ACC_{ind}_{interval_id}.csv'))




In [None]:
# Participant ids 8 through 16, zero-padded to three digits ('008' ... '016').
ind_list = [f'{i:03d}' for i in range(8, 17)]

# Run the denoising pipeline once per participant.
for ind in ind_list:
  denoising_intervals(ind)