<a href="https://colab.research.google.com/github/AliAqdas-repo/FallDetection/blob/main/FD_DataOrganization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Organization and Structuring
The goal of this notebook is to download the dataset and organize the necessary data so that it can be used to train a Deep Learning model for Fall Detection.

## Importing Dependencies

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import re
import csv

## Downloading Dataset from Drive
The dataset was downloaded once and copied to Google Drive, so it does not have to be fetched from its original source again and again. From here on it is downloaded from **Drive**, which transfers it over Google's own servers.

In [None]:
#Downloading Dataset from Google Drive
!gdown --id 1-J5TBYvi---DW68UvVrUMhyF2u-QZLdj

In [None]:
#Unzipping Dataset
if not os.path.exists('/content/Dataset/'):
  os.mkdir('/content/Dataset/')
!unzip /content/dataset.zip -d /content/Dataset/

## Loading the Dataset in Colab

In [None]:
#Function that Enlists Full Path of Data Files
def list_full_path(dir):
  """Return one path per entry of `dir`: the directory joined with the entry name minus its extension.

  Entries come back in the order `os.listdir` reports them.
  """
  full_paths=[]
  for entry in os.listdir(dir):
    stem,_=os.path.splitext(entry)
    full_paths.append(os.path.join(dir,stem))
  return full_paths

In [None]:
#Extension-less full paths of every entry in the extracted FallAllD folder
data_files=list_full_path('/content/Dataset/FallAllD')

In [None]:
#Peek at the first path to see the file naming pattern
data_files[0]

In [None]:
#Length of Data: number of entries found in the dataset folder (all sensor files, before any filtering)
len(data_files)

In [None]:
#Extracts the Activity Code
#The code sits at characters 8-10 of the file name; slicing the base name (instead of
#offsets 34:37 into the absolute path) keeps this working if the dataset folder moves.
int(os.path.basename(data_files[0])[8:11])

## Splitting Data
Splitting the entire dataset into Falls and Activities of Daily Living (ADLs).

In [None]:
fall_files=[]
not_fall_files=[]
for datfile in data_files:
  fname=os.path.basename(datfile)
  #Rejecting Barometer and Magnetometer Data (the sensor letter is the last character of the name)
  if fname[-1] in ('B','M'):
    continue
  #Activity code = characters 8-10 of the file name. Slicing the base name rather than the
  #absolute path (previously offsets 34:37) keeps the split independent of the dataset folder.
  activity_code=int(fname[8:11])
  #Splitting into Fall and Not Fall based on ActivtyID2Str.m file
  if 100<activity_code<136:
    fall_files.append(datfile)
  else:
    not_fall_files.append(datfile)

In [None]:
#Length of All Files kept for the ADL class, and Number of Sets of Sensor Measurements.
#We divide by 2 because the barometer and magnetometer files were rejected in the split,
#leaving 2 sensor files (accelerometer and gyroscope) per subject/measurement.
#Both values are returned as one tuple so the notebook displays them together.
len(not_fall_files), len(not_fall_files)//2

## Fusing Sensor Data
We fuse the data from the different sensors, which have different sampling rates, into a single file per recording that can be used as input to a Neural Network.

In [None]:
#Sorting the Data
#Sorted order places the sensor files of one recording next to each other,
#which is how the fusion step groups them.
fall_files,not_fall_files=sorted(fall_files),sorted(not_fall_files)

In [None]:
def read_into_array(filedir):
  """Read a comma-separated sensor file into a list of rows, each a list of floats.

  filedir : path to the file. If it carries no extension, '.dat' is appended.

  Blank lines are skipped, so a stray empty line does not add an empty row
  (which would change the row count used later to recognise the sensor).
  Raises ValueError if a field cannot be parsed as a float.
  """
  base,ext=os.path.splitext(filedir)
  if ext=='':
    ext='.dat'
  #newline='' is what the csv module expects so it can handle line endings itself
  with open(base+ext,'r',newline='') as datfile:
    #csv.reader yields [] for a blank line; `if row` drops those
    return [[float(val) for val in row] for row in csv.reader(datfile,delimiter=',') if row]

In [None]:
#Order of Files: Acc Bar Gyro Magn
import cv2
def _recording_id(path):
  """Name shared by all sensor files of one recording: base name without extension and 2-char sensor suffix."""
  return os.path.splitext(os.path.basename(path))[0][:-2]


def _resample_sensor(datpts):
  """Resample one sensor recording to 952 rows and return it as a 2-D float array.

  The sensor is recognised by its number of rows:
    200  rows -> column 0 only, every row repeated 5 times, first 952 rows kept
    1600 rows -> every row repeated 3 times, last 40 rows dropped, every 5th row kept
    4760 rows -> every 5th row kept
  Raises ValueError for any other row count or for non-rectangular data.
  """
  n_rows=len(datpts)
  if n_rows not in (200,1600,4760):
    raise ValueError(f'unexpected number of rows ({n_rows}); expected 200, 1600 or 4760')
  samples=np.asarray(datpts,dtype=float)
  if samples.ndim!=2:
    raise ValueError('rows do not all have the same number of columns')
  if n_rows==200:
    #Zero Order Hold to upsample by a factor of 5 (200 -> 1000 rows), dropping the last 48 samples
    return np.repeat(samples[:,:1],5,axis=0)[:952]
  if n_rows==1600:
    #Zero Order Hold to upsample by a factor of 3 (1600 -> 4800 rows),
    #then dropping the last 40 samples and downsampling by a factor of 5
    return np.repeat(samples,3,axis=0)[0:4760:5]
  #4760 rows: downsample by a factor of 5
  return samples[::5]


def save_combined_data(input_files,output_dir=None,AG_ONLY=True,verbose=True):
  """Fuse the sensor files of each recording into one (952 x channels) array.

  ############### INPUT PARAMETERS ##################
  input_files = Input File Paths. Consecutive groups of 2 (AG_ONLY) or 4 paths must
                belong to the same recording, so pass a sorted list.
  output_dir  = Directory to Output Combined Data. One '<recording>.csv' is written per
                group. If None, nothing is written and the fused data is returned.
  AG_ONLY     = Acceleration and Gyro Data Only (True) -> 6 channels, else 10 channels.
  verbose     = Print each processed path and a 'Saving Data' line per saved file (True).

  Channels are filled in the order the files appear inside a group. With sorted paths:
    AG_ONLY=True : 1-3 Accelerometer, 4-6 Gyroscope
    AG_ONLY=False: 1-3 Accelerometer, 4 Barometer, 5-7 Gyroscope, 8-10 Magnetometer

  Returns an array of shape (groups, 952, channels) when output_dir is None, else None.
  Raises ValueError instead of saving uninitialised data when the file list cannot be
  split into whole groups, a group mixes recordings, a file has an unexpected size, or
  a group does not fill every channel.
  """
  OUTPUT_SAMPLES=952

  if AG_ONLY:
    GAP=2
    TOTAL_CHANNELS=6
  else:
    GAP=4
    TOTAL_CHANNELS=10

  n_groups,leftover=divmod(len(input_files),GAP)
  if leftover:
    raise ValueError(f'{len(input_files)} input files cannot be split into groups of {GAP}')

  save_to_disk=output_dir is not None
  if save_to_disk:
    os.makedirs(output_dir,exist_ok=True)
  else:
    #Only needed when the result is returned instead of written to disk
    dataset=np.empty((n_groups,OUTPUT_SAMPLES,TOTAL_CHANNELS))

  for group_idx in range(n_groups):
    group=input_files[group_idx*GAP:(group_idx+1)*GAP]

    #All files of a group must come from one recording, otherwise sensors of different trials get fused
    recording_ids={_recording_id(ip_file) for ip_file in group}
    if len(recording_ids)!=1:
      raise ValueError(f'Files of different recordings in one group: {list(group)}')
    recording=recording_ids.pop()

    data=np.empty((OUTPUT_SAMPLES,TOTAL_CHANNELS))
    co_channel=0 #Current Output Channel, Used to Iterate Data in Loop
    for ip_file in group:
      try:
        resampled=_resample_sensor(read_into_array(ip_file))
      except ValueError as err:
        #Re-raise with the offending path so the bad file can be found
        raise ValueError(f'{ip_file}: {err}') from err
      channels=resampled.shape[1]
      if co_channel+channels>TOTAL_CHANNELS:
        raise ValueError(f'{ip_file}: group provides more than {TOTAL_CHANNELS} channels')
      if verbose:
        print(ip_file)
      data[:,co_channel:co_channel+channels]=resampled #assigning channels to data from files
      co_channel=co_channel+channels

    #np.empty leaves unassigned columns uninitialised, so every channel must have been written
    if co_channel!=TOTAL_CHANNELS:
      raise ValueError(f'{recording}: filled {co_channel} of {TOTAL_CHANNELS} channels')

    if save_to_disk:
      if verbose:
        print('Saving Data')
      np.savetxt(os.path.join(output_dir,f'{recording}.csv'),data,delimiter=",")
    else:
      dataset[group_idx]=data

  if not save_to_disk:
    return dataset

In [None]:
#Write one fused CSV per recording, one output folder per class
for class_name,class_files in (('fall_files',fall_files),('not_fall_files',not_fall_files)):
  save_combined_data(class_files,f'/content/data_proc_AG/{class_name}/',True)

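The processed data can now be zipped and uploaded to Google Drive, so it does not have to be regenerated the next time you work on this project. Google Drive must be mounted at `/content/drive` before running the cell below.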

In [None]:
#Archive the fused CSVs of both classes into a single zip
!zip -r /content/data_proc_AG.zip /content/data_proc_AG/
#Copy the archive to Google Drive.
#NOTE(review): nothing in this notebook mounts Drive - /content/drive must already be mounted
#and the Datasets/FallDetect folder must exist, otherwise this copy fails.
!cp /content/data_proc_AG.zip /content/drive/MyDrive/Datasets/FallDetect/data_proc_AG.zip