# Data pre-processing

This code follows the dataset preprocessing proposed in A Survey on Behaviour Recognition Using WiFi Channel State Information (https://github.com/ermongroup/Wifi_Activity_Recognition).

Here's a breakdown of what the code does:

 - It initializes two empty arrays, xx and yy, to store the input data and annotation data, respectively.
 - It imports the input data from CSV files specified by path1. The code iterates over each file, reads the data from the CSV file, and converts it into a numpy array (tmp1).
 - It applies a sliding window technique to the input data. It iterates over the data in windows of size window_size with a sliding step of slide_size. For each window, it extracts the required values and appends them to the x2 array.
 - The x2 array is concatenated with the xx array to store all the input data.
 - The xx array is reshaped to two dimensions: one row per window, with the window_size x 90 values of each window flattened into a single row.
 - It imports the annotation data from CSV files specified by path2. The code follows a similar process as step 2 to read the annotation data from the CSV file and convert it into a numpy array (tmp2).
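 - It applies the same sliding window technique to the annotation data. For each window, it counts the occurrences of each activity label within the window. If one activity accounts for more than threshold percent of the window, the window gets an 8-element label with a 1 in that activity's column; otherwise it gets the "no activity" label, which has a 2 in the first column. The resulting labels are stored in the y array.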
 - The y array is concatenated with the yy array to store all the annotation data.
 - Finally, the function returns the xx and yy arrays.
 
After defining the dataimport function, the code enters the main section. It creates the output directory "Dataset/processed_data/" if it doesn't already exist.

Next, it iterates over a list of labels: "bed", "fall", "pickup", "run", "sitdown", "standup", and "walk". For each label, it constructs file paths for the input and annotation CSV files based on the label. It also defines output file names for xx and yy based on the window size and threshold.

Then, it calls the dataimport function with the input and annotation file paths, and assigns the returned values to x and y.
Next, it writes the contents of x to the output CSV file defined by outputfilename1, and the contents of y to the output CSV file defined by outputfilename2.

Finally, it prints a message indicating the completion of the label processing.

In [None]:
import numpy as np,numpy
import csv
import glob
import os

# Number of consecutive CSV rows that make up one window.
window_size = 1000
# Percentage of a window's rows that must carry the same activity label
# (strictly more than this share) for the window to be given that label.
threshold = 60
# Row offset between the starts of two consecutive windows.
slide_size = 200 #less than window_size!!!

In [3]:

# Activity names in the order they are tested, paired with the column that is
# set to 1 in the 8-element label row. Column 0 is reserved for "no activity".
_ACTIVITY_COLUMNS = (
    ("bed", 1),
    ("fall", 2),
    ("walk", 3),
    ("pickup", 4),
    ("run", 5),
    ("sitdown", 6),
    ("standup", 7),
)


def _read_csv_rows(path, convert):
    """Read a CSV file and return its rows with every cell passed through convert."""
    with open(path, "r") as handle:
        return [[convert(cell) for cell in row] for row in csv.reader(handle)]


def _window_starts(n_rows, window, step):
    """Return the window start offsets 0, step, 2*step, ... <= n_rows + 1 - 2*window."""
    # The bound uses 2 * window (not window); this is kept exactly as in the
    # original preprocessing so that the produced windows do not change.
    return range(0, n_rows + 2 - 2 * window, step)


def _label_row(labels, min_count):
    """Build the 8-element label row for one window of annotation strings."""
    row = np.zeros(8)
    for name, column in _ACTIVITY_COLUMNS:
        # First activity (in table order) that exceeds the threshold wins.
        if np.count_nonzero(labels == name) > min_count:
            row[column] = 1
            return row
    # No activity dominates the window: marked with a 2 in column 0.
    row[0] = 2
    return row


def dataimport(path1, path2, *, window=None, step=None, percent=None):
    """Cut input and annotation CSV files into sliding windows.

    Files are selected with glob patterns and processed in sorted order.
    Input and annotation files are processed independently; rows of the two
    returned arrays correspond only if both file sets yield the same windows
    in the same order (presumably guaranteed by the file naming -- verify).

    Args:
        path1: Glob pattern of the input CSV files. Every cell must parse as
            a float; columns 1..90 of each row are used, column 0 is skipped.
        path2: Glob pattern of the annotation CSV files. The activity label
            is read from the first column of each row.
        window: Rows per window. Defaults to the module-level window_size.
        step: Row offset between consecutive windows. Defaults to the
            module-level slide_size.
        percent: A window is labelled with an activity when strictly more
            than this percentage of its rows carry that label. Defaults to
            the module-level threshold.

    Returns:
        A tuple (xx, yy). xx has shape (n_windows, window * 90) with each
        window flattened row by row. yy has shape (n_label_windows, 8); a row
        has a 1 in column 1..7 for bed, fall, walk, pickup, run, sitdown,
        standup respectively, or a 2 in column 0 when no activity exceeds the
        threshold. Files too short to hold a window contribute no rows, and
        both arrays are empty (zero rows) when nothing matched.

    Raises:
        ValueError: If window or step is not positive, if a cell of an input
            file is not a number, or if an input file that yields windows
            does not provide 90 data columns.
    """
    # Fall back to the notebook-level settings when no override is given.
    window = window_size if window is None else window
    step = slide_size if step is None else step
    percent = threshold if percent is None else percent
    if window < 1 or step < 1:
        raise ValueError("window and step must be positive integers")

    ###Input data###
    # Windows are collected as views and copied once at the end, instead of
    # re-copying the whole accumulated array for every new window.
    windows = []
    for f in sorted(glob.glob(path1)):
        print("input_file_name=",f)
        tmp1 = np.array(_read_csv_rows(f, float))
        for k in _window_starts(len(tmp1), window, step):
            block = tmp1[k:k + window, 1:91]
            if block.shape[1] != 90:
                raise ValueError(
                    "%s: expected 90 data columns after the first column, got %d"
                    % (f, block.shape[1])
                )
            windows.append(block)

    if windows:
        xx = np.stack(windows).reshape(len(windows), -1)
    else:
        # reshape(0, -1) cannot infer the second dimension of an empty array,
        # so the empty result is built with its explicit shape.
        xx = np.empty((0, window * 90), float)

    ###Annotation data###
    min_count = window * percent / 100
    label_rows = []
    for ff in sorted(glob.glob(path2)):
        print("annotation_file_name=",ff)
        tmp2 = np.array(_read_csv_rows(ff, str))
        for k in _window_starts(len(tmp2), window, step):
            label_rows.append(_label_row(tmp2[k:k + window, 0], min_count))

    yy = np.vstack(label_rows) if label_rows else np.empty((0, 8), float)

    print(xx.shape,yy.shape)
    return (xx, yy)


#### Main ####
# Create the output directory; exist_ok makes this a no-op when it is already
# there and avoids the separate check-then-create step.
os.makedirs("Dataset/processed_data/", exist_ok=True)

# Each activity is processed on its own and written to its own pair of files.
for label in ["bed", "fall", "pickup", "run", "sitdown", "standup", "walk"]:
    # Glob patterns selecting every raw input / annotation file of this activity.
    filepath1 = "./Dataset/raw_data/input_*" + label + "*.csv"
    filepath2 = "./Dataset/raw_data/annotation_*" + label + "*.csv"
    # Output names encode the window size and threshold used for the run.
    outputfilename1 = "./Dataset/processed_data/xx_" + str(window_size) + "_" + str(threshold) + "_" + label + ".csv"
    outputfilename2 = "./Dataset/processed_data/yy_" + str(window_size) + "_" + str(threshold) + "_" + label + ".csv"

    x, y = dataimport(filepath1, filepath2)
    # One CSV line per window: flattened input values in xx, label row in yy.
    with open(outputfilename1, "w") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerows(x)
    with open(outputfilename2, "w") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerows(y)
    print(label + "finish!")


input_file_name= ./Dataset/raw_data\input_161219_sankalp_bed_1.csv
input_file_name= ./Dataset/raw_data\input_161219_sankalp_bed_10.csv
input_file_name= ./Dataset/raw_data\input_161219_sankalp_bed_2.csv
input_file_name= ./Dataset/raw_data\input_161219_sankalp_bed_3.csv
input_file_name= ./Dataset/raw_data\input_161219_sankalp_bed_4.csv
input_file_name= ./Dataset/raw_data\input_161219_sankalp_bed_5.csv
input_file_name= ./Dataset/raw_data\input_161219_sankalp_bed_6.csv
input_file_name= ./Dataset/raw_data\input_161219_sankalp_bed_7.csv
input_file_name= ./Dataset/raw_data\input_161219_sankalp_bed_8.csv
input_file_name= ./Dataset/raw_data\input_161219_sankalp_bed_9.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_bed_1.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_bed_10.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_bed_2.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_bed_3.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_bed_4.cs

annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1140_03.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1141_04.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1142_05.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1148_06.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1149_07.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1150_08.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1151_09.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1152_10.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1153_11.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1154_12.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1155_13.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1156_14.csv
annotation_file_name= ./Dataset/raw_data\annotation_bed_170309_1157_15.csv
annotation_file_name= ./D

annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1138_04.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1138_05.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1141_06.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1141_07.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1142_08.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1143_09.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1143_10.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1144_11.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1145_12.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1146_13.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1146_14.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1147_15.csv
annotation_file_name= ./Dataset/raw_data\annotation_fall_170310_1148_16.csv
annotation_f

input_file_name= ./Dataset/raw_data\input_pickup_170309_1230_18.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1231_19.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1232_20.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1234_01.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1235_02.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1236_03.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1237_04.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1238_05.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1239_06.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1240_07.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1241_08.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1242_09.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1243_10.csv
input_file_name= ./Dataset/raw_data\input_pickup_170309_1244_11.csv
input_file_name= ./Dataset/raw_data\input_pickup

annotation_file_name= ./Dataset/raw_data\annotation_sankalp_pickup_9.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_pickup_1.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_pickup_10.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_pickup_2.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_pickup_3.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_pickup_4.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_pickup_5.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_pickup_6.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_pickup_7.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_pickup_8.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_pickup_9.csv
(7200, 90000) (7200, 8)
pickupfinish!
input_file_name= ./Dataset/raw_data\input_161219_sankalp_run_1.csv
input_file_name= ./Dataset/raw_data\input_161219_sankalp_run_10.csv
input_file_name= ./Dataset/raw_da

annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1310_10.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1311_11.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1312_12.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1313_13.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1314_14.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1315_15.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1316_16.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1317_17.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1318_18.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1319_19.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170309_1320_20.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170316_1425_01.csv
annotation_file_name= ./Dataset/raw_data\annotation_run_170316_1426_02.csv
annotation_file_name= ./D

input_file_name= ./Dataset/raw_data\input_sitdown_170309_1100_05.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1101_06.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1103_07.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1104_08.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1106_09.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1107_10.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1108_11.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1109_12.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1110_13.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1111_14.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1112_15.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1113_16.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1114_17.csv
input_file_name= ./Dataset/raw_data\input_sitdown_170309_1115_18.csv
input_file_name= ./Dataset/raw_dat

input_file_name= ./Dataset/raw_data\input_161219_siamak_standup_2.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_standup_3.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_standup_4.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_standup_5.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_standup_6.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_standup_7.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_standup_8.csv
input_file_name= ./Dataset/raw_data\input_161219_siamak_standup_9.csv
input_file_name= ./Dataset/raw_data\input_standup_170308_1120_01.csv
input_file_name= ./Dataset/raw_data\input_standup_170308_1121_02.csv
input_file_name= ./Dataset/raw_data\input_standup_170308_1122_03.csv
input_file_name= ./Dataset/raw_data\input_standup_170308_1123_04.csv
input_file_name= ./Dataset/raw_data\input_standup_170308_1124_05.csv
input_file_name= ./Dataset/raw_data\input_standup_170308_1125_06.csv
input_file_name= ./Dataset

annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1348_09.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1349_10.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1350_11.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1351_12.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1352_13.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1353_14.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1354_15.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1355_16.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1356_17.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1357_18.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1358_19.csv
annotation_file_name= ./Dataset/raw_data\annotation_standup_170308_1359_20.csv
annotation_file_name= ./Dataset/raw_data\annotation_

annotation_file_name= ./Dataset/raw_data\annotation_sankalp_walk_5.csv
annotation_file_name= ./Dataset/raw_data\annotation_sankalp_walk_6.csv
annotation_file_name= ./Dataset/raw_data\annotation_sankalp_walk_7.csv
annotation_file_name= ./Dataset/raw_data\annotation_sankalp_walk_8.csv
annotation_file_name= ./Dataset/raw_data\annotation_sankalp_walk_9.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_walk_1.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_walk_10.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_walk_2.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_walk_3.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_walk_4.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_walk_5.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_walk_6.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_walk_7.csv
annotation_file_name= ./Dataset/raw_data\annotation_siamak_walk_8.csv
annotation_fil