In [1]:
import numpy as np
import scipy.io
import scipy.stats as stats
import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Directories that are searched, in order, for the dataset files.
candidate_paths = [
    './datasets',
]

# Time window (in seconds) of each capture that is visualised.
start_at_second = 0
how_many_seconds = 20

# Three constants for each dataset: sampling rate of the original capture
# and the subcarrier index range used to label the plots.
csi_per_second   =   150
first_subcarrier = -1024
last_subcarrier  =  1023

# Sampling rate after undersampling.
new_csi_per_second = 30

# Sample-index window at the original rate ...
first_csi = start_at_second * csi_per_second
last_csi  = first_csi + how_many_seconds * csi_per_second
# ... and the matching window at the undersampled rate.  The end index must be
# offset from new_first_csi (not first_csi), otherwise the window is wrong for
# any non-zero start_at_second.
new_first_csi = start_at_second * new_csi_per_second
new_last_csi  = new_first_csi + how_many_seconds * new_csi_per_second

# (file code, human-readable label) for every recorded activity.
actions = [('A','Walk'), ('B', 'Run'), ('C', 'Jump'), ('D', 'Sitting'), 
           ('E', 'Empty Room'), ('F', 'Stand'),('G', 'Wave hands'),  ('H', 'Clapping'),
           ('I', 'Lay'), ('J', 'Wiping'), ('K', 'Squat'), ('L', 'Stretching')] 


In [None]:
def undersample(candidate_paths, csi_per_second, new_csi_per_second, actions):
    """Undersample every action's CSI capture and pickle one file per aggregation.

    For each ``(code, label)`` pair in ``actions`` the file
    ``<path>/dataset/S1a_<code>.mat`` is read from the first entry of
    ``candidate_paths`` that contains it.  Consecutive groups of
    ``csi_per_second // new_csi_per_second`` samples along the first axis are
    collapsed with mean, median, max and min; each result is pickled to
    ``<path>/<aggregation>_dataset/S1a_<code>.pkl`` under the directory the
    source file was found in (the output directory is created if needed).

    Parameters
    ----------
    candidate_paths : sequence of str
        Directories searched in order for the ``dataset`` folder.
    csi_per_second : int
        Sampling rate of the stored capture.
    new_csi_per_second : int
        Target sampling rate.
    actions : sequence of (str, str)
        ``(file code, label)`` pairs; the label is only used in log messages.

    Raises
    ------
    ValueError
        If the two rates do not give a group of at least one sample.

    Notes
    -----
    Actions whose file is missing from every candidate are reported and
    skipped.  Trailing samples that do not fill a whole group are dropped.
    """
    group_size = int(csi_per_second // new_csi_per_second)
    if group_size < 1:
        raise ValueError(
            'new_csi_per_second must not exceed csi_per_second '
            f'(got {new_csi_per_second} > {csi_per_second})'
        )

    # Aggregation name -> reduction over the "samples in a group" axis.
    # NOTE(review): the statistics recorded in this notebook are complex; for
    # complex input numpy's max/min order values by real part first, then
    # imaginary part — confirm that is the intended reduction.
    aggregations = {
        'mean': lambda grouped: grouped.mean(axis=1),
        'median': lambda grouped: np.median(grouped, axis=1),
        'max': lambda grouped: grouped.max(axis=1),
        'min': lambda grouped: grouped.min(axis=1),
    }

    for action in actions:
        filename = f'S1a_{action[0]}'

        # Read CSI from the first file found among the candidates.
        source_dir = None
        for path in candidate_paths:
            full_path_name = f'{path}/dataset/{filename}.mat'
            if os.path.exists(full_path_name):
                source_dir = path
                break
            print(f'File {full_path_name} not found')
        if source_dir is None:
            # Nothing to process; never fall back to the previous action's data.
            continue

        csi = scipy.io.loadmat(full_path_name)['csi']

        # Keep only whole groups, then fold the group into its own axis while
        # leaving every trailing axis of the capture untouched.
        n_groups = csi.shape[0] // group_size
        data_reshaped = csi[:n_groups * group_size].reshape(
            n_groups, group_size, *csi.shape[1:]
        )

        # Save the new CSI data next to the dataset it was derived from.
        for aggregation, reduce_groups in aggregations.items():
            out_dir = f'{source_dir}/{aggregation}_dataset'
            os.makedirs(out_dir, exist_ok=True)
            with open(f'{out_dir}/{filename}.pkl', 'wb') as f:
                pickle.dump(reduce_groups(data_reshaped), f)
            print(f'saved {aggregation} {action[1]}')

# Build the mean/median/max/min undersampled datasets for every action,
# using the module-level paths, sampling rates and action list.
undersample(candidate_paths, csi_per_second, new_csi_per_second, actions)

In [9]:
def _load_first_csi(name, filename, candidate_paths):
    """Return the CSI array of ``filename`` from the first candidate that has it, else None."""
    for path in candidate_paths:
        if name == 'original':
            full_path_name = f'{path}/dataset/{filename}.mat'
        else:
            full_path_name = f'{path}/{name}_dataset/{filename}.pkl'

        if not os.path.exists(full_path_name):
            print(f'File {full_path_name} not found')
            continue

        if name == 'original':
            return scipy.io.loadmat(full_path_name)['csi']
        # SECURITY: pickle.load can execute arbitrary code; only read files
        # that were produced by this notebook.
        with open(full_path_name, 'rb') as f:
            return pickle.load(f)
    return None


def _as_real_sample(values):
    """Flatten ``values`` to 1-D, taking the magnitude when they are complex."""
    flat = np.ravel(values)
    return np.abs(flat) if np.iscomplexobj(flat) else flat


def calculate_statistics(name, actions, candidate_paths):
    """Summarise one dataset variant and compare it with the original capture.

    Parameters
    ----------
    name : str
        ``'original'`` for the ``.mat`` files in ``dataset``; any other value
        selects the pickles in ``<name>_dataset``.
    actions : sequence of (str, str)
        ``(file code, label)`` pairs; the label is stored in the result.
    candidate_paths : sequence of str
        Directories searched in order; the first one holding the file is used.

    Returns
    -------
    list of dict
        One entry per action for which both the requested variant and the
        original capture were found, with keys ``mean``, ``std``,
        ``ks_2samp``, ``anderson_ksamp`` (the scipy result objects), ``name``
        and ``action``.  Actions with missing files are reported and skipped.
    """
    statistic = []
    for action in actions:
        filename = f'S1a_{action[0]}'

        # The original capture is the reference sample for every variant, so
        # it is always loaded, not only when name == 'original'.
        original_data = _load_first_csi('original', filename, candidate_paths)
        if original_data is None:
            continue
        if name == 'original':
            data = original_data
        else:
            data = _load_first_csi(name, filename, candidate_paths)
            if data is None:
                continue

        mean = np.mean(data)
        std = np.std(data)

        # Both tests expect 1-D real-valued samples.
        # NOTE(review): complex CSI is reduced to its magnitude here — confirm
        # that amplitude is the quantity whose distribution should be compared.
        sample = _as_real_sample(data)
        reference = _as_real_sample(original_data)
        ks_2samp = stats.ks_2samp(sample, reference)
        # anderson_ksamp takes a single sequence of samples.
        anderson_ksamp = stats.anderson_ksamp([sample, reference])

        statistic.append({
            'mean': mean,
            'std': std,
            'ks_2samp': ks_2samp,
            'anderson_ksamp': anderson_ksamp,
            'name': name,
            'action': action[1]
        })

    return statistic

In [10]:
# The statistics are expensive to compute, so they are cached as a pickle in
# the first candidate directory and only rebuilt when that file is absent.
path = candidate_paths[0]
if not os.path.exists(f'{path}/undersampling_stats.pkl'):
    # No cache yet: gather the per-action rows of every dataset variant.
    statistics = []
    for name in ['original', 'mean', 'median', 'max', 'min']:
        statistics += calculate_statistics(name, actions, candidate_paths)

    # One row per (action, variant), ordered by that index.
    statistics_df = pd.DataFrame(statistics)
    statistics_df = statistics_df.set_index(['action', 'name']).sort_index()
    with open(f'{path}/undersampling_stats.pkl', 'wb') as f:
        pickle.dump(statistics_df, f)
else:
    # Reuse the table stored by an earlier run.
    with open(f'{path}/undersampling_stats.pkl', 'rb') as f:
        statistics_df = pickle.load(f)

print(statistics_df)

                                       mean         std  \
action     name                                           
Clapping   max       551.905054+  0.811315j  589.192699   
           mean       -0.090386-  0.187506j  343.019603   
           median     -0.054237-  0.169580j  800.355762   
           min      -552.176540-  1.153405j  589.284000   
           original   -0.090386-  0.187506j  803.321057   
Empty Room max       654.590814+  0.855914j  715.043197   
           mean       -0.140244-  0.117582j  442.594006   
           median     -0.144450-  0.106379j  960.769667   
           min      -654.871352-  1.133747j  715.172993   
           original   -0.140244-  0.117582j  964.784135   
Jump       max       562.555467+  0.483552j  641.387154   
           mean       -0.138609-  0.162359j  404.119097   
           median     -0.137352-  0.138244j  814.438445   
           min      -562.872139-  0.844256j  641.414710   
           original   -0.138609-  0.162359j  832.655944 

In [None]:
def print_ft(csi, first, last, action):
    """Draw the magnitude of a CSI slice as one image per antenna.

    Parameters
    ----------
    csi : array-like, indexed as ``csi[sample, subcarrier, antenna]``
        Capture to display; antennas 0..3 are plotted side by side.
    first, last : int
        Sample range ``csi[first:last]`` that is shown.
    action : (str, str)
        ``(file code, label)``; the label becomes the figure title.

    Notes
    -----
    The axis extent is taken from the module-level ``new_first_csi``,
    ``new_last_csi``, ``first_subcarrier`` and ``last_subcarrier``.
    NOTE(review): the horizontal extent therefore ignores ``first``/``last``;
    confirm that a common x-range for all variants is intended.
    """
    fig, axs = plt.subplots(1,4)
    fig.suptitle(f'{action[1]}')
    for which_antenna in range(4):
        # Re-centre the subcarrier axis only.  Without ``axes`` fftshift rolls
        # every axis, which would also swap the two halves of the time window.
        csi_to_show = np.fft.fftshift(csi[first:last, :, which_antenna], axes=1)
        # Subcarriers on the vertical axis, samples on the horizontal axis.
        csi_to_show = np.transpose(csi_to_show)

        axs[which_antenna].imshow(abs(csi_to_show), extent=[new_first_csi, new_last_csi, first_subcarrier, last_subcarrier])


def fft_visual_compare():
    """Plot every action's original capture and each undersampled variant.

    For each entry of the module-level ``actions`` and each variant
    (``original``, ``mean``, ``median``, ``max``, ``min``) the file is read
    from the first directory in ``candidate_paths`` that contains it and
    handed to ``print_ft``: originals with the ``first_csi``/``last_csi``
    window, undersampled variants with ``new_first_csi``/``new_last_csi``.
    Candidates that lack the file are reported; all figures are shown once at
    the end.
    """
    for action in actions:
        filename = f'S1a_{action[0]}'
        for name in ['original', 'mean', 'median', 'max', 'min']:
            # Read CSI from the first file found among the candidates
            for path in candidate_paths:
                if name == 'original':
                    full_path_name = f'{path}/dataset/{filename}.mat'
                else:
                    full_path_name = f'{path}/{name}_dataset/{filename}.pkl'

                if not os.path.exists(full_path_name):
                    print(f'File {full_path_name} not found')
                    continue

                if name == 'original':
                    csi = scipy.io.loadmat(full_path_name)['csi']
                    print_ft(csi, first_csi, last_csi, action)
                else:
                    # SECURITY: pickle.load can execute arbitrary code; only
                    # read files that were produced by this notebook.
                    with open(full_path_name, 'rb') as f:
                        csi = pickle.load(f)
                    print_ft(csi, new_first_csi, new_last_csi, action)
                # Stop at the first hit so a file present in several
                # candidates is not plotted more than once.
                break

    plt.show()

# Render the comparison figures for every action and dataset variant.
fft_visual_compare()