In [1]:
# Goal: take a CSV file of signal data and sample data points from it in whatever way we need.
# Sampling could be done in increments of a fixed time interval,
# or by looking back over some period of past time.
# Likewise, take an activity dataset and sample from it however we need.

# Load Signal and Activity Dataset

In [1]:
import pandas as pd

def load_signal_csv(csv_path):
    """Read the signal CSV and return it without the unusable columns.

    Parameters
    ----------
    csv_path : path-like
        Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Every column of the file except 'rssnr', 'cqi' and 'ta', in their
        original order. Columns from that list which are absent from the
        file are simply ignored.
    """
    # Columns the notebook treats as holding invalid data.
    unusable_columns = ['rssnr', 'cqi', 'ta']
    raw_signals = pd.read_csv(csv_path)
    # errors="ignore" keeps this a no-op for any listed column the file lacks.
    return raw_signals.drop(columns=unusable_columns, errors="ignore")

# Relative path of the signal log that is read with pd.read_csv.
CSV_PATH = "./datasets/sig05.txt"
# Signal frame with the 'rssnr', 'cqi' and 'ta' columns already removed by load_signal_csv.
signal_df = load_signal_csv(CSV_PATH)


def load_activity_csv(path):
    """Read the activity CSV and keep every row that is not an EXIT event.

    Parameters
    ----------
    path : path-like
        Location of the CSV file, passed straight to ``pd.read_csv``. The
        file must provide a 'status' column; per the notebook it also holds
        'time' and 'activity'.

    Returns
    -------
    pandas.DataFrame
        The rows whose 'status' differs from "EXIT". Row labels from the
        file are kept, so the index may contain gaps.
    """
    all_events = pd.read_csv(path)
    # Only the state being entered matters, so EXIT transitions are dropped.
    is_exit = all_events['status'] == "EXIT"
    return all_events[~is_exit]

# Relative path of the activity log that is read with pd.read_csv.
ACTIVITY_PATH = "./datasets/processed_act05.txt"
# Activity frame with the EXIT rows already filtered out by load_activity_csv.
activity_df = load_activity_csv(ACTIVITY_PATH)

In [4]:
# Preview the first rows of the cleaned signal frame.
signal_df.head()


Unnamed: 0,time,mTimeStamp,mPci,mTac,mEarfcn,mMcc+mMnc,ss,rsrp,rsrq
0,11-07 16:52:51.012,22841626089569,116,16185,2300,310260,20,-103,-8
1,11-07 16:52:51.270,22841626089569,116,16185,2300,310260,20,-103,-8
2,11-07 16:52:51.294,22841626089569,116,16185,2300,310260,20,-103,-8
3,11-07 16:52:51.325,22841626089569,116,16185,2300,310260,20,-103,-8
4,11-07 16:52:51.616,22841626089569,116,16185,2300,310260,20,-103,-8


In [5]:
# Preview the first rows of the activity frame (EXIT rows already removed).
activity_df.head()

Unnamed: 0,time,activity,status
0,11-07 16:50:26.886,STILL,ENTER
1,11-07 16:50:26.895,STILL,ENTER
2,11-07 16:52:57.316,STILL,ENTER
3,11-07 16:52:57.331,STILL,ENTER
4,11-07 16:52:57.337,STILL,ENTER


# Merge Datasets

In [9]:
def search_activity_status_at_time(query_time, activity_df):
    """Look up the activity state that was current at ``query_time``.

    The rows of ``activity_df`` are considered in frame order. The answer is
    the 'activity' of the row sitting immediately before the first row whose
    'time' is greater than ``query_time``. The frame is therefore expected to
    be ordered by time; this is not checked.

    Parameters
    ----------
    query_time
        The moment to look up. It must be comparable with the values of the
        'time' column; the notebook passes timestamp strings, which compare
        lexicographically.
    activity_df : pandas.DataFrame
        Frame with 'time' and 'activity' columns.

    Returns
    -------
    The matching value from the 'activity' column, or the string "NOVALUE"
    when no state can be determined:

    * the frame is empty,
    * ``query_time`` lies before the first row (nothing observed yet), or
    * no row is later than ``query_time`` (the query is past the end of the
      activity data).
    """
    no_value_string = "NOVALUE"

    if activity_df.empty:
        return no_value_string

    # One vectorised comparison instead of a Python-level iterrows() scan:
    # this function is called once per signal row, so the per-row Series
    # construction of iterrows() dominated the merge time.
    is_later = (activity_df['time'] > query_time).to_numpy(dtype=bool)

    if not is_later.any():
        # Query time is somewhere after the end of the activity data.
        return no_value_string

    # argmax on a boolean array gives the position of the first True.
    first_later_pos = int(is_later.argmax())
    if first_later_pos == 0:
        # Query time precedes the first recorded activity.
        return no_value_string

    # Positional lookup: the frame's labels may have gaps after filtering.
    return activity_df['activity'].iloc[first_later_pos - 1]

# Example of searching for activity at specific time
# search_activity_status_at_time("11-06 16:50:26.900", activity_df)
# search_activity_status_at_time("11-07 16:50:26.900", activity_df)
def create_merged_dataset(signal_df, activity_df):
    """Return a copy of ``signal_df`` with an added 'activity' column.

    For every signal row, the value of its 'time' column is looked up with
    ``search_activity_status_at_time`` and the result is stored in the
    'activity' column of the returned frame.

    Parameters
    ----------
    signal_df : pandas.DataFrame
        Signal measurements; must have a 'time' column. It is not modified.
    activity_df : pandas.DataFrame
        Activity frame handed unchanged to ``search_activity_status_at_time``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``signal_df`` (same index, same row order) plus the
        'activity' column.
    """
    merged_df = signal_df.copy()

    # Collect the labels in row order and attach them positionally.
    # Writing through merged_df.loc[label, 'activity'] would overwrite every
    # row sharing a label when the index is not unique, and is also far
    # slower than a single column assignment.
    labels = [
        search_activity_status_at_time(query_time, activity_df)
        for query_time in signal_df['time']
    ]
    # Reusing merged_df's own index makes the assignment purely positional.
    merged_df['activity'] = pd.Series(labels, index=merged_df.index, dtype=object)
    return merged_df

In [10]:
# Label every signal row with the activity looked up for its timestamp.
merged_df = create_merged_dataset(signal_df, activity_df)
# Preview the first rows of the merged frame.
merged_df.head()

Unnamed: 0,time,mTimeStamp,mPci,mTac,mEarfcn,mMcc+mMnc,ss,rsrp,rsrq,activity
0,11-07 16:52:51.012,22841626089569,116,16185,2300,310260,20,-103,-8,STILL
1,11-07 16:52:51.270,22841626089569,116,16185,2300,310260,20,-103,-8,STILL
2,11-07 16:52:51.294,22841626089569,116,16185,2300,310260,20,-103,-8,STILL
3,11-07 16:52:51.325,22841626089569,116,16185,2300,310260,20,-103,-8,STILL
4,11-07 16:52:51.616,22841626089569,116,16185,2300,310260,20,-103,-8,STILL
