<a href="https://colab.research.google.com/github/idanh8/IoT_project_accelerometer_data/blob/main/activity_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports and Setup

In [10]:
import pandas as pd
import numpy as np
import zipfile
import io
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import pickle

Utility Functions

In [2]:
def norm(x, y, z):
  """Return the Euclidean magnitude of the three components x, y and z."""
  sum_of_squares = x**2 + y**2 + z**2
  return sum_of_squares**0.5

In [3]:
# Archive members that are skipped entirely.
_SKIPPED_FILES = frozenset({'8_walk_4_3.csv'})

# Rows removed from specific recordings. Each inner list is one cleanup pass:
# the listed index labels are dropped and the index is then renumbered from 0,
# so labels in a later pass refer to the renumbered frame.
_ROW_DROP_PASSES = {
  '11_walk_5_1.csv': [[0]],
  '6_run_3_1.csv': [[0]],
  '6_run_4_1.csv': [[0]],
  '11_walk_1_1.csv': [[0]],
  '11_walk_2_1.csv': [[0]],
  '11_walk_3_1.csv': [[0]],
  '6_walk_5_1.csv': [[0]],
  '16_run_3_1.csv': [[106]],
  '31_walk_2_1.csv': [[207]],
  '4_run_2_2.csv': [[185]],
  '4_walk_1_3.csv': [[368]],
  '4_walk_2_3.csv': [[95]],
  '4_walk_4_2.csv': [[599]],
  '4_walk_3_2.csv': [[39, 41]],
  '5_run_3_1.csv': [[352]],
  # NOTE(review): the original code had two consecutive cleanup blocks for this
  # same file name. The second one may have been meant for a different
  # recording; both passes are kept so the output is unchanged. Please confirm.
  '8_run_3_1.csv': [[264, 605, 606], [603, 604, 605]],
}

# Recordings that are cut down to their first N rows.
_ROW_LIMITS = {'1_walk_4_1.csv': 995}


def _clean_recording(file_name, df):
  """Apply the per-file row removals registered for `file_name` and return the frame."""
  for labels in _ROW_DROP_PASSES.get(file_name, ()):
    df = df.drop(labels).reset_index(drop=True)
  if file_name in _ROW_LIMITS:
    df = df[:_ROW_LIMITS[file_name]].reset_index(drop=True)
  return df


def process_raw(zip_path):
  """Load every recording in a zip archive and attach its metadata.

  Each archive member is expected to start with five metadata lines followed
  by a CSV table (its first line being the column header).

  Args:
    zip_path: path to the zip archive holding the recordings.

  Returns:
    A dict keyed by consecutive integers starting at 0. Each value is a dict
    with the keys:
      'Name'     - the member's file name inside the archive,
      'Data'     - the parsed DataFrame, after the per-file row cleanup and
                   with an added 'Norm' column computed from the columns at
                   positions 1, 2 and 3,
      'Steps'    - the integer in the second comma-separated field of
                   metadata line 4,
      'Activity' - 0 when metadata line 1 contains 'walk' or metadata line 3
                   contains 'walking' (case-insensitive), otherwise 1.

  Side effects:
    Prints the number of recordings that were loaded.
  """
  datasets = {}
  with zipfile.ZipFile(zip_path, 'r') as archive:
    for file_name in archive.namelist():
      if file_name in _SKIPPED_FILES:
        continue

      # Decode and split once; the first five lines are metadata, the rest is CSV.
      lines = archive.read(file_name).decode().split('\n')
      header = [line.lower().replace('"', '') for line in lines[:5]]
      df = pd.read_csv(io.StringIO('\n'.join(lines[5:])))

      activity = 0 if ('walk' in header[0] or 'walking' in header[2]) else 1
      steps = int(header[3].split(',')[1])

      # Plain assignment replaces the former exec(); keys stay 0, 1, 2, ...
      datasets[len(datasets)] = {
        'Name': file_name,
        'Data': df,
        'Steps': steps,
        'Activity': activity,
      }
  print(f'There are {len(datasets)} files in the dataset')

  for dataset in datasets.values():
    df = _clean_recording(dataset['Name'], dataset['Data'])
    # Use .iloc for positional access: the row Series is indexed by column
    # label, and integer keys on a label-indexed Series are deprecated.
    # float() is applied per value because a column may hold numeric strings.
    df['Norm'] = df.apply(
      lambda row: norm(float(row.iloc[1]), float(row.iloc[2]), float(row.iloc[3])),
      axis=1,
    )
    dataset['Data'] = df
  return datasets

In [4]:
class Model(nn.Module):
    """Small fully connected network whose output passes through a sigmoid.

    Layer widths are input_size -> 16 -> 8 -> output_size, with a ReLU after
    the first linear layer and a LeakyReLU after the second.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # Attribute names and their registration order are left untouched;
        # presumably the pickled network loaded later in this notebook was
        # saved from this class, so renaming them would break it - TODO confirm.
        self.fc1 = nn.Linear(input_size, 16)
        self.relu = nn.ReLU()
        self.lrelu = nn.LeakyReLU()
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run x through the three linear layers and return the sigmoid output."""
        hidden = self.relu(self.fc1(x))
        hidden = self.lrelu(self.fc2(hidden))
        return self.sigmoid(self.fc3(hidden))

In [5]:
def generate_dataset(dataset):
    """Cut one recording into non-overlapping 7-row windows for the network.

    Args:
        dataset: dict with a 'Data' entry holding the recording's DataFrame.
            The columns at positions 1, 2 and 3 are used as the three
            components passed to `norm`.

    Returns:
        A tuple `(tensor_dataset, n_windows)`. `tensor_dataset` is a
        TensorDataset pairing a float32 tensor of shape (n_windows, 1, 28)
        (7 rows x 4 columns, flattened) with a placeholder label tensor of
        ones; `n_windows` is the number of windows.

    Side effects:
        Adds or overwrites the 'Norm' column on `dataset['Data']` in place.

    Note:
        A trailing partial window is discarded. If the frame has fewer than
        7 rows no window is produced and `torch.stack` is called with an
        empty list.
    """
    window = 7
    feature_positions = [1, 2, 3, 4]  # the three components plus 'Norm'

    df = dataset['Data']
    # Use .iloc for positional access: the row Series is indexed by column
    # label, and integer keys on a label-indexed Series are deprecated.
    df['Norm'] = df.apply(
        lambda row: norm(float(row.iloc[1]), float(row.iloc[2]), float(row.iloc[3])),
        axis=1,
    )

    data = []
    # Only starts that leave room for a full window are visited.
    for start in range(0, len(df) - window + 1, window):
        # Positional slicing, so the result does not depend on the index labels.
        chunk = df.iloc[start:start + window]
        acc = np.array(chunk.iloc[:, feature_positions].values, dtype='float32')
        data.append(torch.tensor(acc.reshape(1, -1)))

    stacked = torch.stack(data).to(torch.float32)
    # The labels are placeholders; TensorDataset just needs a second tensor here.
    label_tensor = torch.ones(stacked.shape[0])
    final_dataset = TensorDataset(stacked, label_tensor)
    return final_dataset, stacked.shape[0]

In [15]:
def predict(model, data):
    """Classify every recording as walking or running and print the accuracy.

    For each recording the model scores all windows from `generate_dataset`,
    each score is rounded to 0 or 1, and the recording is labelled 1 when the
    mean of those rounded scores is at least 0.5, otherwise 0.

    Args:
        model: the network to evaluate; it is switched to eval mode.
        data: dict of recordings, each a dict with at least 'Data' and
            'Activity' (0 is printed as 'Walking', 1 as 'Running').

    Side effects:
        Prints one line per recording and a final accuracy line. Returns None.
    """
    # Run on whichever device the model already lives on. The previous code
    # sent the inputs to CUDA when available without ever moving the model,
    # which fails with a device mismatch on GPU machines.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    outcomes = []
    for i, dataset in enumerate(data.values(), start=1):
        testset, _ = generate_dataset(dataset)
        true_label = dataset['Activity']

        # All windows of a recording are scored in one forward pass; no
        # shuffling is needed because only the mean prediction is used.
        windows = testset.tensors[0].to(device)
        with torch.no_grad():
            output = model(windows)

        predicted_class = torch.round(output).squeeze(dim=1)
        final_pred = 0 if predicted_class.mean() < 0.5 else 1
        outcomes.append(1 if final_pred == true_label else 0)

        activity = 'Walking' if final_pred == 0 else 'Running'
        label = 'Walking' if true_label == 0 else 'Running'
        print(f'Dataset {i}: True: {label}, Predicted: {activity}')

    if not outcomes:
        # Avoid dividing by zero when there is nothing to evaluate.
        print('No datasets to evaluate')
        return
    print(f'Final Accuracy: {sum(outcomes)/len(outcomes)}')

Predictions

In [16]:
zip_path = 'data_set.zip' # change accordingly, please ensure a zipfile is passed containing csv files in the correct format
data = process_raw(zip_path)
# SECURITY: pickle.load can execute arbitrary code while deserialising, so only
# load an 'activity_net.pkl' that comes from a trusted source.
# The context manager closes the file handle, which the bare open() call leaked.
with open('activity_net.pkl', 'rb') as model_file:
  model = pickle.load(model_file)
predict(model, data)

There are 243 files in the dataset
Dataset 1: True: Running, Predicted: Running
Dataset 2: True: Running, Predicted: Walking
Dataset 3: True: Running, Predicted: Running
Dataset 4: True: Running, Predicted: Running
Dataset 5: True: Running, Predicted: Running
Dataset 6: True: Walking, Predicted: Walking
Dataset 7: True: Walking, Predicted: Walking
Dataset 8: True: Walking, Predicted: Walking
Dataset 9: True: Walking, Predicted: Walking
Dataset 10: True: Walking, Predicted: Walking
Dataset 11: True: Running, Predicted: Running
Dataset 12: True: Running, Predicted: Running
Dataset 13: True: Running, Predicted: Running
Dataset 14: True: Running, Predicted: Running
Dataset 15: True: Running, Predicted: Running
Dataset 16: True: Walking, Predicted: Walking
Dataset 17: True: Walking, Predicted: Walking
Dataset 18: True: Walking, Predicted: Walking
Dataset 19: True: Walking, Predicted: Walking
Dataset 20: True: Walking, Predicted: Walking
Dataset 21: True: Running, Predicted: Running
Dataset 