In [2]:
import pandas as pd
import numpy as np

import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import random_split, DataLoader, Dataset

In [3]:
batch_size = 32
val_percentage = 0.1
test_percentage = 0.1

seed = 1536282

In [4]:
data = pd.read_csv("data/roomPredictionData2.csv")
data.count()

room    3279
id      3279
rssi    3279
time    3279
dtype: int64

In [5]:
data.room.unique()

array(['Office', 'Hallway', 'Kitchen', 'Living Room', 'Bedroom'],
      dtype=object)

Below is converting the time to int64. This will be used to group points / second to triangulate location. Each of these groups will be used as a data point to be passed through the model

In [11]:
data["time_int"] = data.loc[:, "time"].astype(np.int64)

In [12]:
data

Unnamed: 0,room,id,rssi,time,time_int
0,Office,C89F2066-D85B-7A21-E292-A1735A30A295,-83.0,7.420843e+08,742084271
1,Office,216D23F7-E6A1-7727-A3D5-91C76258F327,-59.0,7.420843e+08,742084271
2,Office,11FE69C7-7DD2-13AF-1493-7B63E92D5B0D,-56.0,7.420843e+08,742084271
3,Office,DB047FFD-D211-EDF3-345E-1FE933920DC2,-83.0,7.420843e+08,742084271
4,Office,AB8FA414-1C59-7DE7-F7EF-87C6CF2A303B,-79.0,7.420843e+08,742084271
...,...,...,...,...,...
3274,Bedroom,94CE6244-F55B-5A57-E0D3-400D3E564F48,-76.0,7.420847e+08,742084682
3275,Bedroom,94CE6244-F55B-5A57-E0D3-400D3E564F48,-75.0,7.420847e+08,742084682
3276,Bedroom,767B7B8B-5B66-F187-5E3F-2D91165B7273,-98.0,7.420847e+08,742084682
3277,Bedroom,94CE6244-F55B-5A57-E0D3-400D3E564F48,-72.0,7.420847e+08,742084683


# DataLoader


In [40]:
class Data(Dataset):
    
    def __init__(self, data_frame: pd.DataFrame):
        self.X = torch.from_numpy(data_frame.loc[:, data_frame.columns != "room"].to_numpy())
        self.y = torch.from_numpy(data_frame.loc[:,"room"].to_numpy())
        
    def __getitem__(self, index):
        return self.X[index], self.y[index]
    
    def __len__(self):
        return len(self.y)
    

In [39]:
    
def split(data, val_perc, test_perc, seed = None):
    
    # Get total amount
    amount = len(data)
    
    # Get test amount
    test_amount = (
        int(amount * test_perc)
        if test_perc is not None else 0)
    # Get validation amount
    val_amount = (
        int(amount * val_perc)
        if val_perc is not None else 0)
    # Get train amount
    train_amount = amount - test_amount - val_amount
    
    train, val, test = random_split(
        data,
        (train_amount, val_amount, test_amount),
        # If we have a seed, pass that in to a torch generator
        generator=(
            torch.Generator().manual_seed(seed)
            if seed
            else None))
    
    return train, val, test
    

train, val, test = split(data, val_percentage, test_percentage, seed)
train = Data(train)
val = Data(val)
test = Data(test)

train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test, batch_size=batch_size, shuffle=False)

AttributeError: 'Subset' object has no attribute 'loc'

In [41]:
Data(data)

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.

# Model

In [None]:
class Classifier(nn.Module):
    
    def __init__(self, num_hidden, num_outputs, dropout=0.5):
        super().__init__()
        self.net = nn.Sequential(
            nn.LazyLinear(num_hidden), nn.ReLU(), nn.Dropout(dropout),
            nn.LazyLinear(num_outputs), nn.Softmax()
            
        )
        
    def forward(self, X):
        return self.net(X)
        