In [1]:
import pandas as pd
import numpy as np

---

## The Activities Data
### Importing the Activities Data
The file `S1Activities.csv` contains a tabulated summary of Heading, Category, Subcategory and a corresponding code.

In [2]:
dsActivities = pd.read_csv('S1Activities.csv', index_col = None)    # Importing the dataset

In [3]:
dsActivities.head(n=10)                                             # Checking the dataset

Unnamed: 0,Heading,Category,Subcategory,Code
0,Employment related,Employment work at home,Work at home,1
1,Employment related,Travel employment,Going out to work,5
2,Personal needs,Eating,Eating,10
3,Personal needs,Personal hygiene,Toileting,15
4,Personal needs,Personal hygiene,Bathing,20
5,Personal needs,Personal hygiene,Grooming,25
6,Personal needs,Personal hygiene,Dressing,30
7,Personal needs,Personal hygiene,Washing hands,35
8,Personal needs,Personal medical,Taking medication,40
9,Personal needs,Sleeping,Sleeping,45


In [4]:
len(dsActivities)

33

At this time, the activities data will not be subject to preprocessing.

## The Sensor Data
### Importing Sensor Data
The file `S1sensors.csv` contains a tabulated values for sensor ID (numeric), room and sensor type.

In [5]:
dsS1Sensors = pd.read_csv('S1sensors.csv',                    # Importing the dataset
                          index_col = None, header = None)    

In [6]:
dsS1Sensors.head(n=5)                                         # Checking the dataset

Unnamed: 0,0,1,2
0,100,Bathroom,Toilet Flush
1,101,Bathroom,Light switch
2,104,Foyer,Light switch
3,105,Kitchen,Light switch
4,106,Kitchen,Burner


In [7]:
len(dsS1Sensors)                                      # Checking the length of dsS1Sensors

76

### Preprocessing the Sensor Data
Column [1] & Column [2] of the sensor data will be concatenated, whitespace will be removed, all text will be cast to lowercase and a final whitespace strip will be performed. 

In [8]:
temp1 = dsS1Sensors[1] + '_' + dsS1Sensors[2]   # Creating a new vector of concatenated column 1 & 2
temp2 = temp1.str.replace(" ", "")              # Removing whitespace between strings
temp3 = temp2.str.lower()                       # Changing all text to lowercase
temp4 = temp3.str.strip()                       # Striping any remaining whitespace
dsS1Sensors[3] = temp4                          # Adding the precessed vector back into the dataset
dsS1Sensors.rename(columns={0:'subActNum', 
                            1:'room',
                            2:'activity',
                            3:'concat',
                            4:'energyReq'}, 
                   inplace=True)

In [9]:
dsS1Sensors.head(n=6)                           # Checking the resultant dataset

Unnamed: 0,subActNum,room,activity,concat
0,100,Bathroom,Toilet Flush,bathroom_toiletflush
1,101,Bathroom,Light switch,bathroom_lightswitch
2,104,Foyer,Light switch,foyer_lightswitch
3,105,Kitchen,Light switch,kitchen_lightswitch
4,106,Kitchen,Burner,kitchen_burner
5,107,Living room,Light switch,livingroom_lightswitch


<p style='text-align: justify;'> 
Each row needs to be inspected manually to determine if the activity requires electricity. This can only be done initially by manual inspection.
</p>

<p style='text-align: justify;'> 
</p>

In [40]:
def showUniqueValues(list1):                 # Function to get unique values 
    unique_list = []                         # Intilize a null list 
    
    for x in list1:                          # Traverse for all elements 
        if x not in unique_list:             # Check if exists in unique_list or not 
            unique_list.append(x)            # Append to unique_list
    
    for x in unique_list:                    # Print list 
        print(x)

In [41]:
showUniqueValues(dsS1Sensors.iloc[:,3])   # Pass the concacenated sensor list to unique

bathroom_toiletflush
bathroom_lightswitch
foyer_lightswitch
kitchen_lightswitch
kitchen_burner
livingroom_lightswitch
bedroom_lightswitch
porch_lightswitch
kitchen_coffeemachine
kitchen_drawer
kitchen_refrigerator
kitchen_oven
bathroom_door
kitchen_toaster
kitchen_cabinet
kitchen_window
kitchen_freezer
bedroom_jewelrybox
foyer_door
kitchen_door
kitchen_washingmachine
kitchen_microwave
kitchen_cereal
bedroom_drawer
livingroom_dvd
bathroom_medicinecabinet
kitchen_containers
bedroom_lamp
bathroom_cabinet
bathroom_sinkfaucet-hot
kitchen_dishwasher
livingroom_lamp
foyer_closet
office/study_drawer
bedroom_window
bathroom_sinkfaucet-cold
kitchen_laundrydryer
office/study_lightswitch
bathroom_showerfaucet
bathroom_exhaustfan
kitchen_garbagedisposal


In [73]:
a = dsS1Sensors.iloc[:,3]
specialChar = '/'
reqElectricity = 'ligh|burn|mach|toas|freez|dvd|lamp|washer|dry|exh|disp|frig|oven|hot|micro'
b = a.str.contains(reqElectricity, regex = True)
c = a.str.contains(specialChar, regex = True)
dsS1Sensors['reqElectricity'] = b
dsS1Sensors['specialChar'] = c

In [88]:
# List all characters
def split(word): 
    return [char for char in word]

split(dsS1Sensors['concat'][0])

['b',
 'a',
 't',
 'h',
 'r',
 'o',
 'o',
 'm',
 '_',
 't',
 'o',
 'i',
 'l',
 'e',
 't',
 'f',
 'l',
 'u',
 's',
 'h']

In [89]:
forwardSlashFilter = dsS1Sensors['concat'].str.contains('/', regex = True)
# Filter for true

In [74]:
dsS1Sensors[0:25]

Unnamed: 0,subActNum,room,activity,concat,reqElectricity,specialChar
0,100,Bathroom,Toilet Flush,bathroom_toiletflush,False,False
1,101,Bathroom,Light switch,bathroom_lightswitch,True,False
2,104,Foyer,Light switch,foyer_lightswitch,True,False
3,105,Kitchen,Light switch,kitchen_lightswitch,True,False
4,106,Kitchen,Burner,kitchen_burner,True,False
5,107,Living room,Light switch,livingroom_lightswitch,True,False
6,108,Bedroom,Light switch,bedroom_lightswitch,True,False
7,109,Porch,Light switch,porch_lightswitch,True,False
8,118,Kitchen,Burner,kitchen_burner,True,False
9,119,Kitchen,Coffee machine,kitchen_coffeemachine,True,False


In [75]:
dsS1Sensors[26:50]

Unnamed: 0,subActNum,room,activity,concat,reqElectricity,specialChar
26,143,Kitchen,Microwave,kitchen_microwave,True,False
27,144,Kitchen,Refrigerator,kitchen_refrigerator,True,False
28,145,Kitchen,Cereal,kitchen_cereal,False,False
29,146,Bedroom,Drawer,bedroom_drawer,False,False
30,51,Kitchen,Cabinet,kitchen_cabinet,False,False
31,52,Kitchen,Drawer,kitchen_drawer,False,False
32,53,Kitchen,Cabinet,kitchen_cabinet,False,False
33,54,Kitchen,Door,kitchen_door,False,False
34,55,Kitchen,Cabinet,kitchen_cabinet,False,False
35,56,Living room,DVD,livingroom_dvd,True,False


In [76]:
dsS1Sensors[50:76]

Unnamed: 0,subActNum,room,activity,concat,reqElectricity,specialChar
50,72,Kitchen,Cabinet,kitchen_cabinet,False,False
51,73,Kitchen,Cabinet,kitchen_cabinet,False,False
52,75,Bedroom,Drawer,bedroom_drawer,False,False
53,76,Living room,Lamp,livingroom_lamp,True,False
54,78,Kitchen,Drawer,kitchen_drawer,False,False
55,79,Bathroom,Cabinet,bathroom_cabinet,False,False
56,80,Kitchen,Cabinet,kitchen_cabinet,False,False
57,81,Foyer,Closet,foyer_closet,False,False
58,82,Office/study,Drawer,office/study_drawer,False,True
59,83,Kitchen,Cabinet,kitchen_cabinet,False,False


Comment: Later the ## values for subActNum will become subActNum_##

In [19]:
dsS1Sensors['subActNumConcat'] = 'subActNum_' + dsS1Sensors['subActNum'].astype(str)

In [20]:
dsS1Sensors[0:10]

Unnamed: 0,subActNum,room,activity,concat,reqElectricity,subActNumConcat
0,100,Bathroom,Toilet Flush,bathroom_toiletflush,False,subActNum_100
1,101,Bathroom,Light switch,bathroom_lightswitch,True,subActNum_101
2,104,Foyer,Light switch,foyer_lightswitch,True,subActNum_104
3,105,Kitchen,Light switch,kitchen_lightswitch,True,subActNum_105
4,106,Kitchen,Burner,kitchen_burner,True,subActNum_106
5,107,Living room,Light switch,livingroom_lightswitch,True,subActNum_107
6,108,Bedroom,Light switch,bedroom_lightswitch,True,subActNum_108
7,109,Porch,Light switch,porch_lightswitch,True,subActNum_109
8,118,Kitchen,Burner,kitchen_burner,True,subActNum_118
9,119,Kitchen,Coffee machine,kitchen_coffeemachine,True,subActNum_119


In [21]:
subActKeyWithStringDict = pd.Series(dsS1Sensors.concat.values, dsS1Sensors.subActNumConcat.values).to_dict()
subActKeyWithStringDict

{'subActNum_100': 'bathroom_toiletflush',
 'subActNum_101': 'bathroom_lightswitch',
 'subActNum_104': 'foyer_lightswitch',
 'subActNum_105': 'kitchen_lightswitch',
 'subActNum_106': 'kitchen_burner',
 'subActNum_107': 'livingroom_lightswitch',
 'subActNum_108': 'bedroom_lightswitch',
 'subActNum_109': 'porch_lightswitch',
 'subActNum_118': 'kitchen_burner',
 'subActNum_119': 'kitchen_coffeemachine',
 'subActNum_120': 'kitchen_lightswitch',
 'subActNum_125': 'kitchen_drawer',
 'subActNum_126': 'kitchen_refrigerator',
 'subActNum_129': 'kitchen_oven',
 'subActNum_130': 'bathroom_door',
 'subActNum_131': 'kitchen_toaster',
 'subActNum_132': 'kitchen_cabinet',
 'subActNum_133': 'kitchen_cabinet',
 'subActNum_135': 'kitchen_drawer',
 'subActNum_136': 'kitchen_window',
 'subActNum_137': 'kitchen_freezer',
 'subActNum_138': 'kitchen_cabinet',
 'subActNum_139': 'bedroom_jewelrybox',
 'subActNum_140': 'foyer_door',
 'subActNum_141': 'kitchen_door',
 'subActNum_142': 'kitchen_washingmachine',
 '

In [26]:
subActKeyWithEnergyDict = pd.Series(dsS1Sensors.reqElectricity.values, 
                                    dsS1Sensors.subActNumConcat.values).to_dict()
print(subActKeyWithEnergyDict['subActNum_100'],    # Checking the dictionary
      subActKeyWithEnergyDict['subActNum_101'])

False True


In [35]:
seen = {}
dupes = []

for x in dsS1Sensors.concat:
    if x not in seen:
        seen[x] = 1
    else:
        if seen[x] == 1:
            dupes.append(x)
        seen[x] += 1

In [50]:
for i in seen:                  # Adding value counts for dupes
    if (seen[i] > 1 and seen[i] <= 9):
        print(seen[i], " ", i)
    if (seen[i] >= 10):
        print(seen[i], "", i)

3   kitchen_lightswitch
4   kitchen_burner
2   livingroom_lightswitch
7   kitchen_drawer
3   kitchen_refrigerator
15  kitchen_cabinet
2   kitchen_door
5   bedroom_drawer
2   bathroom_medicinecabinet
2   bathroom_cabinet


In [23]:
dsS1Sensors.to_csv('S1Sensors_preprocessed.csv',index = False)

In [24]:
# FIX OFFICE/STUDY
