In [22]:
import csv
import datetime
import random
import glob
import os
import tensorflow as tf
import tensorflow_hub as hub
from IPython.display import Audio
import numpy as np
import scipy
from scipy.io import wavfile
import soundfile as sf
import resampy
import librosa


In [None]:
"""
Data is in epoch timestamp; should be in dataframe
"""
def format_timestamp(time):
    """Convert an epoch timestamp into a ``datetime.datetime``.

    Args:
        time: Epoch seconds as anything ``int()`` accepts (e.g. the string
            read from a CSV column, or a number). Fractional parts of
            numeric inputs are truncated by ``int()``.

    Returns:
        The naive ``datetime.datetime`` produced by
        ``datetime.datetime.fromtimestamp`` (i.e. expressed in local time).
    """
    epoch_seconds = int(time)
    return datetime.datetime.fromtimestamp(epoch_seconds)

In [None]:
# Load every room's PIR log into memory.
# Result: pir_data is a list with one entry per matched pir.csv file; each entry
# is a list of (datetime, float) tuples built from the first two CSV columns.
files = glob.glob("../..//../Desktop/archive/KETI/*/pir.csv")
pir_data = [] #Format of this will depend on how much data we are collecting, what we need it for
for file in files:
    day = []
    # `with` closes each CSV as soon as it has been read instead of leaking one
    # open handle per file; newline="" is what the csv module expects.
    with open(file, newline="") as f:
        reader = csv.reader(f)
        for line in reader:
            if not line:  # csv.reader yields [] for blank lines; nothing to parse
                continue
            time = format_timestamp(line[0])
            ts = float(line[1])
            day.append((time,ts))
    pir_data.append(day)

In [None]:
"""
Timedelta to minutes (for checking against alerts)
"""
def get_minutes(td):
    """Return the full length of a ``datetime.timedelta`` in minutes.

    ``timedelta.seconds`` only holds the seconds *within* the last day
    (0..86399), so it silently drops whole days and wraps negative deltas.
    ``total_seconds()`` covers the entire duration.

    Args:
        td: A ``datetime.timedelta``.

    Returns:
        The duration in minutes as a float (negative for negative deltas).
    """
    return td.total_seconds() / 60

In [None]:
"""
If you only want to look at the data between certain times
"""
def get_modified_data(data,start,end=None):
    """Keep only the readings that fall inside a time window (bounds inclusive).

    Args:
        data: Iterable of tuples whose first element is a ``datetime.datetime``.
        start: Lower bound. A ``datetime.time`` filters on time of day (the
            date is ignored); a ``datetime.datetime`` filters on the absolute
            timestamp.
        end: Optional upper bound, interpreted the same way as ``start``.
            ``None`` means no upper bound.

    Returns:
        A new list with the matching elements, in their original order.
    """
    def _comparable(stamp, bound):
        # A datetime and a time cannot be ordered against each other, so pick
        # the representation of the reading that matches the bound's type.
        return stamp if isinstance(bound, datetime.datetime) else stamp.time()

    if end is None:
        return [el for el in data if _comparable(el[0], start) >= start]
    return [
        el for el in data
        if _comparable(el[0], start) >= start and _comparable(el[0], end) <= end
    ]


In [None]:
# Hour-of-day bounds compared against door_timestamp.hour in leftAtNight:
# hours in [MORNING_HOUR, NIGHT_HOUR) are treated as daytime and are not checked.
NIGHT_HOUR = 21 #should change/be personalised?
MORNING_HOUR = 8
# Alert threshold in minutes; checkRoomLength alerts once get_minutes(...) exceeds it.
ALERT_TIME = 30 #should be changed
# checkRoomLength resets its tracking once the no-motion count exceeds this value.
LIM_TIMESTAMP_STREAK = 10  # Number of timestamps for which there's no movement

In [None]:
"""
Check if person is in one room for too long
"Too long" is room-dependent, but this is just assuming a basic rule of 30 minutes (for bathrooms and stuff)

The first timestamp of movement is stored, and the subsequent movement timestamps are compared to this one. If the timedelta is >= the max time (30 minutes), an alert would be raised.
If there has been a period of (10) consecutive timestamps with no motion, there is an assumption FOR NOW that the person has left the room. If this is the case, all stored variables are reset.
The next timestamp of movement after this would be stored and the process would begin again.


To check/change:
10 periods of no movement is arbitrary
What happens with the PIR data if someone is asleep/staying still but still in the room? 
30 minutes in the bathroom being too long is arbitrary
If the PIR sensor does give us non-binary data, what can I do with that?
Do I check other rooms as well?
"""

def checkRoomLength(room_data):
    """Print an alert if one continuous stay in a room exceeds ALERT_TIME minutes.

    The timestamp of the first reading with movement starts a stay. Every
    later reading is compared with that start; once the elapsed time (via
    get_minutes) is greater than ALERT_TIME the elapsed minutes and
    "Too long!" are printed and the function returns. A run of more than
    LIM_TIMESTAMP_STREAK consecutive no-movement readings is taken to mean
    the room has been left, and tracking starts over at the next movement.

    Args:
        room_data: Sequence of (timestamp, value) pairs in chronological
            order, where value == 0 means no movement and the timestamps
            support subtraction.

    Returns:
        None. The alert is reported with print().
    """
    stay_start = None  # timestamp at which the current stay began
    no_motion_streak = 0

    for ts, val in room_data:
        if val == 0:
            if stay_start is None:  # Room is empty
                continue
            no_motion_streak += 1
            if no_motion_streak > LIM_TIMESTAMP_STREAK:
                # assume at this point that nobody is in the room
                stay_start = None
                no_motion_streak = 0
                continue
        else:
            # Movement ends the idle run. Without this reset, scattered idle
            # readings would add up and end a stay that is still ongoing.
            no_motion_streak = 0
            if stay_start is None:
                stay_start = ts  # initialise beginning of movement period

        minutes = get_minutes(ts - stay_start)  # convert to minutes
        if minutes > ALERT_TIME:
            print(minutes)
            print("Too long!")  # raise alert
            return
    return

In [None]:
"""
Will need to check multiple rooms and door audio (door audio I don't have)
This also currently assumes 24 hour time. 
How would this function be called? Upon hearing a door sound or upon there not being activity for a certain amount of time? (or just routinely check) 
The way this function would be written would depend on its purpose.

Currently: this assumes that a door sound has been heard. 
This code checks through all the rooms, and if movement is seen within a certain time buffer, doesn't raise an alert. 
Assumptions/choices to be made:
1) I need to decide at what point no activity within any room is concerning. This can't be too short of a time, but also can't be too long!
2) This code is assuming door sounds can be detected
3) What happens if someone has a cat/dog? 
"""
# How long after a door sound we look for movement before raising an alert.
BUFFER = datetime.timedelta(minutes=30)
def leftAtNight(door_timestamp, room_data):
    """Print an alert if a night-time door sound is not followed by movement.

    Door events whose hour lies in [MORNING_HOUR, NIGHT_HOUR) are ignored.
    Otherwise every room is searched for a reading with value > 0 whose
    timestamp lies in [door_timestamp, door_timestamp + BUFFER]; if no room
    has one, "Oh no, Alert" is printed.

    The window is compared on full datetimes rather than on time of day, so
    it stays correct when it runs past midnight.

    Args:
        door_timestamp: ``datetime.datetime`` at which the door was heard.
        room_data: Iterable of rooms, each an iterable of
            (``datetime.datetime``, value) readings.

    Returns:
        None. The alert is reported with print().
    """
    # If the timestamp isn't in the period we care about, no need to check
    if MORNING_HOUR <= door_timestamp.hour < NIGHT_HOUR:
        return

    window_end = door_timestamp + BUFFER
    # Need to decide at what point lack of activity would be concerning.
    # Does action need to be seen for a certain amount of sampling? Or just any action
    movement_seen = any(
        val > 0
        for room in room_data
        for (ts, val) in room
        if door_timestamp <= ts <= window_end
    )
    if not movement_seen:
        print("Oh no, Alert")
    return
        
    
    

In [9]:
  """Returns list of class names corresponding to score vector."""
def class_names_from_csv(class_map_csv_text):
    """Returns list of class names corresponding to score vector.

    Args:
        class_map_csv_text: Path of the class-map CSV, opened with
            ``tf.io.gfile.GFile``. The file must have a ``display_name``
            column.

    Returns:
        List of the ``display_name`` values, one per CSV row, in file order.
    """
    with tf.io.gfile.GFile(class_map_csv_text) as csvfile:
        reader = csv.DictReader(csvfile)
        # Consume the reader while the file is still open: DictReader is lazy,
        # so iterating after the `with` block would read outside the context.
        return [row['display_name'] for row in reader]



In [4]:
def ensure_sample_rate(original_sample_rate, waveform, desired_sample_rate=16000):
    """Resample waveform if required.

    Args:
        original_sample_rate: Sample rate of ``waveform``.
        waveform: Array of samples; its length is taken with ``len()``.
        desired_sample_rate: Target sample rate (default 16000).

    Returns:
        Tuple ``(desired_sample_rate, waveform)``. The waveform is returned
        untouched when the two rates already match; otherwise it is
        resampled with ``scipy.signal.resample`` to the proportional length.
    """
    if original_sample_rate != desired_sample_rate:
        # Import the submodule explicitly: `import scipy` alone does not load
        # `scipy.signal` on older SciPy releases.
        from scipy import signal

        desired_length = int(round(float(len(waveform)) / original_sample_rate * desired_sample_rate))
        waveform = signal.resample(waveform, desired_length)
    return desired_sample_rate, waveform

In [32]:
def getModel(cache_dir="\\Users\\chloe\\Documents\\tensorflow",
             model_url='https://tfhub.dev/google/yamnet/1'):
    """Load a TensorFlow Hub model (YAMNet by default).

    Args:
        cache_dir: Directory exported as ``TFHUB_CACHE_DIR`` before loading.
            Pass ``None`` to leave the environment variable as it is.
            The default is the machine-specific path this notebook has
            always used; override it on other machines.
        model_url: Handle passed to ``hub.load``.

    Returns:
        The object returned by ``hub.load``. Its class names can be read with
        ``class_names_from_csv(model.class_map_path().numpy())``.
    """
    if cache_dir is not None:
        os.environ["TFHUB_CACHE_DIR"] = cache_dir
    return hub.load(model_url)

In [72]:
"""
split on silence doesn't seem to be possible if I combine librosa and soundfile (look further into this!)
Currently, just splitting on sound length
"""

def splitFile(file_name, segment_dur):
    """Split an audio file into consecutive fixed-duration segments.

    Args:
        file_name: Path passed to ``soundfile.read`` (read as int16).
        segment_dur: Segment duration in seconds; may be fractional.

    Returns:
        List of arrays sliced from the audio data. Every segment holds
        ``round(sample_rate * segment_dur)`` frames except possibly the last,
        which holds the remainder. An empty file yields an empty list.

    Raises:
        ValueError: If ``segment_dur`` is too small to cover a single frame.
    """
    data, sr = sf.read(file_name, dtype=np.int16)
    # Slice bounds must be integers; a fractional duration would otherwise
    # make this a float and break the slicing below.
    segment_length = int(round(sr * segment_dur))
    if segment_length <= 0:
        raise ValueError("segment_dur must cover at least one audio frame")
    return [
        data[start:start + segment_length]
        for start in range(0, len(data), segment_length)
    ]

In [69]:
# Sample rate the waveform is converted to before it is handed to the model.
SAMPLE_RATE = 16000.0
def getClassifications(model,wav_data,samplerate,class_names=None):
    """Classify one audio segment and return its five highest-scoring classes.

    Args:
        model: Callable returning ``(scores, embeddings, spectrogram)`` for a
            waveform, as the YAMNet hub model does.
        wav_data: Samples as a NumPy array; divided by 32768.0, so int16-range
            values are expected. Arrays with more than one dimension are
            averaged over axis 1.
        samplerate: Sample rate of ``wav_data``. The waveform is resampled
            with resampy when it differs from SAMPLE_RATE.
        class_names: Optional list mapping score index to label. When omitted
            it is read from the model's class map.

    Returns:
        Flat list alternating label and mean score, best first:
        ``[name0, score0, name1, score1, ...]`` (at most five pairs).
    """
    if class_names is None:
        # Resolve labels from the model itself rather than relying on a
        # `class_names` global that no cell defines.
        class_names = class_names_from_csv(model.class_map_path().numpy())

    waveform = wav_data / 32768.0  # Convert to [-1.0, +1.0]
    waveform = waveform.astype('float32')
    if len(waveform.shape) > 1:
        waveform = np.mean(waveform, axis=1)
    # Use the caller-supplied rate; the global `sr` may not exist or may
    # belong to a different file.
    if samplerate != SAMPLE_RATE:
        waveform = resampy.resample(waveform, samplerate, SAMPLE_RATE)

    scores, _embeddings, _spectrogram = model(waveform)
    prediction = np.mean(scores, axis=0)  # average the scores over axis 0
    top5_i = np.argsort(prediction)[::-1][:5]

    toRet = []
    for i in top5_i:
        toRet.append(class_names[i])
        toRet.append(prediction[i])
    return toRet

In [47]:

# Load the model once and keep it in `model`; the classification loop passes it to getClassifications.
model = getModel()

In [71]:
# Classify every segment in turn and print its top classes.
# NOTE(review): `split` and `sr` are not assigned in any visible cell — presumably
# `split = splitFile(...)` plus the file's sample rate from an earlier session; confirm.
for i, section in enumerate(split):
    print("Section: " + str(i))
    print(getClassifications(model, section, sr))

Section: 0
521
['Inside, small room', 0.14063552, 'Door', 0.09401716, 'Water', 0.08534997, 'Inside, large room or hall', 0.061932128, 'Liquid', 0.05978501]
Section: 1
521
['Inside, small room', 0.27281162, 'Door', 0.098123156, 'Sliding door', 0.06624843, 'Animal', 0.064822264, 'Bouncing', 0.047153708]
Section: 2
521
['Keys jangling', 0.15627353, 'Animal', 0.14754096, 'Inside, small room', 0.09506138, 'Domestic animals, pets', 0.090689115, 'Sliding door', 0.08435214]
Section: 3
521
['Keys jangling', 0.22255456, 'Rodents, rats, mice', 0.14939107, 'Patter', 0.12602122, 'Crushing', 0.1076482, 'Animal', 0.09418496]


In [53]:
# NOTE(review): `data` is not assigned at top level in any visible cell (splitFile only
# binds a local of that name) — presumably left over from an earlier session; confirm.
data.shape

(1739416,)