In [1]:
import sys
import datetime as dt
from os import listdir
from os.path import isfile, join
import pandas as pd
import numpy as np
import h5py
import dask.dataframe as dd
import dask.array as da

import matplotlib.pyplot as plt
%matplotlib inline

sys.path.append('../')
from envir import config

#### SPL Data 2017 - 2018

In [2]:
sensors = pd.read_json(config.dataFol+"sonyc_test.collection.json")
bird_sensors = ['sonycnode-b827eb815321.sonyc', # 19 Washington Square North - 15-61 Washington Square N, New York, NY 10011
                'sonycnode-b827eb8e2420.sonyc', # 4 Washington Square North 1-6 Washington Square North, New York, NY 10003
                'sonycnode-b827eb86d458.sonyc', # Silver Lab - 60 5th Ave
                'sonycnode-b827eb905497.sonyc', # Kimmel Center - 60 Washington Square S, New York, NY 10012
                'sonycnode-b827eb0fedda.sonyc', # Juan Carlos - King Juan Carlos I of Spain Center, 53 Washington Square S, New York, NY 10012
                'sonycnode-b827eb1685c7.sonyc'] # Shimkin Reading Room - 35-51 West 4th Street, New York, NY 10012

sensors['bird_sensor'] = np.where(sensors.fqdn.isin(bird_sensors), 1,0)
bird_sensors = [(lambda x: x+'.h5')(x) for x in bird_sensors]

In [3]:
sensors[sensors.bird_sensor==1][['fqdn', 'title']]

Unnamed: 0,fqdn,title
74,sonycnode-b827eb905497.sonyc,Kimmel Center
76,sonycnode-b827eb8e2420.sonyc,4 Washington Square North
87,sonycnode-b827eb1685c7.sonyc,Shimkin Reading Room
88,sonycnode-b827eb815321.sonyc,19 Washington Square North
93,sonycnode-b827eb86d458.sonyc,Silver lab
97,sonycnode-b827eb0fedda.sonyc,Juan Carlos


In [4]:
def clean_spl_min(file = 'sensor_file_location', sensor = 'name'):
    sensordf = pd.read_hdf(file, key = '/minute_intervals')
    sensordf['sonyc_sensor_id'] = file[-31:-3]
    sensordf['sonyc_sensor_name'] = sensor
    sensordf['timestamp'] = pd.to_datetime(sensordf['timestamp'], unit='s')
    sensordf['year'] = pd.DatetimeIndex(sensordf['timestamp']).year
    sensordf['month'] = pd.DatetimeIndex(sensordf['timestamp']).month
    sensordf['day'] = pd.DatetimeIndex(sensordf['timestamp']).day
    sensordf['hour'] = pd.DatetimeIndex(sensordf['timestamp']).hour
    sensordf['minute'] = pd.DatetimeIndex(sensordf['timestamp']).minute
    sensordf['timestamp'] = sensordf['timestamp'].astype('str').str[0:16]
    sensorfin = sensordf[['sonyc_sensor_id','sonyc_sensor_name','timestamp', 'year', 'month', 'day', 'hour', 'minute',
                          'dBAS_lin_mean', 'dBAS_mean', 'dBAS_max']]
    return(sensorfin)

In [5]:
# sample to see what dataframe the function returns
clean_spl_min(config.dataFol+'spl/2017/'+'sonycnode-b827eb1685c7.sonyc.h5', sensor = 'Kimmel Center').head()

Unnamed: 0,sonyc_sensor_id,sonyc_sensor_name,timestamp,year,month,day,hour,minute,dBAS_lin_mean,dBAS_mean,dBAS_max
0,sonycnode-b827eb1685c7.sonyc,Kimmel Center,2017-01-20 18:21,2017,1,20,18,21,65.790405,64.380714,73.779999
1,sonycnode-b827eb1685c7.sonyc,Kimmel Center,2017-01-20 18:22,2017,1,20,18,22,64.585182,64.105667,70.980003
2,sonycnode-b827eb1685c7.sonyc,Kimmel Center,2017-01-20 18:23,2017,1,20,18,23,62.332256,62.321999,62.959999
3,sonycnode-b827eb1685c7.sonyc,Kimmel Center,2017-01-20 19:21,2017,1,20,19,21,62.327068,62.310001,62.720001
4,sonycnode-b827eb1685c7.sonyc,Kimmel Center,2017-01-20 19:22,2017,1,20,19,22,64.721664,63.962051,71.32


In [6]:
dfs = []
for index, row in sensors[sensors.bird_sensor ==1][['fqdn', 'title']].iterrows():
    print(row['fqdn'])
    yeardfs = []
    years = ['2017', '2018']
    for yr in years:
        sensor = clean_spl_min(file = config.dataFol+'spl/'+yr+'/'+row['fqdn']+'.h5', sensor = row['title'])
        yeardfs.append(sensor)
    dfs.append(pd.concat(yeardfs))

sonycnode-b827eb905497.sonyc
sonycnode-b827eb8e2420.sonyc
sonycnode-b827eb1685c7.sonyc
sonycnode-b827eb815321.sonyc
sonycnode-b827eb86d458.sonyc
sonycnode-b827eb0fedda.sonyc


In [7]:
df = pd.concat(dfs)

#### Updated SPL File By Minute 2019 - 2020

In [8]:
spl_new = pd.read_hdf(config.dataFol+'spl/master_df.h5')
spl_new.reset_index(inplace=True)
spl_new['timestamp']=spl_new['time'].astype('str').str[0:16]

spl_new = spl_new[['timestamp','sensor_id', 'laeq']]
spl_new.rename(columns=({'sensor_id': 'sonyc_sensor_id', 'laeq':'dBAS_lin_mean'}), inplace=True)

spl_new = spl_new.merge(sensors[sensors.bird_sensor==1][['fqdn', 'title']], how='left', 
         left_on='sonyc_sensor_id', right_on='fqdn')

spl_new = spl_new[['timestamp', 'sonyc_sensor_id', 'title', 'dBAS_lin_mean']]

spl_new.rename(columns=({'title': 'sonyc_sensor_name'}), inplace=True)

Unnamed: 0,timestamp,sonyc_sensor_id,sonyc_sensor_name,dBAS_lin_mean
0,2019-01-01 02:58,sonycnode-b827eb815321.sonyc,19 Washington Square North,70.687073
1,2019-01-01 02:59,sonycnode-b827eb815321.sonyc,19 Washington Square North,67.926041
2,2019-01-01 03:00,sonycnode-b827eb815321.sonyc,19 Washington Square North,62.928024
3,2019-01-01 03:01,sonycnode-b827eb815321.sonyc,19 Washington Square North,69.372772
4,2019-01-01 03:02,sonycnode-b827eb815321.sonyc,19 Washington Square North,65.137650
...,...,...,...,...
9253456,2020-05-30 22:19,sonycnode-b827eb1685c7.sonyc,Shimkin Reading Room,66.316750
9253457,2020-05-30 22:20,sonycnode-b827eb1685c7.sonyc,Shimkin Reading Room,67.029953
9253458,2020-05-30 22:21,sonycnode-b827eb1685c7.sonyc,Shimkin Reading Room,65.996613
9253459,2020-05-30 22:22,sonycnode-b827eb1685c7.sonyc,Shimkin Reading Room,63.701878


In [13]:
df_new = pd.concat([df, spl_new])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [15]:
df_new = df_new[['timestamp', 'sonyc_sensor_id', 'sonyc_sensor_name', 'dBAS_lin_mean']]

In [16]:
df_new.to_csv(config.dataFol+'clean_spl/clean_spl.csv', index=False)