# dsp4bats - user guide - 02 - get_files

This notebook is a part of the CloudedBats software project (http://cloudedbats.org).

Source code for dsp4bats: https://github.com/cloudedbats/cloudedbats_dsp

### Get a list of sound files and available metadata

Sound files collected by using the CloudedBats WURB contain some useful information in
the filename. By using the SoundFileManager class it is possible to easily extract this
metadata and use it in a Pandas DataFrame.

The example code below shows how to find all wave files in a directory and to use the 
metadata to plot the recording positions on a map.

In [1]:
# Make the dsp4bats library importable from this notebook by adding
# the parent directory to the module search path.
import sys
path = ".."
sys.path += [path]

In [2]:
# Import python modules.
%matplotlib inline
import pandas as pd
import numpy as np
import dsp4bats

In [3]:
# tkinter is used for open file/dir dialogs.
import tkinter
import tkinter.filedialog
# Create the Tk root window and withdraw (hide) it, since only the
# file/dir dialogs are needed in this notebook.
root = tkinter.Tk()
root.withdraw()
# Examples of dialog calls (not executed in this cell):
#file_path = tkinter.filedialog.askopenfilename()
#dir_path = tkinter.filedialog.askdirectory()

''

In [4]:
# Create a file manager object. It is used in the following cells to
# search for sound files and to get the result as a dataframe.
file_mgr = dsp4bats.SoundFileManager()

In [5]:
# Use Tkinter to select a directory and get all wave files found.
dir_path = tkinter.filedialog.askdirectory()
# The dialog returns an empty value if it is cancelled. Stop here with a
# clear message instead of starting a recursive search with an empty path.
if not dir_path:
    raise ValueError('No directory selected. Run this cell again and select a directory.')
# Search the selected directory, including subdirectories.
file_mgr.find_sound_files(dir_path=dir_path, recursive=True)

In [6]:
# Get the dataframe from the file manager and check the content.
# Only the first rows are displayed, since the directory may contain
# a large number of sound files.
files_df = file_mgr.get_dataframe()
files_df.head(10)

Unnamed: 0,detector_id,datetime,datetime_str,latitude_dd,longitude_dd,latlong_str,rec_type,frame_rate_hz,file_frame_rate_hz,is_te,comments,dir_path,file_name,file_path,file_stem,abs_file_path
0,WurbAA03,2017-07-31 22:05:50+02:00,20170731T220550+0200,43.2908,-2.0130,N43.2908W2.0130,TE384,384000,38400,True,,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec,WurbAA03_20170731T220550+0200_N43.2908W2.0130_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...,WurbAA03_20170731T220550+0200_N43.2908W2.0130_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...
1,WurbAA03,2017-07-31 22:10:32+02:00,20170731T221032+0200,43.3148,-2.0060,N43.3148W2.0060,TE384,384000,38400,True,,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec,WurbAA03_20170731T221032+0200_N43.3148W2.0060_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...,WurbAA03_20170731T221032+0200_N43.3148W2.0060_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...
2,WurbAA03,2017-07-31 22:10:48+02:00,20170731T221048+0200,43.3147,-2.0060,N43.3147W2.0060,TE384,384000,38400,True,,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec,WurbAA03_20170731T221048+0200_N43.3147W2.0060_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...,WurbAA03_20170731T221048+0200_N43.3147W2.0060_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...
3,WurbAA03,2017-07-31 22:11:15+02:00,20170731T221115+0200,43.3144,-2.0062,N43.3144W2.0062,TE384,384000,38400,True,,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec,WurbAA03_20170731T221115+0200_N43.3144W2.0062_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...,WurbAA03_20170731T221115+0200_N43.3144W2.0062_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...
4,WurbAA03,2017-07-31 22:12:21+02:00,20170731T221221+0200,43.3139,-2.0067,N43.3139W2.0067,TE384,384000,38400,True,,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec,WurbAA03_20170731T221221+0200_N43.3139W2.0067_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...,WurbAA03_20170731T221221+0200_N43.3139W2.0067_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...
5,WurbAA03,2017-07-31 22:12:35+02:00,20170731T221235+0200,43.3138,-2.0069,N43.3138W2.0069,TE384,384000,38400,True,,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec,WurbAA03_20170731T221235+0200_N43.3138W2.0069_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...,WurbAA03_20170731T221235+0200_N43.3138W2.0069_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...
6,WurbAA03,2017-07-31 22:13:22+02:00,20170731T221322+0200,43.3135,-2.0064,N43.3135W2.0064,TE384,384000,38400,True,,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec,WurbAA03_20170731T221322+0200_N43.3135W2.0064_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...,WurbAA03_20170731T221322+0200_N43.3135W2.0064_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...
7,WurbAA03,2017-07-31 22:14:25+02:00,20170731T221425+0200,43.3137,-2.0067,N43.3137W2.0067,TE384,384000,38400,True,,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec,WurbAA03_20170731T221425+0200_N43.3137W2.0067_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...,WurbAA03_20170731T221425+0200_N43.3137W2.0067_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...
8,WurbAA03,2017-07-31 22:14:59+02:00,20170731T221459+0200,43.3137,-2.0067,N43.3137W2.0067,TE384,384000,38400,True,,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec,WurbAA03_20170731T221459+0200_N43.3137W2.0067_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...,WurbAA03_20170731T221459+0200_N43.3137W2.0067_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...
9,WurbAA03,2017-07-31 22:15:35+02:00,20170731T221535+0200,43.3137,-2.0068,N43.3137W2.0068,TE384,384000,38400,True,,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec,WurbAA03_20170731T221535+0200_N43.3137W2.0068_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...,WurbAA03_20170731T221535+0200_N43.3137W2.0068_...,/home/arnold/Desktop/Bidarray_2017 _wurb3_rec/...


In [9]:
# Count the number of files recorded at each distinct position.
# The result has one row per position and a 'file_count' column.
position_columns = ['latlong_str', 'latitude_dd', 'longitude_dd']
distinct_df = files_df.groupby(position_columns).size().reset_index(name='file_count')

In [10]:
# Add a description column, for example "Pos: N43.2619W1.3506 Count: 125".
# It is used as the popup text for the markers in the map cell below.
position_text = 'Pos: ' + distinct_df['latlong_str']
count_text = ' Count: ' + distinct_df['file_count'].astype(str)
distinct_df['description'] = position_text + count_text
distinct_df

Unnamed: 0,latlong_str,latitude_dd,longitude_dd,file_count,description
0,N43.0705W1.4167,43.0705,-1.4167,2,Pos: N43.0705W1.4167 Count: 2
1,N43.1288W1.3738,43.1288,-1.3738,1,Pos: N43.1288W1.3738 Count: 1
2,N43.1289W1.3739,43.1289,-1.3739,1,Pos: N43.1289W1.3739 Count: 1
3,N43.1292W1.3739,43.1292,-1.3739,1,Pos: N43.1292W1.3739 Count: 1
4,N43.2618W1.3505,43.2618,-1.3505,1,Pos: N43.2618W1.3505 Count: 1
5,N43.2618W1.3506,43.2618,-1.3506,1,Pos: N43.2618W1.3506 Count: 1
6,N43.2619W1.3505,43.2619,-1.3505,7,Pos: N43.2619W1.3505 Count: 7
7,N43.2619W1.3506,43.2619,-1.3506,125,Pos: N43.2619W1.3506 Count: 125
8,N43.2620W1.3507,43.2620,-1.3507,1,Pos: N43.2620W1.3507 Count: 1
9,N43.2620W1.3508,43.2620,-1.3508,1,Pos: N43.2620W1.3508 Count: 1


In [11]:
# Plot the recording positions on a map. There are many alternatives
# for this, but folium was easy to use for this example.
# Folium install: pip3 install folium
import folium

# The map is centered on the mean of the distinct positions.
center_lat = distinct_df.latitude_dd.mean()
center_long = distinct_df.longitude_dd.mean()
map_center = [center_lat, center_long]

# Create the map object.
map_osm = folium.Map(location=map_center, zoom_start=10)

# Loop over the positions and create one marker for each of them.
# Values are zipped in the same (lat, lon) order as the marker location.
marker_rows = zip(distinct_df.latitude_dd.values,
                  distinct_df.longitude_dd.values,
                  distinct_df.description.values)
for lat, lon, popup_text in marker_rows:
    # The description column is used for popup messages.
    marker = folium.Marker([lat, lon], popup=popup_text).add_to(map_osm)

# Display in notebook.
#map_osm

# Write to html file.
map_osm.save('folium.html')