In [None]:
import numpy as np
from infromation_theory_utils import JSD, Entropy
import pandas as pd
import soundfile as sf
import os
from scipy.linalg import svd, toeplitz
from scipy.stats import zscore
from statsmodels.tsa.stattools import acf
from scipy.signal import spectrogram
import time
    
import matplotlib.pyplot as plt
%matplotlib inline

# Import packages related to downsampling 
import samplerate 
# import resampy
import scipy.signal as scipy_signal
import math

# Import packages related to filtering
from scipy.io import wavfile
import os
import matplotlib.pyplot as plt
import aasi_filters as filters
from scipy.signal import butter, lfilter


## Introduction

This notebook compares the EGCI (entropy–complexity, HxC) curves of daytime and nighttime recordings. The data comes from the Mixed_AM_Dataset2 folder on the Google Drive, which I downloaded separately onto an external hard drive.

Note: Each audio file is split into five-second clips, and one clip per file is chosen at random for the analysis.

In [None]:
# Number of autocorrelation lags used by the analysis cells (passed as `nlags` to acf);
# it also selects which precomputed CSV files are loaded below.
lag = 512
# Boundary table for the given lag; the plotting code reads its 'Entropy' and
# 'Complexity' columns.
cotas = pd.read_csv('./boundary_files/Cotas_HxC_bins_' + str(int(lag)) + '.csv')
# Colored-noise table for the same lag.
# NOTE(review): not referenced in the cells visible here — presumably a reference curve; confirm.
noise = pd.read_csv('./coloredNoises/coloredNoises_' + str(int(lag)) + '.csv')

# Alternative data locations (disabled):
# Known files with rain in them
# datapath = "./Rain/"
# datapath = "./Sample_Data/"

# Directory that is listed and read by the day/night cells (external hard drive).
datapath = "/Volumes/Elements/Mixed_AM_Dataset2/"

### Define relevant helper functions

In [None]:
""" 
Some functions taken from section called Split 
Audio Data from File: Audio Data Visualization
by Erika Joun

Added samplerate input parameter to functions
"""
from scipy.io.wavfile import write
original_rate = 384000  # Nominal recording sample rate in samples per second. NOTE(review): the original comment says this is also defined elsewhere — confirm the two stay in sync.

# Get n-second clips
def split_into_n_seconds(wav_data, samplerate, n=5):
    """Split an audio array into consecutive clips that are each n seconds long.

    :param wav_data: Array of audio samples; the first axis is treated as time.
    :param samplerate: Number of samples per second in ``wav_data``.
    :param n: Clip duration in seconds (default 5).

    :return: List of arrays, each containing ``n * samplerate`` samples along
             the first axis. Trailing samples that do not fill a whole clip are
             dropped. The list is empty when the audio is shorter than one clip.
    :raises ValueError: If ``n * samplerate`` is smaller than one sample.
    """
    samples_per_clip = int(round(n * samplerate))
    if samples_per_clip <= 0:
        raise ValueError('n * samplerate must be at least one sample, got %r' %
                         (n * samplerate,))

    wav_data = np.asarray(wav_data)
    # Only whole clips are kept, so every returned clip has the same length and
    # really lasts n seconds (the count is floored, not rounded).
    clip_count = len(wav_data) // samples_per_clip
    if clip_count == 0:
        second_clips = []
    else:
        usable = wav_data[:clip_count * samples_per_clip]
        second_clips = np.split(usable, clip_count)

    print('%d %d-second clips' % (len(second_clips), n))
    return second_clips

# Saves clip as a wav file
def save_clip(filename, wav_array, samplerate):
    """Write ``wav_array`` to ``<filename>.wav`` at the given sample rate.

    The '.wav' extension is always appended, so ``filename`` is expected to be
    given without one.
    """
    target_path = filename + '.wav'
    write(target_path, samplerate, wav_array)
    
# Saves all clips to a directory
def save_clips_to_dir(shorter_clips, dirname, samplerate):
    """Save every clip into ``dirname``, naming each file after its position.

    Clips are written through ``save_clip`` as ``dirname/0.wav``,
    ``dirname/1.wav``, ... in iteration order.
    """
    for clip_number, clip_data in enumerate(shorter_clips):
        clip_stem = dirname + '/%d' % clip_number
        save_clip(clip_stem, clip_data, samplerate)
    
## FILTERING FUNCTIONS ###
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    :param lowcut: Lower cut-off frequency, in the same unit as ``fs``.
    :param highcut: Upper cut-off frequency, in the same unit as ``fs``.
    :param fs: Sampling frequency of the signal to be filtered.
    :param order: Filter order passed to ``scipy.signal.butter`` (default 5).

    :return: The ``(b, a)`` coefficient pair returned by ``butter``.
    """
    nyquist = 0.5 * fs
    # butter expects the band edges as fractions of the Nyquist frequency.
    normalized_band = [lowcut / nyquist, highcut / nyquist]
    return butter(order, normalized_band, btype='band')

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter ``data`` with a Butterworth filter.

    The coefficients come from ``butter_bandpass(lowcut, highcut, fs, order)``
    and are applied with ``scipy.signal.lfilter``.

    :return: The filtered signal produced by ``lfilter``.
    """
    numerator, denominator = butter_bandpass(lowcut, highcut, fs, order=order)
    return lfilter(numerator, denominator, data)

def make_txt_file(lists, path):
    """
    Write the given items to 'List_of_Rain.txt' inside ``path``, one per line.

    :param lists: Iterable of file names (labels); each is formatted with ``%s``.
    :param path: Existing directory in which the text file is created.
                 An existing 'List_of_Rain.txt' there is overwritten.
    """
    completeName = os.path.join(path, 'List_of_Rain.txt')
    # The context manager guarantees the file is flushed and closed, even if
    # formatting one of the items raises.
    with open(completeName, 'w') as out_file:
        out_file.writelines("%s\n" % item for item in lists)

## Interactive Plotting with Plot.ly ##

In [None]:
"""
For reference: https://plotly.com/python/line-and-scatter/
Plot an interactive line graph
:param clip_names: 
:param x_data: 
:param y_data: 
:param xlabel: Label of x axis
:param ylabel: Label of y axis
:param plot_title: Title of the graph

:return: Will display an interactive graph
"""
import plotly.graph_objects as go
import numpy as np

def interactive_plot(clip_names, x_data, y_data, xlabel, ylabel, plot_title):
    """Display an interactive Plotly line-and-marker graph of y_data against x_data.

    :param clip_names: Sequence of labels, one per point, attached as the
                       trace's ``text``.
    :param x_data: Values plotted on the x axis.
    :param y_data: Values plotted on the y axis.
    :param xlabel: Label of x axis
    :param ylabel: Label of y axis
    :param plot_title: Title of the graph

    :return: None. The figure is displayed with ``fig.show()``.
    """
    # Plot the caller's data; previously the global `cotas` table was plotted
    # regardless of the x_data / y_data arguments.
    fig = go.Figure(data=go.Scatter(x=x_data,
                                    y=y_data,
                                    mode='lines+markers',
                                    # TODO: marker_color=red for < 0.5, green otherwise
                                    text=np.array(clip_names)))

    fig.update_layout(title=plot_title,
                      xaxis=dict(title=xlabel),
                      yaxis=dict(title=ylabel))
    fig.show()

In [None]:
import os
import datetime

# Example cell: plots one prediction score per clip against a 10-minute time grid.
# NOTE(review): FILE_PATH is an empty placeholder. os.walk("") is expected to yield
# nothing, in which case next() below raises StopIteration — set it to the clip
# directory before running this cell.
FILE_PATH = ""
AM_NAME = "AM16"  # used in the scores file name and in the plot title
FIRST_STAMP = "00:00:00 06/13/2019"  # time of the first score, parsed below as "%H:%M:%S %m/%d/%Y"
TITLE = "Bird Vocalizations from 00:00 6/13/19 - 23:50 6/17/19 - "+AM_NAME

# get all file names in FILE_PATH (top level only: only the first os.walk entry is used)
(_, _, clip_names) = next(os.walk(FILE_PATH))

# collect data: one float per line from "global_scores_<AM_NAME>.txt" in the working directory
global_scores = []
with open("global_scores_"+AM_NAME+".txt", "r") as f:
    for line in f:
        global_scores.append(float(line.strip()))

# create range of datetime objects: one stamp per score, spaced 10 minutes apart from FIRST_STAMP
base = datetime.datetime.strptime(FIRST_STAMP, "%H:%M:%S %m/%d/%Y")
time_stamps = [base + datetime.timedelta(minutes=x*10) for x in range(len(global_scores))]

# run interactive plot
# NOTE(review): clip_names comes from the directory listing and global_scores from the
# text file; nothing here checks that they have the same length or order — confirm.
interactive_plot(clip_names, time_stamps, global_scores, 
                 "Time", "Prediction scores", TITLE)

### Plotting Day vs Night ###
Without any downsampling or filtering 

In [None]:
# Each recording is classified by the trailing "_HHMMSS" part of its file name (UTC).
# Day:   10:00am-9:59pm PT (_170000 to _045959 in UTC)
# Night: 10:00pm-9:59am PT (_050000 to _165959 in UTC)
NIGHT_START_UTC = 50000   # _050000, first time stamp that counts as night
NIGHT_END_UTC = 170000    # _170000, first time stamp that counts as day again

# Comment out later
np.random.seed(0)

datapath = "/Volumes/Elements/Mixed_AM_Dataset2/"

# Sorted so the day/night lists (and the seeded random clip selection in later
# cells) do not depend on the order in which the file system returns entries.
listing = sorted(os.listdir(datapath))

list_day = []
list_night = []
for file in listing:
    # Skip hidden entries (e.g. "._*" sidecar files) and names without a time stamp.
    if file.startswith('.') or '_' not in file:
        continue
    stamp = os.path.splitext(file)[0].split('_')[-1]
    if not stamp.isdigit():
        # Not a numeric HHMMSS suffix, so the file cannot be classified.
        continue
    n = int(stamp)
    # Plain chained comparison (no bitwise `&`). The lower bound is inclusive so a
    # file stamped exactly _050000 counts as night, and the upper bound is the
    # start of the day range so _165900-_165959 still count as night.
    if NIGHT_START_UTC <= n < NIGHT_END_UTC:
        list_night.append(file)
    else:  # _170000 to _045959 in UTC
        list_day.append(file)

print(list_day)
print(list_night)

In [None]:
# Entropy (H) and complexity (C) of one randomly chosen 5-second clip per day-time file.
H_day = []
C_day = []
labels = []

# Re-seed so the randomly chosen clip per file is the same every time this cell runs.
np.random.seed(0)

for f in list_day:

    print(f)
    x, fs = sf.read(os.path.join(datapath, f))
    print(x.shape)

    ### SPLIT ###
    # Split the recording into 5 second audio clips, using the sample rate
    # reported by the file itself rather than a hard-coded constant.
    split_data = np.array(split_into_n_seconds(x, fs, 5))

    # Keep a single randomly selected clip
    number_of_rows = split_data.shape[0]
    random_indices = np.random.choice(number_of_rows, size=1, replace=False)
    x = split_data[random_indices, :]

    # Algorithm steps
    # Auto-Correlation Matrix
    # NOTE(review): newer statsmodels releases renamed `unbiased` to `adjusted`;
    # confirm which keyword the installed version accepts.
    rxx = acf(x.flatten(), nlags=lag, unbiased=True, fft=True)
    Sxx = toeplitz(rxx)

    # Singular Value Decomposition
    U, s, Vt = svd(Sxx)

    entropy = Entropy(s)                  # computed once, reused for the complexity
    H_day.append(entropy)                 # Entropy
    C_day.append(entropy * JSD(s))        # Complexity, also called EGCI index
    # Label with the name of the file being processed (`f`), not the stale
    # `file` variable left over from the day/night listing loop.
    labels.append(f.split('.')[0])

In [None]:
# interactive_plot expects (clip_names, x_data, y_data, xlabel, ylabel, plot_title):
# entropy (H) on the x axis, complexity (C) on the y axis, one point per day-time file.
interactive_plot(list_day, H_day, C_day, "Entropy", "Complexity", "HxC Day Curve")

In [None]:
# Entropy (H) and complexity (C) of one randomly chosen 5-second clip per night-time file.
H_night = []
C_night = []
labels = []

# Re-seed so the randomly chosen clip per file is the same every time this cell runs.
np.random.seed(0)

for f in list_night:

    print(f)
    x, fs = sf.read(os.path.join(datapath, f))

    ### SPLIT ###
    # Split the recording into 5 second audio clips, using the sample rate
    # reported by the file itself rather than a hard-coded constant.
    split_data = np.array(split_into_n_seconds(x, fs, 5))

    # Keep a single randomly selected clip
    number_of_rows = split_data.shape[0]
    random_indices = np.random.choice(number_of_rows, size=1, replace=False)
    x = split_data[random_indices, :]

    # Algorithm steps
    # Auto-Correlation Matrix
    # NOTE(review): newer statsmodels releases renamed `unbiased` to `adjusted`;
    # confirm which keyword the installed version accepts.
    rxx = acf(x.flatten(), nlags=lag, unbiased=True, fft=True)
    Sxx = toeplitz(rxx)

    # Singular Value Decomposition
    U, s, Vt = svd(Sxx)

    entropy = Entropy(s)                    # computed once, reused for the complexity
    H_night.append(entropy)                 # Entropy
    C_night.append(entropy * JSD(s))        # Complexity, also called EGCI index
    # Label with the name of the file being processed (`f`), not the stale
    # `file` variable left over from the day/night listing loop.
    labels.append(f.split('.')[0])

In [None]:
# interactive_plot expects (clip_names, x_data, y_data, xlabel, ylabel, plot_title):
# entropy (H) on the x axis, complexity (C) on the y axis, one point per night-time file.
interactive_plot(list_night, H_night, C_night, "Entropy", "Complexity", "HxC Night Curve")

In [None]:
# K-S Test 
from scipy.stats import ks_2samp 

# Two-sample Kolmogorov-Smirnov test, day vs night: entropy values first ...
d1_statistic, p1_val = ks_2samp(H_day, H_night)
print(d1_statistic, p1_val)

# ... then complexity values.
d2_statistic, p2_val = ks_2samp(C_day, C_night)
print(d2_statistic, p2_val)