In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import scipy.signal

## Data import
We import the csv file.
We are interested in the  Ping_index , Ping_date , Ping_time , Latitude,  Longitude , and the sv* columns.
Each sv* column corresponds to a depth.
The value for each cell is the logarithm of the intensity of the echo.(ratio of intensity)

In [2]:
data_path = '/home/benjamin/Bureau/data jam days/Hackathlon data/'

In [3]:
def load_data(filename):
    df = pd.read_csv(filename)
    del df['Distance_gps']
    del df['Distance_vl']
    del df['Ping_milliseconds']
    del df['Depth_start']
    del df['Depth_stop']
    del df['Range_start']
    del df['Range_stop']
    del df['Sample_count']
    data= np.array(df.iloc[:,5:]).transpose()
    return data,df

### Filtering

In [4]:
# Code from Roland to filter some Sonar artefacts
def binary_impulse(Sv, threshold=10):
    '''
    :param Sv: gridded Sv values (dB re 1m^-1)
    :type  Sv: numpy.array
    
    :param threshold: threshold-value (dB re 1m^-1)
    :type  threshold: float
    
    return:
    :param mask: binary mask (0 - noise; 1 - signal)
    :type  mask: 2D numpy.array

    desc: generate threshold mask
    
    defined by RB
    
    status: test
    
    '''
    
    mask = np.ones(Sv.shape).astype(int)

    samples,pings = Sv.shape

    for sample in range(1, samples-1):
        for ping in range(0, pings):
            
            a = Sv[sample-1, ping]
            b = Sv[sample, ping]
            c = Sv[sample+1, ping]

            if (b - a > threshold) & (b - c > threshold):
                mask[sample, ping] = 0

    return mask

In [5]:
def filter_data(data_matrix):
    # The relevant data values for the krill are between -70 and -65
    data2 =data_matrix.copy()
    data2[data_matrix<-70] = -70
    data2[data_matrix>-65] = -65
    data2 = data2 + 70
    # We apply a median filtering to get rid of the isolated peaks or lines (which are noise)
    # Two steps
    # A variant of the median filter implemented by Roland for lines
    datafilt = binary_impulse(data2.transpose(), threshold=2)
    datafilt = datafilt.transpose()*data2
    # A standard median filter used in image processing
    datafilt2 = sp.signal.medfilt(datafilt,kernel_size=3)
    # try to get rid of the mean by line
    data3 =datafilt2.copy()
    data3 = data3 - np.mean(data3,1,keepdims=True)
    # Gaussian filtering
    from skimage.filters import gaussian
    gauss_denoised = gaussian(data3,10)
    # Compute a function to find the krill
    signaldata = gauss_denoised[0:150,:]
    sumsignal = np.sum(signaldata,0)-np.mean(np.sum(signaldata,0))
    binary_signal = sumsignal.copy()
    threshold = 11
    binary_signal[sumsignal<threshold] = 0
    binary_signal[sumsignal>threshold] = 100
    return binary_signal

In [6]:
def extract_info(binary_signal,df):
    krill_list = []
    krill_dic = {}
    data_len = len(binary_signal)
    for idx in range(data_len):
        if binary_signal[idx] >0:
            if idx==0 or binary_signal[idx-1] == 0:
                # beginning of a krill detection
                krill_layer_start = idx
                # record latitude and longitude
                krill_dic['latitude_start'] = df.iloc[idx,3]
                krill_dic['longitude_start'] = df.iloc[idx,4]
                krill_dic['date_start'] = df.iloc[idx,1]
                krill_dic['time_start'] = df.iloc[idx,2]
            if idx == data_len-1 or binary_signal[idx+1] == 0:
                # end of krill detection
                krill_layer_stop = idx
                # record latitude and longitude
                krill_dic['latitude_stop'] = df.iloc[idx,3]
                krill_dic['longitude_stop'] = df.iloc[idx,4]
                krill_dic['date_stop'] = df.iloc[idx,1]
                krill_dic['time_stop'] = df.iloc[idx,2]
                # store krill layer in list
                krill_list.append(krill_dic)
                krill_dic = {}
                # Compute Krill depth
                #if krill_layer_stop<data_len-1:
                #    krill_layer = datafilt2[krill_layer_start:krill_layer_stop+1]
                #else:
                #    krill_layer = datafilt2[krill_layer_start:]
                #min_depth,max_depth,mean_depth = krill_depth(krill_layer)
    return krill_list

In [7]:
import glob
global_krill_list = []
for filename in glob.iglob(data_path+'*.csv'):
    print('Loading data ...')
    print('%s' % filename)    
    data,df = load_data(filename)
    print('Filtering data...')
    binary_signal = filter_data(data)
    print('Extraction information...')
    krill_list = extract_info(binary_signal,df)
    print('Number of Krill events:',len(krill_list))
    global_krill_list += krill_list

Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170215-T104632.sv.csv
Filtering data...
Extraction information...
Number of Krill events: 23
Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170214-T095411.sv.csv
Filtering data...
Extraction information...
Number of Krill events: 19
Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170214-T202310.sv.csv
Filtering data...
Extraction information...
Number of Krill events: 12
Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170215-T174955.sv.csv
Filtering data...
Extraction information...
Number of Krill events: 31
Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170215-T053628.sv.csv
Filtering data...
Extraction information...
Number of Krill events: 5
Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170214-T

  after removing the cwd from sys.path.
  """


Extraction information...
Number of Krill events: 1
Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170215-T074727.sv.csv
Filtering data...
Extraction information...
Number of Krill events: 4
Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170215-T163906.sv.csv
Filtering data...
Extraction information...
Number of Krill events: 13
Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170214-T132329.sv.csv
Filtering data...
Extraction information...
Number of Krill events: 40
Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170214-T062448.sv.csv
Filtering data...
Extraction information...
Number of Krill events: 30
Loading data ...
/home/benjamin/Bureau/data jam days/Hackathlon data/ACE_leg2_200khz-D20170214-T165300.sv.csv
Filtering data...
Extraction information...
Number of Krill events: 31
Loading data ...
/home/benjamin/Bureau/data 

In [8]:
len(global_krill_list)

278

In [9]:
import json
with open('krill_data.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(global_krill_list, ensure_ascii=False))

In [None]:
def krill_depth(array):
    # Compute the depth of the krill swarm
    depth_function = np.sum(array,1)