In [7]:
# Mount Google Drive at /content/drive (Colab only); DIR below points into this mount.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Dependencies, functions, globals

In [1]:
 # dependencies

import os
import math
import scipy
import random
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import itertools
import random
import json
import seaborn as sns
import pickle

from numpy import isnan
from random import shuffle

from scipy.signal import find_peaks
from scipy import interpolate
from scipy.interpolate import interp1d
from scipy.signal import butter, filtfilt

from bokeh.plotting import figure, output_file, save
from bokeh.io import push_notebook, show, output_notebook, export_png
from bokeh.layouts import row, column
from bokeh.plotting import figure, output_file
from bokeh.transform import linear_cmap
from bokeh.models import ColorBar, ColumnDataSource, DatetimeTickFormatter, CheckboxGroup, CustomJS, Title, Span, LabelSet, TapTool, BoxAnnotation
from bokeh.palettes import Category20 as palette, Category10
from bokeh.colors import RGB
from bokeh.embed import json_item, file_html
from bokeh.resources import CDN


In [2]:
# Tell Bokeh to render show() output inline in the notebook.
output_notebook()

In [None]:
!pip install selenium

In [None]:
!pip install anaconda

In [None]:
!conda install -c conda-forge firefox geckodriver

In [5]:
# Padding added before and after each clapping event, keyed by the event's
# clapping_type. It is in the same unit as the event times (seconds, judging by
# the `* 10**3` conversion applied where it is used).
EXTEND_WINDOW = {1:1, 2:2, 3:2, 4:7}
# Sampling rate of the resampled device signals.
SAMPLE_RATE = 50 #Hz

# Project root on the mounted Drive; the commented line is a local Windows alternative.
#DIR = 'C:/ritmo/CopenhagenMusicLab/'
DIR = '/content/drive/MyDrive/ritmo/CopenhagenMusicLab/'

In [6]:
# functions

def rms_signal(data, window_size, scroll):
  """Compute a windowed RMS of ``data`` at regularly spaced centre samples.

  The first centre is position ``int(window_size/2)``; subsequent centres are
  ``scroll`` samples apart. For each centre the RMS of the samples in
  ``[centre - window_size/2, centre + window_size/2)`` is stored at that
  position. All other positions stay NaN, and so does any centre whose window
  does not fit inside ``data`` or contains a NaN.

  Args:
    data: pandas Series of samples.
    window_size: window length in samples (may be a float such as 25.0).
    scroll: positive integer hop between consecutive window centres.

  Returns:
    DataFrame with a single 'rms' column on the same index as ``data``.

  Raises:
    ValueError: if ``scroll`` is not positive. Without this check the loop
      below would never advance and the call would hang.
  """
  if scroll <= 0:
    raise ValueError('scroll must be a positive number of samples, got ' + repr(scroll))

  half = window_size/2
  n_samples = data.shape[0]
  # Pre-filled with NaN so only valid centres need to be written.
  rms_y = [np.nan]*n_samples

  center = int(half)
  while center < n_samples:
    # The window must fit on both sides of the centre.
    if (center >= half) and (n_samples-center >= half):
      window = data.iloc[int(center-half):int(center+half)]
      # A single NaN in the window invalidates the RMS for this centre.
      if window.isna().sum() == 0:
        rms_y[center] = math.sqrt(sum(v*v for v in window)/len(window))
    center = center + scroll

  return pd.DataFrame(index = data.index, columns = ['rms'], data={'rms': rms_y})

def butter_lowpass_filter(data, cutoff, fs, order):
  """Apply a zero-phase Butterworth low-pass filter to ``data``.

  Args:
    data: 1-D array-like signal.
    cutoff: cutoff frequency, in the same unit as ``fs``.
    fs: sampling frequency of ``data``.
    order: order of the Butterworth filter.

  Returns:
    The filtered signal as returned by ``scipy.signal.filtfilt``.
  """
  # Derive the Nyquist frequency from `fs` here. The original read a name
  # `nyq` that is not defined in this function and only worked when a
  # notebook global of that name happened to exist.
  nyq = 0.5 * fs
  normal_cutoff = cutoff / nyq
  # get coefficients
  b, a = butter(order, normal_cutoff, btype='low', analog=False)
  # filtfilt runs the filter forward and backward, so there is no phase shift
  y = filtfilt(b, a, data)
  return y

"""
# rms
def rms_(data):
  return math.sqrt(sum([i*i for i in data])/len(data))
# hard limiter
def hard_limiter(data, column_name, limit):
  data.loc[data[column_name] > limit, column_name] = limit
  return data
"""

# FinnPy

def nannotime(row,shift,dshift):
    """Return a NaN placeholder copy of ``row`` with its time fields shifted.

    Columns whose name starts with 'time' are offset by ``shift``, columns
    whose name starts with 'datetime' are offset by ``dshift``, and every
    other column is set to NaN. ``row`` itself is not modified.
    """
    shifted = row.copy()
    for name in shifted.index:
        if name.startswith('time'):
            shifted[name] = shifted[name] + shift
        elif name.startswith('datetime'):
            shifted[name] = shifted[name] + dshift
        else:
            shifted[name] = np.nan
    return shifted

def gap_nans(data,gap_t):
    """Insert NaN placeholder rows around gaps in a time-stamped recording.

    A placeholder row (built by ``nannotime``) keeps shifted time/datetime
    values and NaN everywhere else. Placeholders are added:
      * just after the last sample,
      * just after the sample preceding each gap and just before the sample
        following it (a gap is a step in the time column larger than ``gap_t``),
      * just before the first sample.
    The result is sorted by the time column with a fresh integer index.

    Args:
        data: DataFrame with a 'time_concert' or 'time' column ('time_concert'
            is used when both exist). Time values are treated as milliseconds
            when the matching datetime offset is built.
        gap_t: minimum time step that counts as a gap.

    Returns:
        A new DataFrame containing the original rows plus the placeholder rows.

    Raises:
        ValueError: if ``data`` has neither a 'time' nor a 'time_concert' column.
    """
    cols = data.columns
    if 'time_concert' in cols:
        time_col = 'time_concert'
    elif 'time' in cols:
        time_col = 'time'
    else:
        raise ValueError("gap_nans needs a 'time' or 'time_concert' column")

    # Placeholders sit 35% of the median sample interval away from real samples.
    deltat = round(0.35*data[time_col].diff().median())
    dtdeltat = pd.to_timedelta(deltat,unit = 'ms')

    def _as_frame(row):
        # One-row frame with inferred dtypes, so concatenation does not turn
        # numeric columns into object columns (what DataFrame.append did).
        return row.to_frame().T.infer_objects()

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    tail_pad = _as_frame(nannotime(data.iloc[-1,:],deltat,dtdeltat))
    extended = pd.concat([data, tail_pad], ignore_index=True)

    # After ignore_index the labels equal positions, so they are valid for iloc.
    dt = extended[time_col].diff()
    gap_positions = sorted(dt[dt>gap_t].index, reverse=True)

    pads = []
    for gapi in gap_positions:
        pads.append(_as_frame(nannotime(extended.iloc[gapi-1,:],deltat,dtdeltat)))
        # The late edge of the gap moves backward in BOTH time and datetime.
        # The original shifted datetime forward here, which disagreed with
        # the time columns.
        pads.append(_as_frame(nannotime(extended.iloc[gapi,:],-deltat,-dtdeltat)))

    # Leading placeholder: 1 ms before the first sample, in time and datetime alike.
    pads.append(_as_frame(nannotime(extended.iloc[0,:],-1,pd.to_timedelta(-1,unit = 'ms'))))

    data = pd.concat([extended] + pads).sort_values(time_col,ignore_index=True)

    return data

# Create csv files for each clapping section

In [None]:
# create csv files for each clapping sections
#
# For every tagged clapping event, each device recording in Aligned/Hall is
# gap-padded, resampled onto a common 50 Hz grid, reduced to the norm of the
# sample-to-sample difference of x, y, z (divided by its median), low-pass
# filtered and summarised by a windowed RMS. One CSV is written per event and
# one Bokeh figure per event is collected in `plots`.

Start_time = '2021-10-26 17:30:00+0000'
End_time   = '2021-10-26 20:30:00+0000'

sample_rate= 50 # Hz

# set standard timestamps in date time and millisecond floats. Datetime for ploting, ms for interp
# 50 Hz
ts_dt = pd.date_range(pd.to_datetime(Start_time), pd.to_datetime(End_time), freq='20ms')
ts_ts = np.arange(pd.to_datetime(Start_time).timestamp(), pd.to_datetime(End_time).timestamp(), 0.02)*1000

dir_devices = DIR + 'Aligned/Hall/'
f_events = DIR + 'Concert_content/Concert_event_times_clean.csv'
f_clappings = DIR + 'Concert_content/Concert_event_times_clean_tags.csv'

# read events data
df_events = pd.read_csv(f_events)
df_clappings = pd.read_csv(f_clappings)
# value of `time` on the row where time_concert == 0; added to event times in titles below
shift_concert_time = df_events[df_events.time_concert == 0].time.values[0]

# devices
n_devices = len(os.listdir(dir_devices))

index_devices = slice(0, n_devices) # which devices
exclude_devices = [] # device indexes to exclude

# boolean mask over the directory listing: selected slice minus the excluded indexes
mask = np.zeros((n_devices), dtype='bool')
mask[index_devices] = True
mask[exclude_devices] = False

list_devices = np.array(os.listdir(dir_devices))[mask]

print(len(list_devices), ' devices.')

# events
index_events = slice(3, df_clappings.shape[0]) # which events

clapping_events_in  = df_clappings.start_time_concert.to_list()[index_events]
clapping_events_out = df_clappings.end_time_concert.to_list()[index_events]
clapping_events_types = df_clappings.clapping_type.to_list()[index_events]
clapping_events_tags  = df_clappings.tag.to_list()[index_events]

plots = []
section_n = 0

# for every event:
# go over all devices
for ev_in, ev_out, ev_type, ev_tag in zip(clapping_events_in, clapping_events_out, clapping_events_types, clapping_events_tags):
  # widen the event by a type-dependent margin, then convert to the unit of time_concert (x1000)
  extend = EXTEND_WINDOW[ev_type]
  in_event_time = (ev_in - extend) * 10**3
  out_event_time = (ev_out + extend) * 10**3

  list_of_lists_x = []
  list_of_lists_y = []
  list_of_device_names = []

  print('time interval: ', in_event_time, out_event_time)

  dev_n = 0
  # relative time axis for this event: starts at epoch 0 and advances in 20 ms steps
  ts_ev = pd.date_range(start=pd.to_datetime(0), end=pd.to_datetime(abs(out_event_time - in_event_time), unit='ms'), freq='20ms')
  df_section = pd.DataFrame(index = ts_ev)
  df_rms_plot = pd.DataFrame(index = ts_ev)
  # for every device:
  # gapnans, resample, select event, !find peaks, norm, caluclate rms, plot
  for f in list_devices:
    print('   device: ', dev_n)

    # read data
    df = pd.read_csv(dir_devices + f)

    # gap_nans
    df['datetime_concert'] = (pd.to_datetime(df['timestamp'], unit='ms'))
    df_gapsafe = gap_nans(df, 100)
    df_gapsafe['datetime'] = (pd.to_datetime(df_gapsafe['timestamp'],unit='ms'))

    # resample
    # every column (including the datetime ones) is linearly interpolated onto ts_ts
    df_resampled = pd.DataFrame(index = ts_dt[:-1])
    cols = df_gapsafe.columns
    for col in cols:
        func = interpolate.interp1d(df_gapsafe['timestamp'], df_gapsafe[col], fill_value='extrapolate')
        df_resampled[col] = func(ts_ts)

    # norm
    aud_df = pd.DataFrame(index = ts_dt[:-1], columns=['time_concert', str(dev_n)])
    aud_df.time_concert = df_resampled.time_concert

    # magnitude of the sample-to-sample change of the (x, y, z) vector
    norm = np.linalg.norm(df_resampled[['x','y','z']].diff().values,axis=1)
    
    # scale by the median, falling back to the mean when the median is not positive
    if np.nanmedian(norm)>0:
        aud_df[str(dev_n)] = norm/np.nanmedian(norm)
    else:
        aud_df[str(dev_n)] = norm/np.nanmean(norm)
    
    # select event
    ## find the indexes with the closest time_concert values
    in_event_index  = aud_df.time_concert.sub(in_event_time).abs().idxmin()
    # the end is derived from the length of ts_ev so every device yields the same number of rows
    out_event_index = in_event_index + pd.to_timedelta((ts_ev.shape[0]-1)*20, unit='ms') #aud_df.time_concert.sub(out_event_time).abs().idxmin()
    ## select the indexes
    # NOTE(review): df_clap is a slice of aud_df and is assigned to below; pandas may
    # emit SettingWithCopyWarning here - consider taking an explicit .copy().
    df_clap = aud_df.loc[in_event_index:out_event_index]

    # low pass filter
    ## parameters
    # NOTE(review): T and n are not used anywhere in this cell; these parameters are
    # loop-invariant, and `nyq` is read as a global by butter_lowpass_filter.
    T = 10.0        # Sample Period
    fs = 50.0       # Hz
    cutoff = 12.5   # cutoff frequency of the filter # Hz
    nyq = 0.5 * fs  # Nyquist Frequency ???
    order = 2 
    n = int(T * fs)
    ## action
    # filter only the non-NaN samples; very short runs are passed through unfiltered
    data_raw = df_clap[~isnan(df_clap[str(dev_n)])]
    if data_raw[str(dev_n)].shape[0] > 9:
      data_filtered = butter_lowpass_filter(data_raw[str(dev_n)], cutoff, fs, order)
    else:
      #data_raw[str(dev_n)] = np.nan
      data_filtered = data_raw[str(dev_n)]
    
    df_clap.loc[data_raw.index, 'filtered'] = data_filtered
    
    # rms
    # window and hop are both `sample_rate` samples
    df_rms_signal = rms_signal(df_clap['filtered'], sample_rate, sample_rate)
    df_clap[str(dev_n) + '_rms'] = df_rms_signal

    # load to the df for clapping section
    df_section[str(dev_n)] = df_clap['filtered'].values

    # load data for plot
    df_section_plot = pd.DataFrame(index = df_section.index)
    df_section_plot['rms'] =  df_clap[str(dev_n) + '_rms'].values
    df_section_plot.dropna(inplace=True)
    list_of_lists_x.append(df_section_plot.index)
    list_of_lists_y.append(df_section_plot.rms)
    # first three characters of the file name serve as the device label
    list_of_device_names.append(f[:3])

    df_rms_plot[str(dev_n) + '_rms'] = df_clap[str(dev_n) + '_rms'].values

    # peaks
    #peak_index_list = scipy.signal.find_peaks(aud_df[str(dev_n)], threshold=1, distance=10)[0] # note: change to filtered
    #df_peaks = aud_df.iloc[peak_index_list]
    #print('peaks: ', df_peaks.shape[0])

    # increment counter
    dev_n += 1

    # end devices loop

  # plot section summary - rms
  data_bokeh_rms = pd.DataFrame()
  data_bokeh_rms['xs'] = list_of_lists_x
  data_bokeh_rms['ys'] = list_of_lists_y
  data_bokeh_rms['color'] = (palette[20]*4)[:data_bokeh_rms.shape[0]]

  data_bokeh_signal = pd.DataFrame()
  data_bokeh_signal['xs'] = [df_section.index for i in df_section.columns]
  data_bokeh_signal['ys'] = [df_section[i] for i in df_section.columns]
  data_bokeh_signal['color'] = (palette[20]*4)[:data_bokeh_signal.shape[0]]

  # row-wise mean over all devices' RMS columns
  df_rms_plot['avr'] = df_rms_plot.mean(axis=1)

  colors = palette[20]*6
  
  title_figure = ev_tag + '   ' + str(ev_in + extend + shift_concert_time) + ' sec'
  p = figure(title = title_figure, plot_width=2200, plot_height=600, x_axis_type='datetime', sizing_mode='stretch_width', tools='pan,wheel_zoom,box_zoom,reset')
  p.multi_line(xs='xs', ys='ys', source=data_bokeh_signal, color='color', legend_label='Signal', line_alpha=0.4)
  # NOTE(review): df_clap / dev_n-1 refer to the LAST device of the loop above, so
  # 'Signal_raw' shows only that device's unfiltered signal.
  p.line(x=df_section.index, y=df_clap[str(dev_n-1)], color='red', legend_label='Signal_raw', line_alpha=0.4) # pg
  for xx, yy, s, i in zip(list_of_lists_x, list_of_lists_y, list_of_device_names, range(len(list_of_lists_x))):
    p.varea(x=xx, y1=0, y2=yy, fill_alpha=0.2, color=colors[i], legend_label='Participants RMS')
  p.line(x='index', y='avr', source=df_rms_plot[df_rms_plot.avr.notna()], line_color = colors[10], line_width=3, legend_label='Average RMS')
  p.title.text_font_size = '20pt' #############
  p.legend.click_policy = 'hide'
  p.legend.orientation = 'horizontal'
  p.add_layout(Title(text='Time', align='center'), 'below')
  p.xaxis.ticker.desired_num_ticks = 20
  plots.append(p)

  # save section csv file
  output_dir = DIR + 'Concert_content/clapping_sections/'
  # NOTE(review): test_dir and output_file_name are assigned but never used; the CSV
  # name is built from title_figure instead.
  test_dir = DIR + 'test/'
  output_file_name = 'clapping_' + str(section_n).zfill(2) + '_at_' + str(ev_in + extend + shift_concert_time) + '_sec' + '.csv'
  df_section.to_csv(output_dir + title_figure.replace(' ', '_') + '.csv') # test

  # save figure html file
  figure_output_dir = DIR + 'Concert_content/clapping_sections_plots/'
  #output_file(figure_output_dir + title_figure.replace(' ', '_') + '.html')
  #save(p, figure_output_dir + title_figure.replace(' ', '_') + '.html')

  # increment counter
  section_n += 1

  # end events loop

# show & save all figures in one file
# NOTE(review): title_figure still holds the LAST event's title here, so the combined
# file gets the same path that the per-figure save below uses for the last plot.
output_file(figure_output_dir + title_figure.replace(' ', '_') + '.html')
show(column(plots))

# save figures seperately
for p in plots:
  save(p, filename=(figure_output_dir + p.properties_with_values()['title'].text.replace(' ', '_') + '.html'))


In [None]:
# add span line to plots
#
# Marks one sample of the last figure with a vertical line and a rotated text label.

pl = plots[-1] # which plot

# sample position of the marker on the ts_ev axis; the label sits 25 samples later
sync_idx = (9701-9679)*50

span_sync = Span(
    location=ts_ev[sync_idx],
    dimension='height',
    line_color='olive',
    line_width=1,
)
pl.add_layout(span_sync)

label_data = dict(
    height=[100],
    weight=[ts_ev[sync_idx+25]],
    names=['Synchronous clapping'],
)
source = ColumnDataSource(data=label_data)
labels = LabelSet(
    x='weight', y='height', text='names',
    x_offset=5, y_offset=5,
    source=source,
    render_mode='canvas',
    text_color='olive',
    text_font_style='italic',
    text_font='helvetica',
    angle=math.pi/2,
)
pl.add_layout(labels)

output_file('final_plot_with_span_iso.html')
save(pl)

# Determine start and end times of clapping

In [None]:
# determine start end times clapping sessions
#
# For each saved section CSV: compute a per-device RMS, threshold the signal,
# take the first and last above-threshold samples (padded by `extend`) as the
# clapping in/out points, plot each device and write the in/out points to CSV.

# directories
df_clappings = pd.read_csv(DIR + 'Concert_content/Concert_event_times_clean_tags.csv')
df_events = pd.read_csv(DIR + 'Concert_content/Concert_event_times_clean.csv')
events_dir = DIR + 'Concert_content/clapping_sections/'
# Drop hidden files with a comprehension: calling list.remove() while iterating
# over the same list skips the entry that follows each removed one.
events_list = [ev for ev in sorted(os.listdir(events_dir)) if not ev.startswith('.')]

# for every session
for session_idx in range(15):
  print(str(session_idx))

  # `>=` rather than `>`: an index equal to len(events_list) is already out of range
  if session_idx >= len(events_list):
    break

  # read data
  file_name = events_list[session_idx]
  df_ev = pd.read_csv(events_dir + file_name, index_col=[0])
  # the index becomes integer nanoseconds, so in/out arithmetic below is in ns
  df_ev.index = pd.to_datetime(df_ev.index).astype(np.int64)

  shift_concert_time = df_events[df_events.time_concert == 0].time.values[0]
  cols = df_ev.columns # which devices
  extend = 2*(10**7) * 4 # padding in ns: 4 steps of 20 ms
  in_clapping_pins = []
  out_clapping_pins = []
  plots = []

  # calculate rms for each device (window and hop of half a second)
  df_rms = pd.DataFrame()
  for c in df_ev.columns:
    df_rms[c] = rms_signal(df_ev[c], SAMPLE_RATE/2, int(SAMPLE_RATE/2)).squeeze()

  for col in cols:
    # continue if bad
    if len(df_ev[col].dropna().index) < 50:
      print('continue bad data: ' + str(col))
      in_clapping_pins.append(np.nan)
      out_clapping_pins.append(np.nan)
      continue

    # data for signal; x values are "MM:SS.ffffff" strings used as categorical factors
    source = ColumnDataSource(name=str(col), data=dict(x=pd.to_datetime(df_ev.index).strftime("%M:%S.%f").values, y=df_ev[col]))

    # data for thresholded signal
    # NOTE(review): this is half the RMS range, not the midpoint (max+min)/2 -
    # confirm which one is intended.
    thld = (df_rms[col].max() - df_rms[col].min())/2
    df_thld = df_ev[col].copy()
    df_thld[df_thld<thld] = np.nan
    source_threshold = ColumnDataSource(data=dict(x=pd.to_datetime(df_thld.index).strftime("%M:%S.%f").values, y=df_thld.values))

    # data for rms
    source_rms = ColumnDataSource(data=dict(x=pd.to_datetime(df_rms[df_rms[col].notna()].index).strftime("%M:%S.%f").values, y=df_rms[df_rms[col].notna()][col]))

    # in out
    # Check for "nothing above threshold" BEFORE indexing. The original tested
    # this only after index[0] had already been accessed and would have raised.
    above_thld = df_thld.dropna().index
    if len(above_thld) == 0:
      print('continue no samples above threshold: ' + str(col))
      in_clapping_pins.append(np.nan)
      out_clapping_pins.append(np.nan)
      continue

    in_clapping = above_thld[0] - extend
    out_clapping = above_thld[-1] + extend
    # box edges as sample positions (ns -> s -> samples), taken before clamping
    p1 = in_clapping*(10**-9)*SAMPLE_RATE
    p2 = out_clapping*(10**-9)*SAMPLE_RATE
    # clamp the in/out points to the extent of the section
    if in_clapping < 0:
      in_clapping = 0
    if out_clapping > df_ev.index[-1]:
      out_clapping = df_ev.index[-1]
    in_clapping_pins.append(in_clapping)
    out_clapping_pins.append(out_clapping)

    # initialise figure
    figure_name = 'Participant ' + str(int(col)+1).zfill(2) + ' - duration: ' + pd.to_datetime(out_clapping-in_clapping).strftime("%S+%f")
    p = figure(title = figure_name,
              plot_width=2200, plot_height=600, x_range=source.data['x'], y_range=(df_ev[col].dropna().min(),df_ev[col].dropna().max()+5), sizing_mode='stretch_width', tools='tap,pan,wheel_zoom,reset')

    # signal
    p.line(x='x', y='y', line_alpha=1, source=source, legend_label='Signal')
    # horizontal threshold line
    p.line(x=source.data['x'], y=thld, color='black', legend_label='Threshold')
    # rms
    p.line(x='x', y='y', color='red', source=source_rms, legend_label='RMS')

    # mark in out: red outside the detected interval, olive inside
    l_box = BoxAnnotation(right=p1, fill_alpha=0.1, fill_color='red')
    m_box = BoxAnnotation(left=p1, right=p2, fill_alpha=0.1, fill_color='olive')
    r_box = BoxAnnotation(left=p2, fill_alpha=0.1, fill_color='red')
    p.add_layout(l_box)
    p.add_layout(m_box)
    p.add_layout(r_box)

    # log the tapped sample to the browser console
    callback = CustomJS(args=dict(source=source), code=
      """
        var selectedIndex = source.selected.indices;
        for (var i = 0; i < selectedIndex.length; i++) {
            console.log("Device:", source.name);
            console.log("Index:", selectedIndex[i]);
            console.log("Time:", source.data['x'][selectedIndex[i]]);
        }
      """
    )

    taptool = p.select(type=TapTool)
    source.selected.js_on_change('indices', callback)
    p.legend.click_policy = 'hide'
    p.xaxis.major_label_orientation = math.pi/2

    plots.append(p)

  # set file name
  # NOTE(review): assumes the n-th section file corresponds to the n-th row of
  # df_clappings - confirm against how the section files were generated.
  f_name = str(session_idx+1).zfill(2) + '_sep_' + df_clappings.iloc[session_idx].tag + '   ' + str(df_clappings.iloc[session_idx].start_time) + ' sec'

  # output file html
  output_file(DIR + 'Concert_content/clapping_sections_plots/' + f_name.replace(' ','_') + '.html')
  save(column(plots))

  # write in out points to csv (one row per device after the transpose)
  df_inout = pd.DataFrame(index=['in', 'out'], columns=range(len(in_clapping_pins)), data=[in_clapping_pins, out_clapping_pins])
  for col in df_inout.columns:
    df_inout[col] = pd.to_datetime(df_inout[col])
  df_inout.T.to_csv(DIR + 'Concert_content/clapping_sections_inout/' + f_name.replace(' ','_') + '.csv')

  print('saved: ' + DIR + 'Concert_content/clapping_sections_plots/' + f_name.replace(' ','_') + '.html')
  print('saved: ' + DIR + 'Concert_content/clapping_sections_inout/' + f_name.replace(' ','_') + '.csv')
  print('____')
  print('\n')

In [None]:
# create csv files with individual clapping
#
# For each section CSV, blank out (set to NaN) every device's samples that lie
# outside that device's detected in/out interval, then save a "_cropped" copy.

events_dir = DIR + 'Concert_content/clapping_sections/'
events_inout_dir = DIR + 'Concert_content/clapping_sections_inout/'

# make sure if csv
# Filter with comprehensions: removing items from a list while iterating over it
# skips the entry after each removal, so stray files could survive.
events_list = [ev for ev in sorted(os.listdir(events_dir)) if ev.endswith('.csv')]
events_inout_list = [ev for ev in sorted(os.listdir(events_inout_dir)) if ev.endswith('.csv')]

for idx in range(len(events_list)):
  # read data in out times of event for participant
  df_inout = pd.read_csv(events_inout_dir + events_inout_list[idx], index_col=[0])
  for col in df_inout.columns:
    # Convert the valid timestamps to integer nanoseconds with a single .loc
    # write. The original chained assignment (df[col][mask] = ...) may act on
    # a temporary copy and leave df_inout unchanged.
    stamps = pd.to_datetime(df_inout[col])
    valid = stamps.notna()
    df_inout.loc[valid, col] = stamps[valid].astype('int64')

  # read data event
  df_ev = pd.read_csv(events_dir + events_list[idx], index_col=[0])
  df_ev.index = pd.to_datetime(df_ev.index).astype(np.int64)

  # fill nan outside the clapping interval
  # `col` is the row position in df_inout and also the column position in df_ev
  for col in range(df_inout.shape[0]):
    in_, out_ = df_inout.iloc[col, 0], df_inout.iloc[col, 1]
    if not np.isnan(in_) and not np.isnan(out_):
      in_, out_ = int(in_), int(out_)
      outside = (df_ev.index < in_) | (df_ev.index > out_)
      # single positional write instead of the chained df.iloc[:, col][mask] = ...
      df_ev.iloc[outside, col] = np.nan

  output_dir = DIR + 'Concert_content/clapping_sections_cropped/'
  f_name = events_list[idx].replace('.csv','_cropped.csv')
  df_ev.to_csv(output_dir + f_name)
  print('saved csv ', idx)

saved csv  0
saved csv  1
saved csv  2
saved csv  3
saved csv  4
saved csv  5
saved csv  6
saved csv  7
saved csv  8
saved csv  9
saved csv  10
saved csv  11
saved csv  12
saved csv  13
saved csv  14


In [None]:
# pg - plot x y z norm of participants
#
# One figure per device (first ten files only) showing x, the first difference
# of x, y, z and the vector norm, restricted to a fixed time_concert interval.
dev_list = os.listdir(DIR + '/Aligned/Hall/')
plots=[]
for dev in dev_list[:10]:
  df_dev = pd.read_csv(DIR + '/Aligned/Hall/' + dev)
  in_interval = df_dev.time_concert.between(-691230, -674100)
  df_dev = df_dev[in_interval]

  i = df_dev.index
  x, y, z = df_dev.x, df_dev.y, df_dev.z
  n = np.linalg.norm(df_dev[['x','y','z']].values, axis=1)

  p = figure(plot_height=600, sizing_mode='stretch_width')
  # (values, colour, legend label) in drawing order
  traces = [
    (x,        Category10[5][0], 'x'),
    (x.diff(), Category10[4][1], 'dx'),
    (y,        Category10[5][2], 'y'),
    (z,        Category10[5][3], 'z'),
    (n,        Category10[5][4], 'n'),
  ]
  for values, colour, label in traces:
    p.line(i, values, color=colour, legend_label=label)
  p.legend.click_policy = 'hide'
  plots.append(p)

output_file('xyzn.html')
show(column(plots))

# Plot - for each clapping session - for devices

In [None]:
# Plots for clapping sessions for events
#
# One figure per section file: every device's signal, every device's RMS as a
# filled area, and the mean RMS across devices.

# directories
df_clappings = pd.read_csv(DIR + 'Concert_content/Concert_event_times_clean_tags.csv')
events_dir = DIR + 'Concert_content/clapping_sections/'
events_list =  sorted(os.listdir(events_dir))

# select events
index_events = slice(0, df_clappings.shape[0]) # which events
exclude_events = [] # event indexes to exclude

n_events = len(events_list)
mask = np.zeros(n_events, dtype='bool')
mask[index_events] = True
mask[exclude_events] = False
events_list = np.array(events_list)[mask]

# load every selected section, dropping the first (saved index) column
events_df_list = []
for f in events_list:
  events_df_list.append(pd.read_csv(events_dir + f).iloc[:,1:])

colors = palette[20]*10
plots = []

# for every event
for i, df_ev in enumerate(events_df_list):
  event_row = df_clappings.iloc[i]
  title_figure = event_row.tag + '   ' + str(event_row.start_time) + ' sec'
  p = figure(
    title = title_figure,
    plot_width=1500,
    x_axis_type='datetime',
    sizing_mode='stretch_width',
    tools='pan,wheel_zoom,box_zoom,reset',
  )

  # calculate rms for each device
  df_rms = pd.DataFrame()
  for c in df_ev.columns:
    df_rms[c] = rms_signal(df_ev[c], SAMPLE_RATE, SAMPLE_RATE).squeeze()

  # rows where EVERY device has an RMS value (dropna removes a row if any device is NaN)
  # open question carried over from the original comment: drop NaNs with respect to what?
  rms_rows = df_rms.dropna()

  # for every device
  for j, col in enumerate(df_ev.columns):
    # plot signals
    p.line(x=df_ev.index, y=df_ev[col], color=colors[j], line_alpha=0.4, legend_label='Signal')
    # plot rms values
    x, y = rms_rows.index, rms_rows[col]
    p.varea(x=x, y1=0, y2=y, color=colors[j], fill_alpha=0.2, legend_label='RMS')

  # plot overall average rms
  p.line(x=rms_rows.index, y=rms_rows.mean(axis=1), color=colors[2], line_width=3, legend_label='Avarage RMS')

  # configure plot
  p.title.text_font_size = '20pt'
  p.add_layout(Title(text='Time', align='center'), 'below')
  p.legend.click_policy = 'hide'
  p.legend.orientation = 'horizontal'
  p.legend.label_text_font_size = '6pt'
  p.xaxis.ticker.desired_num_ticks = 20

  # save plot
  plots.append(p)

# save figure
output_file('test.html')
show(column(plots))

# Plot - for one event - for each device

In [None]:
# Plots for each clapping session for each device
#
# For one selected section file, draw one figure per device with its signal
# and its RMS, show them stacked, and also write a standalone HTML file.

# directories
events_dir = DIR + 'Concert_content/clapping_sections/'
# Keep only CSV files. The original removed items from the list while iterating
# over it, which skips the entry after each removal.
events_list = [ev for ev in sorted(os.listdir(events_dir)) if ev.endswith('.csv')]

# select event
index_event = 0 # which event

# read data
df_ev = pd.read_csv(events_dir + events_list[index_event], index_col=[0])
df_ev.index = pd.to_datetime(df_ev.index)
df_clappings = pd.read_csv(DIR + 'Concert_content/Concert_event_times_clean_tags.csv')

# select devices
n_devices = len(df_ev.columns)
include_devices = slice(0, n_devices) # which devices
exclude_devices = [] # device indexes to exclude
mask = np.zeros((n_devices), dtype='bool')
mask[include_devices] = True
mask[exclude_devices] = False
columns = df_ev.columns[mask]

colors = Category10[10]
plots=[]

df_rms = pd.DataFrame()
# calculate rms for each device
for c in columns:
  df_rms[c] = rms_signal(df_ev[c], SAMPLE_RATE, SAMPLE_RATE).squeeze()

# Rows where every selected device has an RMS value (dropna removes a row if ANY
# device is NaN). Computed once instead of twice per device.
# Open question carried over from the original comment: drop NaNs with respect to what?
df_rms_complete = df_rms.dropna()

# for every device
for j, col in enumerate(columns):
  p = figure(title = col, plot_width=1500, x_axis_type='datetime', sizing_mode='stretch_width', tools='pan,wheel_zoom,box_zoom,reset')
  # plot signals
  p.line(x='index', y=col, source=df_ev, color=colors[0], line_alpha=0.6, legend_label='Signal')
  # plot rms values
  x, y = df_rms_complete.index, df_rms_complete[col]
  p.varea(x=x, y1=0, y2=y, color=colors[3], fill_alpha=0.2, legend_label='RMS')
  # configure plot
  p.add_layout(Title(text='Time', align='center'), 'below')
  p.legend.click_policy = 'hide'
  p.legend.orientation = 'horizontal'
  p.xaxis.ticker.desired_num_ticks = 20
  plots.append(p)

# the event title goes above the first figure only
title_figure = df_clappings.iloc[index_event].tag + '   ' + str(df_clappings.iloc[index_event].start_time) + ' sec'
plots[0].add_layout(Title(text=title_figure, align='center', text_font_size='20pt'), 'above')

# save figure
output_file('test.html')
show(column(plots))

# html
html = file_html(plots, CDN, 'test')
with open("test_2.html", "w") as text_file:
    text_file.write(html)

# Plot stats for energy

In [None]:
# plot rms stats
#
# Bar chart of the mean RMS per cropped clapping section: for every section the
# RMS series of each device is averaged, then the devices are averaged per section.

# NOTE(review): sample_rate is assigned here, but the loop below uses the global SAMPLE_RATE.
sample_rate = 50 # Hz

dir_events = DIR + 'Concert_content/clapping_sections_cropped/'
f_clappings_with_tags = DIR + 'Concert_content/Concert_event_times_clean_tags.csv' 
output_dir = DIR + 'Concert_content/other_plot/'

# full paths of the cropped section CSVs, in sorted file-name order
events = sorted([dir_events + ev for ev in os.listdir(dir_events) if ev.endswith('.csv')])
df_clappings = pd.read_csv(f_clappings_with_tags)

"""
sum_rms_x = []
sum_rms_y = []
for ev in events[:1]:
  df_ev = pd.read_csv(ev, index_col=0)
  data = pd.DataFrame(index = df_ev.index)
  for col in df_ev.columns:
    data[col] = rms_signal(df_ev[col], sample_rate, sample_rate)
  sum_rms_y.append(data.mean(axis=1).dropna().sum())

"""

# one column per section, one row per device: mean of that device's RMS series
data = pd.DataFrame()
for i, ev in enumerate(events):
  print(ev)
  rms_list = []
  df_ev = pd.read_csv(ev, index_col=0)
  for col in df_ev.columns:
    # NaNs are dropped first, so the RMS windows run over the remaining samples only
    rms_list.append(rms_signal(df_ev[col].dropna(), SAMPLE_RATE, SAMPLE_RATE).mean().values[0])
    #rms_list.append(df_ev[col].shape[0])
  print(len(rms_list))
  data[str(i).zfill(2)] = rms_list

# transpose to one row per section; the index becomes '0', '1', ... as strings
data = data.T.reset_index(drop=True)
data.index = data.index.astype(str)

# NOTE(review): the bars pair labels_list with data row by row, so this presumably
# requires one tag per section file in the same order - confirm.
labels_list = df_clappings.tag.values

fig_title = 'Avarage energies of clapping sessions'
p = figure(title = fig_title, x_range=labels_list, plot_width=1250, plot_height=1200, sizing_mode='stretch_width', tools='pan,wheel_zoom,box_zoom,reset')

#for col, c in zip(data.columns, palette[20]*10):
  #p.varea(x=labels_list, y1=0, y2=data[col], fill_color=c, fill_alpha=0.1, legend_label='Devices')
  #p.line(x=labels_list, y=data[col], line_color=c, line_alpha=0.1, legend_label='Devices')
  #p.scatter(x=labels_list, y=data[col], fill_color=c, hatch_color=c, line_color=c, fill_alpha=0.7, legend_label='Devices')

# bar height = mean over devices for each section
p.vbar(x=labels_list, top=data.mean(axis=1), width = 0.75, fill_alpha = 0.4, legend_label='Average') #, line_color=palette[20][2], line_width=3)

#p.xaxis.ticker = df_clappings.tag.to_list()
p.xaxis.major_label_overrides = dict(zip(range(data.shape[0]), df_clappings.tag.iloc[:data.shape[0]]))
#p.xaxis.ticker.desired_num_ticks = data.shape[0]
p.xaxis.major_label_orientation = 3*math.pi/8
p.xaxis.major_label_text_font_size = '14pt'
p.legend.click_policy = 'hide'
p.title.text_font_size = '20pt' #############

p.add_layout(Title(text='Energy', align='center'), 'left')

# the output file name is the figure title with spaces replaced by underscores
output_file_name = fig_title.replace(' ', '_') + '.html'
output_file(output_dir + output_file_name)

save(p)


"""
list_of_list_x = [data.index for i in range(df_ev.shape[0])]
list_of_list_y = [data.iloc[i].to_list() for i in range(df_ev.shape[0])]
list_labels = [str(i) for i in range(df_ev.shape[0])]

data_bokeh = pd.DataFrame()
data_bokeh['x'] = list_of_list_x
data_bokeh['y'] = list_of_list_y
data_bokeh['color'] = (palette[20]*100)[:len(data)]
data_bokeh['labels'] = list_labels


p = figure(title = 'Total RMS for clapping ' + ev.split('_')[-1].split('.')[0], plot_width=1250, plot_height=300, sizing_mode='stretch_width', tools='pan,wheel_zoom,box_zoom,reset')
p.add_layout(Title(text='Clappings', align='center'), 'below')
p.add_layout(Title(text='Values', align='center'), 'left')
p.legend.location = 'above'
p.multi_line(xs='x', ys='y', source=data_bokeh, color='color', legend='labels')
p.xaxis.ticker = np.arange(df_ev.shape[0])
p.legend.click_policy="hide"
show(p)
"""

'DONE'

/content/drive/MyDrive/ritmo/CopenhagenMusicLab/Concert_content/clapping_sections_cropped/01_When_musicians_appear,_before_the_introduction__946.0_sec_cropped.csv
77
/content/drive/MyDrive/ritmo/CopenhagenMusicLab/Concert_content/clapping_sections_cropped/02_Between_Frederik_&_Simon_talks__1081.19_sec_cropped.csv
77
/content/drive/MyDrive/ritmo/CopenhagenMusicLab/Concert_content/clapping_sections_cropped/03_After_the_introduction,_before_the_concert_begins__1668.07_sec_cropped.csv
77
/content/drive/MyDrive/ritmo/CopenhagenMusicLab/Concert_content/clapping_sections_cropped/04_Beethoven__3247.6_sec_cropped.csv
77
/content/drive/MyDrive/ritmo/CopenhagenMusicLab/Concert_content/clapping_sections_cropped/05_Musicians_back_to_stage_for_Schnittke__3641.69_sec_cropped.csv
77
/content/drive/MyDrive/ritmo/CopenhagenMusicLab/Concert_content/clapping_sections_cropped/06_Schnittke__4919.6_sec_cropped.csv
77
/content/drive/MyDrive/ritmo/CopenhagenMusicLab/Concert_content/clapping_sections_cropped/07

'DONE'

# Plot stats for duration

In [None]:
# plot stats duration
#
# Bar chart of the length of every clapping section file, in minutes, with the
# bars split into three hand-picked groups and a text label on each bar.

# NOTE(review): sample_rate is assigned but not used; the loop below hard-codes 50.
sample_rate = 50 # Hz

dir_events = DIR + 'Concert_content/clapping_sections/'
f_clappings_with_tags = DIR + 'Concert_content/Concert_event_times_clean_tags.csv' 
output_dir = DIR + 'Concert_content/other_plot/'

# full paths of the section CSVs, in sorted file-name order
events = sorted([dir_events + ev for ev in os.listdir(dir_events) if ev.endswith('.csv')])
df_clappings = pd.read_csv(f_clappings_with_tags)

"""
sum_rms_x = []
sum_rms_y = []
for ev in events[:1]:
  df_ev = pd.read_csv(ev, index_col=0)
  data = pd.DataFrame(index = df_ev.index)
  for col in df_ev.columns:
    data[col] = rms_signal(df_ev[col], sample_rate, sample_rate)
  sum_rms_y.append(data.mean(axis=1).dropna().sum())

"""

# one column per section, one row per device
data = pd.DataFrame()
for ev in events:
  rms_list = []
  df_ev = pd.read_csv(ev, index_col=[0])
  for col in df_ev.columns:
    #rms_list.append(rms_signal(df_ev[col], sample_rate, sample_rate).mean().values[0])
    # rows / 50 / 60 -> minutes; shape[0] counts NaN rows too, so every device of a
    # section gets the same value
    rms_list.append(df_ev[col].shape[0]/50/60) # min
  # column key: the second-to-last '_'-separated piece of the path, cut at its first '.'
  data[ev.split('_')[-2].split('.')[0]] = rms_list

# transpose to one row per section; the index becomes '0', '1', ... as strings
data = data.T.reset_index(drop=True)
data.index = data.index.astype(str)

# NOTE(review): labels are paired with data rows by position, which presumably
# requires one tag per section file in the same order - confirm.
labels_list = df_clappings.tag.values

fig_title = 'Durations of clapping sessions'
p = figure(title = fig_title, x_range=labels_list,
           plot_width=1250, plot_height=1200, sizing_mode='stretch_width',
           tools='pan,wheel_zoom,box_zoom,reset')

#for col, c in zip(data.columns, palette[20]*10):
  #p.varea(x=labels_list, y1=0, y2=data[col], fill_color=c, fill_alpha=0.1, legend_label='Devices')
  #p.line(x=labels_list, y=data[col], line_color=c, line_alpha=0.1, legend_label='Devices')
  #p.scatter(x=labels_list, y=data[col], fill_color=c, hatch_color=c, line_color=c, fill_alpha=0.7, legend_label='Devices')


# hard-coded section positions for the three bar groups
# NOTE(review): the legend labels below attach 'Pieces' to lis_2 and 'Other Claps' to lis_1 - confirm.
lis_1 = [3,5,7,8,9,10,11,12]
lis_2 = [0,1,2,4,6,13]
lis_3 = [14]

p.vbar(x=[labels_list[i] for i in lis_2], top=data.iloc[lis_2,:].mean(axis=1), width = 0.75, fill_alpha = 0.4, fill_color='red', line_color='red', legend_label='Pieces') #, line_color=palette[20][2], line_width=3)
p.vbar(x=[labels_list[i] for i in lis_1], top=data.iloc[lis_1,:].mean(axis=1), width = 0.75, fill_alpha = 0.4, fill_color='olive', line_color='olive', legend_label='Other Claps') #, line_color=palette[20][2], line_width=3)
p.vbar(x=[labels_list[i] for i in lis_3], top=data.iloc[lis_3,:].mean(axis=1), width = 0.75, fill_alpha = 0.4, fill_color='grey', line_color='grey', legend_label='Final') #, line_color=palette[20][2], line_width=3)
#p.vbar(x=labels_list, top=data.mean(axis=1), width = 0.75, fill_alpha = 0.4, fill_color='olive', legend_label='Sessions') #, line_color=palette[20][2], line_width=3)

#p.xaxis.ticker = df_clappings.tag.to_list()
p.xaxis.major_label_overrides = dict(zip(range(data.shape[0]), df_clappings.tag.iloc[:data.shape[0]]))
#p.xaxis.ticker.desired_num_ticks = data.shape[0]
p.xaxis.major_label_orientation = 3*math.pi/8
p.xaxis.major_label_text_font_size = '14pt'
p.legend.click_policy = 'hide'
p.legend.location = 'top_left'
p.title.text_font_size = '20pt' #############

#p.add_layout(Title(text='Events', align='center'), 'below')

# value labels: each bar's mean duration rendered as "<x.xx> mins" at the top of the bar
source = ColumnDataSource(data=dict(height=data.mean(axis=1).values,
                                    weight=labels_list,
                                    names=[(str(round(i,2)) + ' mins') for i in data.mean(axis=1).values]))
labels = LabelSet(x='weight', y='height', text='names',
              x_offset=5, y_offset=5, source=source,
              render_mode='canvas', text_align='center', text_font_style='normal', text_font='helvetica')


p.add_layout(labels)
p.add_layout(Title(text='Minute', align='center'), 'left')

# the output file name is the figure title with spaces replaced by underscores
output_file_name = fig_title.replace(' ', '_') + '.html'
output_file(output_dir + output_file_name)

save(p)

"""
list_of_list_x = [data.index for i in range(df_ev.shape[0])]
list_of_list_y = [data.iloc[i].to_list() for i in range(df_ev.shape[0])]
list_labels = [str(i) for i in range(df_ev.shape[0])]

data_bokeh = pd.DataFrame()
data_bokeh['x'] = list_of_list_x
data_bokeh['y'] = list_of_list_y
data_bokeh['color'] = (palette[20]*100)[:len(data)]
data_bokeh['labels'] = list_labels


p = figure(title = 'Total RMS for clapping ' + ev.split('_')[-1].split('.')[0], plot_width=1250, plot_height=300, sizing_mode='stretch_width', tools='pan,wheel_zoom,box_zoom,reset')
p.add_layout(Title(text='Clappings', align='center'), 'below')
p.add_layout(Title(text='Values', align='center'), 'left')
p.legend.location = 'above'
p.multi_line(xs='x', ys='y', source=data_bokeh, color='color', legend='labels')
p.xaxis.ticker = np.arange(df_ev.shape[0])
p.legend.click_policy="hide"
show(p)
"""

'DONE'

'DONE'

# Clapping graph by time

In [18]:
# clapping graph by time
#
# Loads one clapping-section CSV, computes an RMS series per column, and
# exports one bar chart per selected timestamp (one bar per column).

# directories
events_dir = DIR + 'Concert_content/clapping_sections/'

# Keep only the CSV files. The list is built by filtering instead of calling
# events_list.remove(ev) inside a loop over events_list: removing while
# iterating makes the loop skip the entry right after each removed one, so
# consecutive non-CSV names could slip through.
events_list = sorted(ev for ev in os.listdir(events_dir) if ev.endswith('.csv'))
if not events_list:
  raise FileNotFoundError('No .csv files found in ' + events_dir)

# Index of the file in events_list to plot.
session_idx = 0
df_ev = pd.read_csv(events_dir + events_list[session_idx], index_col=[0])
# Parse the index as datetimes, then store it as plain integers.
df_ev.index = pd.to_datetime(df_ev.index).astype(np.int64)

# RMS of every column, with window_size=4 and scroll=1.
df_rms = pd.DataFrame(index = df_ev.index)
for col in df_ev.columns:
  df_rms[col] = rms_signal(df_ev[col], 4, 1).values

# One PNG per timestamp; only the index values at positions 5 and 6 are drawn.
# NOTE(review): the recorded run of this cell ended with a RuntimeError.
# export_png depends on selenium plus a browser webdriver being available to
# the kernel (see the Bokeh export documentation) -- confirm the runtime has them.
for t in df_rms.index.values[5:7]:
  p = figure(sizing_mode='fixed')
  # Column labels are cast to int64 to serve as numeric x positions.
  p.vbar(x=df_rms.loc[t].index.astype('int64'), top=df_rms.loc[t].values)
  export_png(p, filename=str(t))

RuntimeError: ignored

In [None]:
# Show the x positions and bar heights for whatever `t` the kernel currently holds.
rms_row_at_t = df_rms.loc[t]
rms_row_at_t.index.astype('int64'), rms_row_at_t.values

(Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
             34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
             51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
             68, 69, 70, 71, 72, 73, 74, 75, 76],
            dtype='int64'),
 array([ 3.02652446, 17.6169734 ,  3.61512877,  0.4913683 ,  2.62040353,
         2.56756137,  4.06157834,  1.6512613 ,  2.02561306,  2.17889344,
         1.35469565,  3.45765538,  1.81418274,  2.05208658, 23.4668258 ,
         3.66418238,  3.82889448,  1.29369076,  0.71935749,  0.64328747,
         1.12424527,  1.42590097,         nan,  1.62717039,  1.68347083,
         1.3235089 ,  0.72715467,  1.23529023,  3.65342651,  1.49818185,
         6.2101454 ,  2.44474033,  0.5488194 ,  1.34267216,  1.13432867,
         1.40184526,  2.58153183,  1.16898375,  1.15302013,  1.56133376,
         4.780

# Sitting map

In [None]:
# sitting map *BAD*
#
# Draws an 11 x 7 grid of markers from the RMS values found at one index
# value `t` of the first cropped clapping-section file.

# read data
events_dir = DIR + 'Concert_content/clapping_sections_cropped/'

# make sure if csv
# Filter instead of calling .remove() while looping over the same list:
# removing during iteration skips the element after every removal, so
# non-CSV names could survive.
events_list = sorted(ev for ev in os.listdir(events_dir) if ev.endswith('.csv'))

# NOTE(review): events_inout_list is not created in this cell, so it must
# already exist in the kernel (NameError otherwise). It is filtered in place
# here and not used again in this cell.
events_inout_list[:] = [ev for ev in events_inout_list if ev.endswith('.csv')]

# NOTE(review): `rows` and `df` are assigned but not read anywhere in this cell.
rows=[]
df = pd.DataFrame(columns=np.arange(7), index=np.arange(11))
df_ev = pd.read_csv(events_dir + events_list[0], index_col=[0])
# Index label of the single row of df_ev to draw.
t = 300000000

# Replace every column with its RMS (window_size=SAMPLE_RATE, scroll=1).
# `.values` is taken, as in the "clapping graph by time" cell, so the result
# is assigned by position; assumes rms_signal returns one value per input
# row -- TODO confirm.
for col in df_ev.columns:
  df_ev[col] = rms_signal(df_ev[col], SAMPLE_RATE, 1).values

# Row `t` as an 11 x 7 array; this needs df_ev to have exactly 77 columns.
s_ev = np.array(df_ev.loc[t])
s_ev = np.reshape(s_ev, (11,7))

p = figure()
for i, r in enumerate(s_ev):
  # NOTE(review): the whole grid row is drawn with one alpha taken from its
  # third value (r[2]); a per-marker alpha scaled into [0, 1] may have been
  # intended -- confirm before relying on this plot.
  p.scatter(np.arange(7), np.ones(7)*i, size=20, alpha=r[2])
show(p)

# pg
---



array([2., 2., 2., 2., 2., 2., 2., 2.])

In [None]:
# 

In [None]:
# Each inner list becomes one column of the displayed frame.
cols = [
  [11, 12, 13],
  [15, 16, 17],
]
df = pd.DataFrame(cols).transpose()
df