# This is how the /EGOCOM/raw_audio dataset is generated.
## Extracts raw audio of EGOCOM dataset from original source

In [2]:
from __future__ import print_function, absolute_import, division, unicode_literals, with_statement # Python 2 compatibility

from egocom import audio
import numpy as np
import os
import subprocess

from skimage.feature import register_translation
import soundfile as sf

In [2]:
def cosine_similarity(u,v):
    """Return the cosine similarity between two vectors.

    Both inputs are scaled to unit Euclidean length and their dot product
    is returned, so the result does not depend on the overall amplitude of
    either input.

    Parameters
    ----------
    u, v : array-like
        Vectors of equal length.

    Returns
    -------
    float
        The cosine of the angle between ``u`` and ``v``. If either vector
        has zero norm (for example an all-silent signal) the similarity is
        undefined; ``0.0`` is returned instead of propagating NaN from a
        division by zero.
    """
    un = np.linalg.norm(u)
    vn = np.linalg.norm(v)
    # Guard the division below: a zero vector has no direction.
    if un == 0 or vn == 0:
        return 0.0
    return np.dot(u / un, v / vn)

### 5min conversations broken into parts (day1con1 through day2con2)

In [3]:
# Sample rate handed to soundfile when the trimmed audio is written out.
samplerate = 44100
# Root of the original raw recordings.
raw_loc = "/mnt/surreal/datasets/EGOCOM_IntermediateRawBackups/EGOCOM-ORIGINAL-RAW-RECORDINGS/"
# EGOCOM videos whose audio the raw recordings are aligned against.
data_loc = "/mnt/surreal/datasets/EGOCOM/240p/5min_parts/"
# Output root; audio is written into per-format sub-folders below it.
write_loc = "/mnt/surreal/datasets/EGOCOM/raw_audio/"

# Every .MP4 in data_loc, in sorted order.
mp4_names = sorted(name for name in os.listdir(data_loc) if name.endswith(".MP4"))

# fn_dict groups the video file names by conversation (and part) with the
# person field sliced out, e.g. "vid_001__day_1__con_1__person_1_part1.MP4"
# is filed under "day_1__con_1__part1".
fn_dict = {}
for fn in mp4_names:
    if 'part' in fn:
        key = fn[9:23] + fn[32:37]
    else:
        key = fn[9:21]
    fn_dict.setdefault(key, []).append(fn)

# key2vid maps the file name without its 9-character "vid_XXX__" prefix and
# without the ".MP4" extension back to that prefix.
key2vid = {name[9:-4]: name[:9] for name in mp4_names}

In [4]:
# Conversations that were recorded as several ~5 minute parts.
locs = [
    "day1/conversation1/",
    "day1/conversation2/",
    "day1/conversation3/",
    "day1/conversation4/",
    "day1/conversation5/",
    "day2/conversation1/",
    "day2/conversation2/",
]
for loc in locs:
    # The last character of each path component is the day / conversation number.
    day, con = (component[-1] for component in loc[:-1].split("/"))
    path = raw_loc + loc

    # Collect the raw files of every device folder, both levels in sorted order.
    files_by_device = []
    for device in sorted(os.listdir(path)):
        device_dir = path + device + "/"
        files_by_device.append([device_dir + name for name in sorted(os.listdir(device_dir))])
    # Transpose so that each entry holds one file per device for the same part.
    files_by_part = [list(group) for group in zip(*files_by_device)]
    raw_parts = [
        audio.get_samplerate_wav_from_list_of_mp4_fns(part_files, normalize_wav=False)[1]
        for part_files in files_by_part
    ]

    # For each part of the conversation, align the raw audio to the egocom audio.
    for part_idx, raw_part_per_person in enumerate(raw_parts):
        part_no = str(part_idx + 1)
        part_key = "day_" + day + "__con_" + con + "__part" + part_no
        egocom_files = [data_loc + name for name in fn_dict[part_key]]
        # NOTE(review): loaded with the helper's default normalize_wav, while the
        # raw audio above passes normalize_wav=False -- confirm this is intended.
        egocom_part_per_person = audio.get_samplerate_wav_from_list_of_mp4_fns(egocom_files)[1]

        for person, raw_part in enumerate(raw_part_per_person):
            key = "day_" + day + "__con_" + con + "__person_" + str(person + 1) + "_part" + part_no
            egocom_part = egocom_part_per_person[person]
            n_target = len(egocom_part)
            assert n_target <= len(raw_part)

            # Register the summed absolute values (over axis 1) of both signals
            # against each other; the raw side is cut to the egocom length first.
            # NOTE(review): newer scikit-image releases replace register_translation
            # with skimage.registration.phase_cross_correlation -- confirm the
            # installed version still provides it.
            raw_magnitude = audio.norm_center_clip(abs(raw_part).sum(axis=1)[:n_target])
            egocom_magnitude = abs(egocom_part).sum(axis=1)
            estimate = register_translation(raw_magnitude, egocom_magnitude)
            # Negative shifts are clamped to zero.
            shift = max(int(estimate[0][0]), 0)
            trimmed_part = raw_part[shift:shift + n_target]
            # A shift too close to the end of the raw audio yields a short slice;
            # pull the shift back so the slice has the full egocom length.
            if len(trimmed_part) < n_target:
                shift = max(shift + len(trimmed_part) - n_target, 0)
                trimmed_part = raw_part[shift:shift + n_target]

            # Write the trimmed raw audio in both output formats.
            wfn = key2vid[key] + key
            for kind in ["wav", "flac"]:
                out_path = write_loc + kind + "/" + wfn + "." + kind
                with open(out_path, 'wb') as f:
                    sf.write(f, trimmed_part, samplerate)

            # Report the shift (in whole seconds) and how well the trimmed raw
            # audio matches the egocom audio.
            similarity = cosine_similarity(
                abs(egocom_part).sum(axis=1),
                abs(abs(trimmed_part).sum(axis=1)),
            )
            print(wfn, shift // samplerate, 'cossim', similarity)

vid_001__day_1__con_1__person_1_part1 24 cossim 0.9982131794237141
vid_006__day_1__con_1__person_2_part1 19 cossim 0.9988442934219808
vid_011__day_1__con_1__person_3_part1 25 cossim 0.5628686338185761
vid_002__day_1__con_1__person_1_part2 4 cossim 0.9985852418346841
vid_007__day_1__con_1__person_2_part2 0 cossim 0.9992071310708976
vid_012__day_1__con_1__person_3_part2 5 cossim 0.6390686125893413
vid_003__day_1__con_1__person_1_part3 4 cossim 0.9952026776362861
vid_008__day_1__con_1__person_2_part3 0 cossim 0.9991713190846717
vid_013__day_1__con_1__person_3_part3 5 cossim 0.5997769675303142
vid_004__day_1__con_1__person_1_part4 4 cossim 0.9985926381672157
vid_009__day_1__con_1__person_2_part4 0 cossim 0.9990352640895293
vid_014__day_1__con_1__person_3_part4 5 cossim 0.5540488076451487
vid_005__day_1__con_1__person_1_part5 4 cossim 0.9981458509436139
vid_010__day_1__con_1__person_2_part5 0 cossim 0.9988238419216402
vid_015__day_1__con_1__person_3_part5 5 cossim 0.5893617206955698
vid_016

### Videos 88-175: 20min conversations (day2con3 onward)

In [15]:
# Sample rate handed to soundfile when the trimmed audio is written out.
samplerate = 44100
# Root of the original raw recordings.
raw_loc = "/mnt/surreal/datasets/EGOCOM_IntermediateRawBackups/EGOCOM-ORIGINAL-RAW-RECORDINGS/"
# EGOCOM 20 minute videos whose audio the raw recordings are aligned against.
data_loc = "/mnt/surreal/datasets/EGOCOM/240p/20min/"
# Output root; audio is written into per-format sub-folders below it.
write_loc = "/mnt/surreal/datasets/EGOCOM/raw_audio/"

# Every .MP4 in data_loc, in sorted order.
mp4_names = sorted(name for name in os.listdir(data_loc) if name.endswith(".MP4"))

# fn_dict groups the video file names by conversation with the person field
# sliced out, e.g. "vid_107__day_3__con_3__person_1.MP4" is filed under
# "day_3__con_3". Names containing 'part' additionally keep their part suffix.
fn_dict = {}
for fn in mp4_names:
    if 'part' in fn:
        key = fn[9:23] + fn[32:37]
    else:
        key = fn[9:21]
    fn_dict.setdefault(key, []).append(fn)

# key2vid maps the file name without its 9-character "vid_XXX__" prefix and
# without the ".MP4" extension back to that prefix.
key2vid = {name[9:-4]: name[:9] for name in mp4_names}

In [30]:
# Walk every day/conversation folder of the raw recordings in sorted order.
day_dirs = sorted(name + "/" for name in os.listdir(raw_loc) if 'day' in name)
for day in day_dirs:
    con_dirs = sorted(name + "/" for name in os.listdir(raw_loc + day) if 'conversation' in name)
    for con in con_dirs:
        # Conversations listed in `locs` (day1con1 through day2con2) are skipped here.
        if day + con in locs:
            continue

        # The character before the trailing "/" is the day / conversation number.
        key = "day_" + day[-2] + "__con_" + con[-2]
        print(key)
        path = raw_loc + day + con

        # Raw original audio, one entry per .MP4 in the conversation folder.
        raw_files = [path + name for name in sorted(os.listdir(path)) if name.endswith(".MP4")]
        raw_wavs = audio.get_samplerate_wav_from_list_of_mp4_fns(raw_files, normalize_wav=False)[1]
        # Egocom audio for the same conversation.
        ego_files = [data_loc + name for name in fn_dict[key]]
        egocom_wavs = audio.get_samplerate_wav_from_list_of_mp4_fns(ego_files, normalize_wav=False)[1]

        for person, raw_wav in enumerate(raw_wavs):
            egocom_wav = egocom_wavs[person]
            n_target = len(egocom_wav)
            assert n_target <= len(raw_wav)

            # Register the summed absolute values (over axis 1) of both signals
            # against each other; the raw side is cut to the egocom length first.
            raw_magnitude = audio.norm_center_clip(abs(raw_wav).sum(axis=1)[:n_target])
            egocom_magnitude = abs(egocom_wav).sum(axis=1)
            estimate = register_translation(raw_magnitude, egocom_magnitude)
            # Negative shifts are clamped to zero.
            shift = max(int(estimate[0][0]), 0)
            trimmed_wav = raw_wav[shift:shift + n_target]
            # A shift too close to the end of the raw audio yields a short slice;
            # pull the shift back so the slice has the full egocom length.
            if len(trimmed_wav) < n_target:
                shift = max(shift + len(trimmed_wav) - n_target, 0)
                trimmed_wav = raw_wav[shift:shift + n_target]

            # Write the trimmed raw audio under the egocom file's base name.
            wfn = fn_dict[key][person][:-4]
            for kind in ["wav", "flac"]:
                out_path = write_loc + kind + "/" + wfn + "." + kind
                with open(out_path, 'wb') as f:
                    sf.write(f, trimmed_wav, samplerate)

            # Report the shift (in whole seconds) and how well the trimmed raw
            # audio matches the egocom audio.
            similarity = cosine_similarity(
                abs(egocom_wav).sum(axis=1),
                abs(abs(trimmed_wav).sum(axis=1)),
            )
            print(wfn, shift // samplerate, 'cossim', similarity)
                

day_3__con_3
vid_107__day_3__con_3__person_1 45 cossim 0.9989238581728358
vid_108__day_3__con_3__person_2 45 cossim 0.9986594228268206
vid_109__day_3__con_3__person_3 45 cossim 0.9985476457753422
day_3__con_4
vid_110__day_3__con_4__person_1 88 cossim 0.9967201174887037
vid_111__day_3__con_4__person_2 88 cossim 0.9991665686779512
vid_112__day_3__con_4__person_3 88 cossim 0.9967476852106013
day_3__con_5
vid_113__day_3__con_5__person_1 50 cossim 0.9974781603944064
vid_114__day_3__con_5__person_2 50 cossim 0.9992816927110072
vid_115__day_3__con_5__person_3 50 cossim 0.9942777621822008
day_3__con_6
vid_116__day_3__con_6__person_1 25 cossim 0.9988903689247522
vid_117__day_3__con_6__person_2 25 cossim 0.9986812542337273
vid_118__day_3__con_6__person_3 25 cossim 0.9959746611890536
day_4__con_1
vid_119__day_4__con_1__person_1 82 cossim 0.9989016542991616
vid_120__day_4__con_1__person_2 82 cossim 0.9985493530185888
vid_121__day_4__con_1__person_3 82 cossim 0.9981773322774563
day_4__con_2
vid_122