# Part 4: XML creation
## Dependencies: OTIO and pandas



In [1]:
#audio
from __future__ import print_function
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import librosa
import librosa.display

In [2]:
import pandas as pd 
import pyloudnorm as pyln

In [3]:
#video
import moviepy
from moviepy.editor import *
import opentimelineio as otio
import argparse

Failed to establish dbus connection

### Quick summary, OTIO can output XML that CAN be used to import timelines. We just need to learn the behavior and then create a loop to automate clipping, using a imported dataframe as a outline.

## Testing opentimelineio


In [4]:
import opentimelineio as otio
import opentimelineio.test_utils as otio_test_utils

# Timeline Outline behavior testing
- First create a list of videos
    - sfs
- Sec create list of audio

- Link audio to video, IE if mic1 use center camera

## Use the lists of camera and audio to createa loops creating timeranges

In [5]:
VIDEO_FILEPATH_LIST= ["Batman Middle.mp4","Batman Right.mp4","Batman Left.mp4"]

# "Batman Right.mp4" is 1   ||  "Batman Left.mp4" is 2
AUDIO_FILEPATH_LIST=["Data/Audio/Osi Audio Extracted.wav", "Data/Audio/Scott Audio Extracted.wav", \
               "Data/Audio/Chukwu Audio Extracted.wav", "Data/Audio/Crystal Audio Extracted.wav"]

In [6]:
AUDIO_VIDEO_TUPLE_LIST=[("Data/Audio/Osi Audio Extracted.wav", 1), ("Data/Audio/Scott Audio Extracted.wav",1), \
               ("Data/Audio/Chukwu Audio Extracted.wav",2), ("Data/Audio/Crystal Audio Extracted.wav",2)]

In [7]:

# build the structure
tl = otio.schema.Timeline(name="Example timeline")

# add track for each video file and each audio file
#for each file add a track
for i in VIDEO_FILEPATH_LIST :
    tr = otio.schema.Track(name=i)
    tl.tracks.append(tr)

for i in AUDIO_VIDEO_TUPLE_LIST:
    tr = otio.schema.Track(name=i[0])
    tl.tracks.append(tr)

In [8]:
#testing if the tracks are added
#otio.adapters.write_to_file(tl, 'Baretimeline.otio')

# With the timeline and tracks set, now is the time to match add clips to each of them, Fairly skeleton based

In [9]:
#import data with the following: 
#audio idxmax (we will use this to link the audio video tuple list), 
#the  list of tuples/intervals 


# Importing a dataframe pkl, then using using Idxmax to create intervals of data in tuple form. Tuples represent the start and end frame of when mic is dominant. Camera 0 is default cam in case of uncertainty, which is center cam in this scenario.

## the audio_video_tuple, idxmax matches the audio index and returns the video [1]

In [10]:
data_df=pd.read_pickle('idxmax.pkl')

In [11]:
def dataframe_getintervals(series,desiredvalue):
    #make sure series is the df['column']
    t=series.index[series==desiredvalue].to_series()
    interval_list=t.groupby(t.diff().ne(1).cumsum()).agg(['first','last']).apply(tuple,1).tolist()
    
    return interval_list

In [12]:
#create lists of tuples for each camera

list_of_idxmax_mic_data=[]
for i in range(len(AUDIO_VIDEO_TUPLE_LIST)+1):
    tuple_list=dataframe_getintervals(data_df['idxmax'],i)
    list_of_idxmax_mic_data.append(tuple_list)

    


In [13]:
list_of_idxmax_mic_data

[[(0, 2499), (651406, 653903)],
 [(2500, 5407),
  (26128, 60419),
  (75156, 87230),
  (165286, 181913),
  (247738, 253717),
  (254146, 256274),
  (315966, 317251),
  (336047, 344445),
  (358275, 361633),
  (369965, 374352),
  (376815, 380256),
  (390884, 393382),
  (424250, 425515),
  (427902, 435067),
  (457548, 505325),
  (527483, 536514),
  (565965, 567731),
  (625211, 626028),
  (632923, 637252),
  (639049, 648955)],
 [(14939, 21557),
  (22121, 24847),
  (60420, 75155),
  (126906, 128349),
  (253718, 254145),
  (269021, 270059),
  (309370, 311423),
  (317252, 320449),
  (344446, 352742),
  (355574, 358274),
  (361634, 363000),
  (374353, 376814),
  (447659, 455924),
  (505326, 510216),
  (512779, 518819),
  (607771, 625210),
  (629249, 632922),
  (648956, 649580)],
 [(5408, 11596),
  (21558, 22120),
  (24848, 26127),
  (123900, 126905),
  (128350, 141712),
  (143753, 146854),
  (185048, 207908),
  (226829, 228890),
  (256275, 269020),
  (270060, 291023),
  (294911, 304445),
  (3145

# Variables and their definitions

```py 
list_of_idxmax_mic_data  
```
- List of lists of tuples defining the (starting frame, ending frame)
- Position on the list describes which audio file is used, where 0 is unsure
- IE: list_of_idxmax_mic_data[1] is the (starting frame, ending frame) of Audio file 1



```py 
VIDEO_FILEPATH_LIST  
```
- List of video filepaths. 
- We will use the list indexes to reference audio video pairs in the next variable

```py 
AUDIO_VIDEO_TUPLE_LIST 
```
- A Tuple of (Audio Filepath, VIDEO_FILEPATH_LIST index of associated audio)
- use this to connect a audio to video track

In [91]:
#skeletal outline of otio


# build the structure
tl = otio.schema.Timeline(name="Example timeline")

# add track for each video file and each audio file
#for each file add a track

#create lists for each track to reference back to later
#vtr is video track, etc.
vtr_list=[]
atr_list=[]


for i in VIDEO_FILEPATH_LIST :
    vtr = otio.schema.Track(name=i)
    tl.tracks.append(vtr)
    vtr_list.append(vtr)
    
for i in AUDIO_VIDEO_TUPLE_LIST:
    atr = otio.schema.Track(name=i[0])
    tl.tracks.append(atr)
    atr_list.append(atr)
    


In [92]:
vtr_list[1]

otio.schema.Track(name='Batman Right.mp4', children=[], source_range=None, metadata={})

In [93]:
vrate=24
arate=500

In [90]:
# i is to keep track which audio file we are currently on
i=-1

# i2 is to keep track of how many clips there are
i2=0

for lists_of_tuples in list_of_idxmax_mic_data:
    #connect the list of tuples to the audio file
    #ignore 0 for now. We will come back to this later, i will still be -1 but we will add a default clip instead
    if i==-1:
        pass
    
    else: 
        print(AUDIO_VIDEO_TUPLE_LIST[i],VIDEO_FILEPATH_LIST[AUDIO_VIDEO_TUPLE_LIST[i][1]])
    
        # Connect the audio and video tracks 
        atrack=atr_list[i]
        afname=AUDIO_VIDEO_TUPLE_LIST[i][0]
        
        # AUDIO_VIDEO_TUPLE_LIST[i][1] references the VIDEO_FILEPATH_LIST to determine which video file to use
        vtrack=vtr_list[AUDIO_VIDEO_TUPLE_LIST[i][1]]
        vfname=VIDEO_FILEPATH_LIST[AUDIO_VIDEO_TUPLE_LIST[i][1]]
        
        
    #=========================================================================
    
    #the adding of clips using the list_of_idxmax_mic_data index starts here        
        for tuples in lists_of_tuples:
            
        #=====================================================
        
            # adding video clips 
            #convert the tuple ranges from audio rate to video rate
            #audio rate=500

            vstarttime=tuples[0]/arate*vrate
            vduration=tuples[1]/arate*vrate
            
            print(vstarttime,vduration)
        

            
            i2=i2+1

    i=i+1

('Data/Audio/Osi Audio Extracted.wav', 1) Batman Right.mp4
120.0 259.536
1254.144 2900.112
3607.4880000000003 4187.04
7933.728 8731.824
11891.423999999999 12178.416000000001
12199.008 12301.152
15166.368 15228.047999999999
16130.256000000001 16533.36
17197.199999999997 17358.384
17758.32 17968.896
18087.12 18252.288
18762.432 18882.336
20364.0 20424.72
20539.296 20883.216
21962.304 24255.6
25319.183999999997 25752.672
27166.32 27251.088
30010.128 30049.344
30380.304 30588.095999999998
30674.352 31149.840000000004
('Data/Audio/Scott Audio Extracted.wav', 1) Batman Right.mp4
717.072 1034.7359999999999
1061.808 1192.656
2900.16 3607.44
6091.488 6160.7519999999995
12178.464 12198.960000000001
12913.008000000002 12962.832000000002
14849.76 14948.304
15228.096000000001 15381.552
16533.408000000003 16931.616
17067.552 17197.152000000002
17358.432 17424.0
17968.944 18087.072
21487.631999999998 21884.352
24255.648 24490.368000000002
24613.392 24903.311999999998
29173.007999999998 30010.08
30203

In [60]:
otio.adapters.write_to_file(tl, 'videoclipsonly.otio')

True

In [20]:
vtimerange

NameError: name 'vtimerange' is not defined

In [62]:
vtimerange2

otio.opentime.TimeRange(start_time=otio.opentime.RationalTime(value=120, rate=24), duration=otio.opentime.RationalTime(value=139.536, rate=24))

# List of starting time of being the dominant speaker and the duration
```py
('Data/Audio/Osi Audio Extracted.wav', 1)
('Data/Audio/Osi Audio Extracted.wav', 1) Batman Right.mp4
5.0 5.814
52.256 68.582
150.312 24.148
330.572 33.254
495.476 11.958
508.292 4.256
631.932 2.57
672.094 16.796
716.55 6.716
739.93 8.774
753.63 6.882
781.768 4.996
848.5 2.53
855.804 14.33
915.096 95.554
1054.966 18.062
1131.93 3.532
1250.422 1.634
1265.846 8.658
1278.098 19.812
('Data/Audio/Scott Audio Extracted.wav', 1) Batman Right.mp4
29.878 13.236
44.242 5.452
120.84 29.47
253.812 2.886
507.436 0.854
538.042 2.076
618.74 4.106
634.504 6.394
688.892 16.592
711.148 5.4
723.268 2.732
748.706 4.922
895.318 16.53
1010.652 9.78
1025.558 12.08
1215.542 34.878
1258.498 7.346
1297.912 1.248
('Data/Audio/Chukwu Audio Extracted.wav', 2) Batman Left.mp4
10.816 12.376
43.116 1.124
49.696 2.558
247.8 6.01
256.7 26.724
287.506 6.202
370.096 45.72
453.658 4.122
512.55 25.49
540.12 41.926
589.822 19.068
629.012 2.918
706.598 4.548
726.002 6.506
760.514 13.74
778.346 3.42
786.766 27.816
1020.924 4.632
1073.03 34.318
1185.776 15.498
1208.034 7.506
1299.354 3.456
('Data/Audio/Crystal Audio Extracted.wav', 2) Batman Left.mp4
23.194 6.682
174.462 73.336
283.426 4.078
293.71 36.86
363.828 6.266
415.818 37.838
457.782 37.692
582.048 7.772
608.892 9.846
622.848 6.162
640.9 31.192
705.486 1.11
732.51 7.418
774.256 4.088
814.584 33.914
851.032 4.77
870.136 25.18
911.85 3.244
1020.434 0.488
1037.64 17.324
1107.35 24.578
1135.464 50.31
1201.276 6.756
1252.058 6.438
1274.506 3.59
1299.162 0.19
```

In [94]:
# i is to keep track which audio file we are currently on
i=-1

# i2 is to keep track of how many clips there are
i2=0

for lists_of_tuples in list_of_idxmax_mic_data:
    #connect the list of tuples to the audio file
    #ignore 0 for now. We will come back to this later, i will still be -1 but we will add a default clip instead
    if i==-1:
        pass
    
    else: 
        print(AUDIO_VIDEO_TUPLE_LIST[i],VIDEO_FILEPATH_LIST[AUDIO_VIDEO_TUPLE_LIST[i][1]])
    
        # Connect the audio and video tracks 
        atrack=atr_list[i]
        afname=AUDIO_VIDEO_TUPLE_LIST[i][0]
        
        # AUDIO_VIDEO_TUPLE_LIST[i][1] references the VIDEO_FILEPATH_LIST to determine which video file to use
        vtrack=vtr_list[AUDIO_VIDEO_TUPLE_LIST[i][1]]
        vfname=VIDEO_FILEPATH_LIST[AUDIO_VIDEO_TUPLE_LIST[i][1]]
        
        
    #=========================================================================
    
    #the adding of clips using the list_of_idxmax_mic_data index starts here        
        for tuples in lists_of_tuples:
            
        #=====================================================
        
            # adding video clips 
            #convert the tuple ranges from audio rate to video rate
            #audio rate=500
            vstarttime=tuples[0]/arate*vrate
            vduration=tuples[1]/arate*vrate

                   
            vtimerange=otio.opentime.TimeRange(
                start_time=otio.opentime.RationalTime(vstarttime, vrate),
                duration=otio.opentime.RationalTime(vduration, vrate))



            vref = otio.schema.ExternalReference(target_url=vfname,
            available_range=vtimerange)




            # attach the reference to the clip
            vcl = otio.schema.Clip(
                name="vClip{}".format(i2 + 1),
                media_reference=vref,

                # available_range_from_list is the 
                source_range=otio.opentime.TimeRange(
                    start_time=otio.opentime.RationalTime(
                        vtimerange.start_time.value,
                        vtimerange.start_time.rate
                    ),
                    duration=otio.opentime.RationalTime(
                        vtimerange.duration.value,
                        vtimerange.duration.rate
                    ),
                )
            )

            # put the clip into the track
            vtrack.append(vcl)
            
            
            #=======================================================
            # adding Audio clips 
            #convert the tuple ranges from audio rate to video rate
            #audio rate=500


        

            
            i2=i2+1

            print(i2)
    i=i+1

('Data/Audio/Osi Audio Extracted.wav', 1) Batman Right.mp4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
('Data/Audio/Scott Audio Extracted.wav', 1) Batman Right.mp4
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
('Data/Audio/Chukwu Audio Extracted.wav', 2) Batman Left.mp4
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
('Data/Audio/Crystal Audio Extracted.wav', 2) Batman Left.mp4
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86


In [95]:
otio.adapters.write_to_file(tl, 'videoclipsonly.otio')

True

# add a column for camera view as well. This would require:
- the video file list
- the audio video tuple

In [104]:
#idxmax_mic
#camera_index

In [29]:
#experiment analysis Duration is NOT DURATION. it is the end time!
# start time 2400 is a hundered seconds in
# duration 4800 is that the clip goes for 2400 until it reaches 4800 at the end time 

120.0

In [63]:
tuples[0]/arate*vrate

556.656

In [64]:
tuples[1]/arate*vrate

717.024