# Compute position of icons from overlaps between subtitles

In [1]:
from ass_tools import ASSProcessor, ASSReader, INIT_DIR
import pandas as pd 
import numpy as np 
from common import get_filenames

In [28]:
fp = get_filenames(
    title="Select ASS file", 
    filetypes=(("ASS files", "*.ass"),), 
    init_dir=INIT_DIR
)

reader = ASSReader()
reader.read_file(fp)
vp, styles, dialog = reader.get_ass_elements()
df_styles = reader.parse_styles(styles)
df_dialog = reader.parse_dialog(dialog, timecodes_as_seconds=True)

In [32]:
df_times = df_dialog.loc[:, ['Start', 'End', 'Style']].sort_values('Start')
df_times.head()

Unnamed: 0,Start,End,Style
0,0.7,4.11,Yakumo
1,4.11,6.65,Yakumo
2,6.65,13.13,Yakumo
3,13.33,16.08,Yakumo
4,16.08,20.73,Yakumo


In [33]:
%%timeit
df_times['NumOverlaps'] = [
    (row.Start < df_times['End'].iloc[:j]).sum()
    for j, row in enumerate(df_times.itertuples())
]

61.7 ms ± 1.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [53]:
df_times.iloc[:, :2]

Unnamed: 0,Start,End
0,0.70,4.11
1,4.11,6.65
2,6.65,13.13
3,13.33,16.08
4,16.08,20.73
...,...,...
672,1472.96,1473.93
673,1474.90,1475.88
674,1475.88,1478.83
675,1478.83,1481.66


## Compute overlaps with numpy 

In [106]:
[0, 1, 2, 3, 1, 0, 1, 0]

[0, 1, 2, 3, 1, 0, 1, 0]

In [160]:
A = np.array([[0, 8], [3, 6], [4, 10], [4.5, 7.6], [6, 8], [9, 10], [9.5, 12], [10.5, 13], [13, 15]])
B = A[:,0] - A[:,1][:, np.newaxis]
A

array([[ 0. ,  8. ],
       [ 3. ,  6. ],
       [ 4. , 10. ],
       [ 4.5,  7.6],
       [ 6. ,  8. ],
       [ 9. , 10. ],
       [ 9.5, 12. ],
       [10.5, 13. ],
       [13. , 15. ]])

In [161]:
C = np.triu(B, k=1); C

array([[ 0. , -5. , -4. , -3.5, -2. ,  1. ,  1.5,  2.5,  5. ],
       [ 0. ,  0. , -2. , -1.5,  0. ,  3. ,  3.5,  4.5,  7. ],
       [ 0. ,  0. ,  0. , -5.5, -4. , -1. , -0.5,  0.5,  3. ],
       [ 0. ,  0. ,  0. ,  0. , -1.6,  1.4,  1.9,  2.9,  5.4],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  1. ,  1.5,  2.5,  5. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. , -0.5,  0.5,  3. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. , -1.5,  1. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ]])

In [204]:
%%timeit 

pos = np.zeros(A.shape[0])
num = (C<0).sum(axis=0)
x = np.arange(A.shape[0])

for i in range(1, A.shape[0]):
    if num[i]  < 1: 
        pos[i] = np.setdiff1d(
            x[:i+1], 
            pos[:i+1][C[:i+1, i] < 0]
        ).min()

pos

24.1 µs ± 396 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


## Test on ASS output

In [3]:
df = pd.read_csv("../testASS.csv", index_col=0, header=0)

In [4]:
ASSProcessor().get_posIndices(df)

Unnamed: 0,Start,End,Style,StyleInd,hasIcon,NumOverlaps,PositionIndex
0,0.00,11.67,Translator,-1,False,0,0
1,0.70,3.81,Yakumo,-1,False,1,0
2,3.81,6.75,Yakumo,-1,False,1,0
3,6.75,13.13,Yakumo,-1,False,1,0
4,13.33,16.08,Yakumo,-1,False,0,0
...,...,...,...,...,...,...,...
705,1471.36,1473.33,Sumire,3,True,1,0
706,1472.96,1473.93,Hinano,5,True,1,0
707,1474.90,1475.78,Ichinose - Center,-1,False,0,0
708,1475.78,1478.88,Ichinose - Center,-1,False,0,0


In [8]:
pos = ASSProcessor().get_posIndices(df.loc[df.hasIcon, :])
pos

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['NumOverlaps'] = numOverlaps
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['PositionIndex'] = pos


Unnamed: 0,Start,End,Style,StyleInd,hasIcon,NumOverlaps,PositionIndex
11,35.90,37.10,Ramune,0,True,0,0
12,36.80,38.80,Hanabusa,1,True,1,0
14,39.73,45.26,Ichinose,2,True,0,0
16,46.36,51.10,Ichinose,2,True,0,0
18,58.86,60.03,Ichinose,2,True,0,0
...,...,...,...,...,...,...,...
702,1465.23,1466.90,Ema,4,True,1,0
703,1467.13,1468.86,Sumire,3,True,0,0
704,1468.86,1472.03,Ichinose,2,True,0,0
705,1471.36,1473.33,Sumire,3,True,1,0


In [35]:
df_icon = df.loc[df.hasIcon, :]
deltas = np.triu(
    df_icon.Start.values - df_icon.End.values[:, np.newaxis], 
    k=1
)
num = (deltas < 0.2).sum(axis=0)

In [36]:
pos = np.zeros(deltas.shape[0], dtype=int)
inds = np.arange(deltas.shape[0], dtype=int)

for i in range(1, deltas.shape[0]):
    if num[i] > 0:
        pos[i] = np.setdiff1d(
            inds[:i+1], 
            pos[:i+1][deltas[:i+1, i] < 0]
        ).min()