In [5]:
import numpy as np
import pandas as pd
import os
from glob import glob
base_path = "/mnt/c/Temp/analyses_friends_annotations/friends_annotations/annotation_results/DeepgazeMr"

# Recursively collect every .tsv file under base_path. glob makes no promise
# about the order of its results, so sort them to keep the row order of
# df_all reproducible between runs and machines.
tsv_files = sorted(glob(os.path.join(base_path, '**', '*.tsv'), recursive=True))
if not tsv_files:
    # Fail early with a readable message instead of letting pd.concat([])
    # raise "No objects to concatenate" further down.
    raise FileNotFoundError(f"No .tsv files found under {base_path!r}")

all_data = []

for file in tsv_files:
    # Read every line as data (header=None) and as strings. A header line, if
    # the file has one, is removed by the numeric filter below. Using header=0
    # here would throw away the first line unconditionally, losing a real data
    # row whenever a file has no header.
    df = pd.read_csv(file, sep='\t', header=None, names=['h', 'w', 'num_local_maxima'], dtype=str)

    # Keep only rows in which all three fields parse as numbers
    # (header lines, blanks and malformed values are coerced to NaN and dropped).
    is_numeric_row = df.apply(pd.to_numeric, errors='coerce').notna().all(axis=1)

    # Convert after filtering so integer-valued columns stay integers instead
    # of being upcast to float by the NaNs of the discarded rows.
    df = df.loc[is_numeric_row].apply(pd.to_numeric)

    # Derive the episode label from the file name by stripping the
    # '_maxpeak_coord.tsv' suffix.
    episode = os.path.basename(file).replace('_maxpeak_coord.tsv', '')
    df['episode'] = episode

    all_data.append(df)

# Combine all per-file frames into one dataframe with a fresh 0..N-1 index
df_all = pd.concat(all_data, ignore_index=True)


In [46]:

# Print summary statistics (count, mean, std, min, quartiles, max) for df_all,
# the combined frame built by the loading cell above. The recorded output
# covers the three numeric columns: h, w and num_local_maxima.
print(df_all.describe())

                  h             w  num_local_maxima
count  7.178296e+06  7.178296e+06      7.178296e+06
mean   1.611584e+02  3.609861e+02      1.639282e+00
std    4.677533e+01  9.460282e+01      9.633461e-01
min    0.000000e+00  0.000000e+00      0.000000e+00
25%    1.320000e+02  2.970000e+02      1.000000e+00
50%    1.520000e+02  3.590000e+02      1.000000e+00
75%    1.790000e+02  4.250000e+02      2.000000e+00
max    4.790000e+02  7.000000e+02      1.300000e+01


In [6]:

# Print the combined frame (columns h, w, num_local_maxima, episode). pandas
# abbreviates the middle rows of a frame this long, so only the first and last
# rows plus the overall shape appear in the output.
print(df_all)


           h    w  num_local_maxima          episode
0        215  359                 1  friends_s01e01a
1        215  359                 1  friends_s01e01a
2        215  359                 1  friends_s01e01a
3        215  359                 1  friends_s01e01a
4        215  359                 1  friends_s01e01a
...      ...  ...               ...              ...
7178291  215  359                 1  friends_s07e23d
7178292  215  359                 1  friends_s07e23d
7178293  215  359                 1  friends_s07e23d
7178294  215  359                 1  friends_s07e23d
7178295  215  359                 1  friends_s07e23d

[7178296 rows x 4 columns]
