In [None]:
import pandas as pd
import opencv as cv2
import os


In [None]:
df = pd.read_excel('helse_ordliste.xlsx')
video_folder = "HealthVideos"
metadata = "helse_ordliste_mod.xlsx"
aug_folder = "AugHealthVideos"
aug_resized_224 = "ResizedVideos_224"

In [4]:
df.head()

Unnamed: 0,helse_ordliste,Unnamed: 1
0,ord,video
1,1. gradsforbrenning,foerstegradsforbrenning.mp4
2,2. gradsforbrenning,andregradsforbrenning.mp4
3,3. gradsforbrenning 1,tredjegradsforbrenning-1.mp4
4,3. gradsforbrenning 2,tredjegradsforbrenning-2.mp4


In [5]:
df.isnull().values.any()

False

In [None]:
#Gather video stats 
def get_video_stats(video_path):
    video_capture = cv2.VideoCapture(video_path)
    fps = video_capture.get(cv2.CAP_PROP_FPS)
    frame_count = video_capture.get(cv2.CAP_PROP_FRAME_COUNT)
    
    if fps > 0 and frame_count > 0:
        duration = frame_count / fps  
    else:
        duration = None
    
    # Get the frame size 
    frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_size = (frame_width, frame_height)
    
    file_size_mb = os.path.getsize(video_path) / (1024 * 1024) 

    video_capture.release()

    return duration, frame_size, file_size_mb


In [7]:
# Define a function to update min/max stats
def update_min_max_stats(stats, min_stats, max_stats, video_file):
    duration, frame_size, file_size_mb = stats

    # Update min/max values for video duration
    if duration is not None:
        if duration < min_stats['min_duration']:
            min_stats['min_duration'] = duration
            min_stats['min_video'] = video_file
        if duration > max_stats['max_duration']:
            max_stats['max_duration'] = duration
            max_stats['max_video'] = video_file

    # Update min/max values for frame size
    if frame_size[0] * frame_size[1] < min_stats['min_frame_size'][0] * min_stats['min_frame_size'][1]:
        min_stats['min_frame_size'] = frame_size
        min_stats['min_frame_video'] = video_file
    if frame_size[0] * frame_size[1] > max_stats['max_frame_size'][0] * max_stats['max_frame_size'][1]:
        max_stats['max_frame_size'] = frame_size
        max_stats['max_frame_video'] = video_file

    # Update min/max values for file size
    if file_size_mb < min_stats['min_size_mb']:
        min_stats['min_size_mb'] = file_size_mb
        min_stats['min_size_video'] = video_file
    if file_size_mb > max_stats['max_size_mb']:
        max_stats['max_size_mb'] = file_size_mb
        max_stats['max_size_video'] = video_file


In [8]:
#Process the DF and gather stats
def process_video_files(df, video_folder):
    min_stats = {
        'min_duration': float('inf'),
        'min_video': '',
        'min_frame_size': (float('inf'), float('inf')),
        'min_frame_video': '',
        'min_size_mb': float('inf'),
        'min_size_video': ''
    }
    max_stats = {
        'max_duration': 0,
        'max_video': '',
        'max_frame_size': (0, 0),
        'max_frame_video': '',
        'max_size_mb': 0,
        'max_size_video': ''
    }

    duplicate_videos = set() 

    for index, row in df.iterrows():
        video_file = row['Video_File']

        if video_file in duplicate_videos:
            continue
        duplicate_videos.add(video_file)

        video_path = os.path.join(video_folder, video_file)

        if os.path.exists(video_path):
            stats = get_video_stats(video_path)

            update_min_max_stats(stats, min_stats, max_stats, video_file)

        else:
            print(f'File not found: {video_file}')

    return min_stats, max_stats, duplicate_videos


In [9]:
#Check for duplicate videos
def check_duplicates(df):
    duplicate_videos_df = df[df.duplicated(subset='Video_File', keep=False)]
    if not duplicate_videos_df.empty:
        print(f'Number of duplicates: {duplicate_videos_df.shape[0] // 2}\n')
        print('Duplicate videos and their associated terms:')
        for video, group in duplicate_videos_df.groupby('Video_File'):
            terms = group['Health_Term'].tolist()
            print(f'Video: {video} -> Terms: {", ".join(terms)}')


In [10]:
#Count unique terms and videos
unique_terms_count = df['Health_Term'].nunique()
print(f'Number of unique health terms: {unique_terms_count}')
num_videos = df['Video_File'].nunique()
print(f'Total number of unique videos: {num_videos}')


KeyError: 'Health_Term'

In [86]:
def calculate_total_video_size(video_folder, df):
    total_size_mb = 0
    for _, row in df.iterrows():
        video_file = row['Video_File']
        video_path = os.path.join(video_folder, video_file)

        if os.path.exists(video_path):
            total_size_mb += os.path.getsize(video_path) / (1024 * 1024) 
        else:
            print(f'File not found: {video_file}')

    return total_size_mb

In [87]:
# Process the video files and get stats
min_stats, max_stats, duplicate_videos = process_video_files(df, video_folder)

print(f'Shortest video: {min_stats["min_video"]} with length {min_stats["min_duration"]:.2f} seconds')
print(f'Longest video: {max_stats["max_video"]} with length {max_stats["max_duration"]:.2f} seconds\n')

print(f'Smallest frame size: {min_stats["min_frame_video"]} with resolution {min_stats["min_frame_size"][0]}x{min_stats["min_frame_size"][1]}')
print(f'Largest frame size: {max_stats["max_frame_video"]} with resolution {max_stats["max_frame_size"][0]}x{max_stats["max_frame_size"][1]}\n')

print(f'Smallest file size: {min_stats["min_size_video"]} with size {min_stats["min_size_mb"]:.2f} MB')
print(f'Largest file size: {max_stats["max_size_video"]} with size {max_stats["max_size_mb"]:.2f} MB\n')

total_size_mb = calculate_total_video_size(video_folder, df)
print(f'Total size of all videos: {total_size_mb:.2f} MB\n')

check_duplicates(df)


Shortest video: fall-i-haaret.mp4 with length 0.40 seconds
Longest video: posttraumatisk-stressforstyrrelse.mp4 with length 4.68 seconds

Smallest frame size: abort.mp4 with resolution 640x480
Largest frame size: foerstegradsforbrenning.mp4 with resolution 960x720

Smallest file size: fall-i-haaret.mp4 with size 0.08 MB
Largest file size: posttraumatisk-stressforstyrrelse.mp4 with size 0.77 MB

Total size of all videos: 234.59 MB

Number of duplicates: 12

Duplicate videos and their associated terms:
Video: fall-i-haaret.mp4 -> Terms: fall i håret, hår med fall
Video: frysninger.mp4 -> Terms: frysninger, gåsehud
Video: haar-tykt-og-kroellet.mp4 -> Terms: hår: tykt og krøllet, krøller: tykt og krøllet hår
Video: haar-tykt-og-smaakroellet.mp4 -> Terms: hår: tykt og småkrøllet, krøller: tykt og småkrøllet hår
Video: halvlangt-haar.mp4 -> Terms: halvlangt hår, hår: halvlangt
Video: kirurgi.mp4 -> Terms: kirurg, kirurgi
Video: krum-nese-oernenese.mp4 -> Terms: krum nese, ørnenese, nese: kru

In [None]:
# Function to get the number of frames in a video
def get_video_frame_count(video_path):
    cap = cv2.VideoCapture(video_path)
    
    if not cap.isOpened():
        print(f"Could not open video {video_path}")
        return 0
    
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    cap.release()
    return frame_count

frame_counts = []
for video_file in os.listdir(video_folder):
    video_path = os.path.join(video_folder, video_file)
    
    if video_file.endswith('.mp4'):
        num_frames = get_video_frame_count(video_path)
        frame_counts.append((video_file, num_frames))

for video_file, num_frames in frame_counts:
    print(f'{video_file}: {num_frames} frames')


-fobi.mp4: 40 frames
aande-puste.mp4: 48 frames
aapne-munn-1.mp4: 42 frames
aapne-munn-2.mp4: 39 frames
aapne-oeynene-1.mp4: 41 frames
aapne-oeynene-2.mp4: 40 frames
aare-blodaare.mp4: 51 frames
abort.mp4: 38 frames
abstinens.mp4: 57 frames
adferd-atferd.mp4: 34 frames
aggresjon-1.mp4: 40 frames
aggresjon-2.mp4: 44 frames
akupunktur.mp4: 76 frames
akutt-2.mp4: 54 frames
akutt.mp4: 39 frames
aldershjem.mp4: 62 frames
alkohol.mp4: 48 frames
alkoholfri.mp4: 56 frames
allergi.mp4: 58 frames
ambulanse-2.mp4: 64 frames
ambulanse-3.mp4: 68 frames
ambulanse.mp4: 52 frames
amen.mp4: 52 frames
amputere.mp4: 47 frames
anatomi.mp4: 64 frames
andregradsforbrenning.mp4: 81 frames
anestesisykepleier.mp4: 81 frames
anfall.mp4: 38 frames
angst-2.mp4: 74 frames
angst.mp4: 42 frames
angstanfall.mp4: 69 frames
angstrengelsesastma.mp4: 81 frames
anoreksi.mp4: 61 frames
ansikt.mp4: 35 frames
antibiotika.mp4: 63 frames
antidepressiva.mp4: 65 frames
antistoff.mp4: 64 frames
apotek.mp4: 43 frames
appetitt.mp4:

In [None]:
# Frame count ranges for the video_folder (original videos)

frame_count_ranges = {
    '0-9': 0,
    '10-19': 0,
    '20-29': 0,
    '30-39': 0,
    '40-49': 0,
    '50-59': 0,
    '60-69': 0,
    '70-79': 0,
    '80-89': 0,
    '90-99': 0,
    '100-119': 0
}

def get_frame_count(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    return frame_count

for video_name in os.listdir(video_folder):
    video_path = os.path.join(video_folder, video_name)
    
    if os.path.isfile(video_path) and video_path.lower().endswith(('.mp4')):  
        frame_count = get_frame_count(video_path)

        if 0 <= frame_count <= 9:
            frame_count_ranges['0-9'] += 1
        elif 10 <= frame_count <= 19:
            frame_count_ranges['10-19'] += 1
        elif 20 <= frame_count <= 29:
            frame_count_ranges['20-29'] += 1
        elif 30 <= frame_count <= 39:
            frame_count_ranges['30-39'] += 1
        elif 40 <= frame_count <= 49:
            frame_count_ranges['40-49'] += 1
        elif 50 <= frame_count <= 59:
            frame_count_ranges['50-59'] += 1
        elif 60 <= frame_count <= 69:
            frame_count_ranges['60-69'] += 1
        elif 70 <= frame_count <= 79:
            frame_count_ranges['70-79'] += 1
        elif 80 <= frame_count <= 89:
            frame_count_ranges['80-89'] += 1
        elif 90 <= frame_count <= 99:
            frame_count_ranges['90-99'] += 1
        else:
            frame_count_ranges['100-119'] += 1

for frame_range, count in frame_count_ranges.items():
    print(f'Number of videos with {frame_range} frames: {count}')


Number of videos with 0-9 frames: 0
Number of videos with 10-19 frames: 1
Number of videos with 20-29 frames: 3
Number of videos with 30-39 frames: 43
Number of videos with 40-49 frames: 207
Number of videos with 50-59 frames: 271
Number of videos with 60-69 frames: 117
Number of videos with 70-79 frames: 45
Number of videos with 80-89 frames: 22
Number of videos with 90-99 frames: 6
Number of videos with 100-119 frames: 3


In [2]:
# df after preparations 

df = pd.read_excel(metadata)
df.head()

Unnamed: 0,Health_Term,Video_File
0,1. gradsforbrenning,foerstegradsforbrenning.mp4
1,2. gradsforbrenning,andregradsforbrenning.mp4
2,3. gradsforbrenning 1,tredjegradsforbrenning-1.mp4
3,3. gradsforbrenning 2,tredjegradsforbrenning-2.mp4
4,abort,abort.mp4


In [13]:
video_folder = "helse_tegn"

min_frames = float('inf')
max_frames = 0

for filename in os.listdir(video_folder):
    if filename.endswith(".mp4"): 
        video_path = os.path.join(video_folder, filename)
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Failed to open video: {filename}")
            continue

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        min_frames = min(min_frames, frame_count)
        max_frames = max(max_frames, frame_count)
        cap.release()

print(f"Minimum number of frames: {min_frames}")
print(f"Maximum number of frames: {max_frames}")


Minimum number of frames: 10
Maximum number of frames: 117


In [4]:
# Augmented folder

frame_count_ranges = {
    '0-9': 0,
    '10-19': 0,
    '20-29': 0,
    '30-39': 0,
    '40-49': 0,
    '50-59': 0,
    '60-69': 0,
    '70-79': 0,
    '80-89': 0,
    '90-99': 0,
    '100-119': 0
}

video_counter = 0

def get_frame_count(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    return frame_count

for term_folder in os.listdir(aug_folder):
    term_path = os.path.join(aug_folder, term_folder)

    if os.path.isdir(term_path):
        for video_name in os.listdir(term_path):
            video_path = os.path.join(term_path, video_name)

            if os.path.isfile(video_path) and video_path.lower().endswith('.mp4'):
                frame_count = get_frame_count(video_path)
                video_counter += 1

                if 0 <= frame_count <= 9:
                    frame_count_ranges['0-9'] += 1
                elif 10 <= frame_count <= 19:
                      frame_count_ranges['10-19'] += 1
                elif 20 <= frame_count <= 29:
                     frame_count_ranges['20-29'] += 1
                elif 30 <= frame_count <= 39:
                      frame_count_ranges['30-39'] += 1
                elif 40 <= frame_count <= 49:
                      frame_count_ranges['40-49'] += 1
                elif 50 <= frame_count <= 59:
                    frame_count_ranges['50-59'] += 1
                elif 60 <= frame_count <= 69:
                     frame_count_ranges['60-69'] += 1
                elif 70 <= frame_count <= 79:
                     frame_count_ranges['70-79'] += 1
                elif 80 <= frame_count <= 89:
                     frame_count_ranges['80-89'] += 1
                elif 90 <= frame_count <= 99:
                     frame_count_ranges['90-99'] += 1
                else:
                    frame_count_ranges['100-119'] += 1

print(f"\nTotal videos counted: {video_counter}\n")
for frame_range, count in frame_count_ranges.items():
    print(f'Number of videos with {frame_range} frames: {count}')



Total videos counted: 5840

Number of videos with 0-9 frames: 0
Number of videos with 10-19 frames: 16
Number of videos with 20-29 frames: 36
Number of videos with 30-39 frames: 444
Number of videos with 40-49 frames: 1698
Number of videos with 50-59 frames: 2161
Number of videos with 60-69 frames: 896
Number of videos with 70-79 frames: 352
Number of videos with 80-89 frames: 171
Number of videos with 90-99 frames: 38
Number of videos with 100-119 frames: 28


In [None]:
# zero_pad_resized_224 folder

frame_count_ranges = {
    '0-9': 0,
    '10-19': 0,
    '20-29': 0,
    '30-39': 0,
    '40-49': 0,
    '50-59': 0,
    '60-69': 0,
    '70-79': 0,
    '80-89': 0,
    '90-99': 0,
    '100-119': 0
}

video_counter = 0

def get_frame_count(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    return frame_count

for term_folder in os.listdir(aug_resized_224):
    term_path = os.path.join(aug_resized_224, term_folder)

    if os.path.isdir(term_path):
        for video_name in os.listdir(term_path):
            video_path = os.path.join(term_path, video_name)

            if os.path.isfile(video_path) and video_path.lower().endswith('.mp4'):
                frame_count = get_frame_count(video_path)
                video_counter += 1

                if 0 <= frame_count <= 9:
                    frame_count_ranges['0-9'] += 1
                elif 10 <= frame_count <= 19:
                      frame_count_ranges['10-19'] += 1
                elif 20 <= frame_count <= 29:
                     frame_count_ranges['20-29'] += 1
                elif 30 <= frame_count <= 39:
                      frame_count_ranges['30-39'] += 1
                elif 40 <= frame_count <= 49:
                      frame_count_ranges['40-49'] += 1
                elif 50 <= frame_count <= 59:
                    frame_count_ranges['50-59'] += 1
                elif 60 <= frame_count <= 69:
                     frame_count_ranges['60-69'] += 1
                elif 70 <= frame_count <= 79:
                     frame_count_ranges['70-79'] += 1
                elif 80 <= frame_count <= 89:
                     frame_count_ranges['80-89'] += 1
                elif 90 <= frame_count <= 99:
                     frame_count_ranges['90-99'] += 1
                else:
                    frame_count_ranges['100-119'] += 1

print(f"\nTotal videos counted: {video_counter}\n")
for frame_range, count in frame_count_ranges.items():
    print(f'Number of videos with {frame_range} frames: {count}')



Total videos counted: 5840

Number of videos with 0-9 frames: 0
Number of videos with 10-19 frames: 0
Number of videos with 20-29 frames: 0
Number of videos with 30-39 frames: 0
Number of videos with 40-49 frames: 0
Number of videos with 50-59 frames: 0
Number of videos with 60-69 frames: 0
Number of videos with 70-79 frames: 0
Number of videos with 80-89 frames: 0
Number of videos with 90-99 frames: 0
Number of videos with 100-119 frames: 5840


In [None]:
def check_fps_in_folder(folder_path):
    fps_values = set()  

    # Iterate over each term folder in the augmented folder
    for term_name in os.listdir(folder_path):
        term_path = os.path.join(folder_path, term_name)
        if os.path.isdir(term_path):
            for video_file in os.listdir(term_path):
                if video_file.endswith('.mp4'):
                    video_path = os.path.join(term_path, video_file)
                    
                    cap = cv2.VideoCapture(video_path)
                    if not cap.isOpened():
                        print(f"ERROR: Could not open video {video_path}")
                        continue
                    
                    fps = cap.get(cv2.CAP_PROP_FPS)
                    fps_values.add(fps)  
                    
                    cap.release()
                    print(f"Video: {video_file}, FPS: {fps}")
    
    if len(fps_values) == 1:
        print("All videos have the same FPS.")
    else:
        print(f"Different FPS values found: {fps_values}")

aug_folder = "aug_videos"
check_fps_in_folder(aug_folder)


Video: aug_1_foerstegradsforbrenning.mp4, FPS: 25.0
Video: aug_2_foerstegradsforbrenning.mp4, FPS: 25.0
Video: aug_3_foerstegradsforbrenning.mp4, FPS: 25.0
Video: aug_4_foerstegradsforbrenning.mp4, FPS: 25.0
Video: aug_5_foerstegradsforbrenning.mp4, FPS: 25.0
Video: aug_6_foerstegradsforbrenning.mp4, FPS: 25.0
Video: aug_7_foerstegradsforbrenning.mp4, FPS: 25.0
Video: aug_8_foerstegradsforbrenning.mp4, FPS: 25.0
Video: aug_9_foerstegradsforbrenning.mp4, FPS: 25.0
Video: foerstegradsforbrenning.mp4, FPS: 25.0
Video: andregradsforbrenning.mp4, FPS: 25.0
Video: aug_1_andregradsforbrenning.mp4, FPS: 25.0
Video: aug_2_andregradsforbrenning.mp4, FPS: 25.0
Video: aug_3_andregradsforbrenning.mp4, FPS: 25.0
Video: aug_4_andregradsforbrenning.mp4, FPS: 25.0
Video: aug_5_andregradsforbrenning.mp4, FPS: 25.0
Video: aug_6_andregradsforbrenning.mp4, FPS: 25.0
Video: aug_7_andregradsforbrenning.mp4, FPS: 25.0
Video: aug_8_andregradsforbrenning.mp4, FPS: 25.0
Video: aug_9_andregradsforbrenning.mp4, FP