In [1]:
import os
import polars as pl
import subprocess
from pathlib import Path

root_dir = r"G:\Movies"

# Common movie file extensions
movie_extensions = ['.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv', '.m4v', '.mpg', '.mpeg']

def get_video_duration(file_path):
    try:
        cmd = [
            'ffprobe',
            '-v', 'error',
            '-show_entries', 'format=duration',
            '-of', 'default=noprint_wrappers=1:nokey=1',
            file_path
        ]

        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        duration = float(result.stdout.strip())
        return duration
    except:
        return None

def format_duration(seconds):
    if seconds is None:
        return "Unknown"

    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds = int(seconds % 60)

    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

def find_movies(root_dir):
    movies = []

    for dirpath, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if any(filename.lower().endswith(ext) for ext in movie_extensions):
                full_path = os.path.join(dirpath, filename)

                try:
                    file_size_mb = os.path.getsize(full_path) / (1024 * 1024)
                except:
                    file_size_mb = 0

                duration_seconds = get_video_duration(full_path)
                duration_formatted = format_duration(duration_seconds)

                movies.append({
                    'filename': filename,
                    'path': full_path,
                    'size_mb': round(file_size_mb, 2),
                    'duration': duration_formatted
                })

    return movies

movies = find_movies(root_dir)

df = pl.DataFrame(movies)

df.head(5)

filename,path,size_mb,duration
str,str,f64,str
"""Beauty and the Beast (2017) BD…","""G:\Movies\Animacija\Beauty and…",14280.03,"""Unknown"""
"""Beauty.and.the.Beast.1991.UHD.…","""G:\Movies\Animacija\Beauty.and…",8066.74,"""Unknown"""
"""Cars.3.2017.BDRip.1080p.LT.EN.…","""G:\Movies\Animacija\Cars.3.201…",11540.08,"""Unknown"""
"""Despicable.Me.4.2024.BDRip.108…","""G:\Movies\Animacija\Despicable…",12622.75,"""Unknown"""
"""Dragon Ball Super Broly (2018)…","""G:\Movies\Animacija\Dragon Bal…",8333.56,"""Unknown"""


In [2]:
df

filename,path,size_mb,duration
str,str,f64,str
"""Beauty and the Beast (2017) BD…","""G:\Movies\Animacija\Beauty and…",14280.03,"""Unknown"""
"""Beauty.and.the.Beast.1991.UHD.…","""G:\Movies\Animacija\Beauty.and…",8066.74,"""Unknown"""
"""Cars.3.2017.BDRip.1080p.LT.EN.…","""G:\Movies\Animacija\Cars.3.201…",11540.08,"""Unknown"""
"""Despicable.Me.4.2024.BDRip.108…","""G:\Movies\Animacija\Despicable…",12622.75,"""Unknown"""
"""Dragon Ball Super Broly (2018)…","""G:\Movies\Animacija\Dragon Bal…",8333.56,"""Unknown"""
…,…,…,…
"""3.Fast.And.The.Furious.Tokyo.D…","""G:\Movies\Veiksmo\The.Fast.and…",2706.39,"""Unknown"""
"""4.Fast.And.Furious.2009.BDRip-…","""G:\Movies\Veiksmo\The.Fast.and…",2939.03,"""Unknown"""
"""5.Fast Five.2011.BDRip-AVC.LT.…","""G:\Movies\Veiksmo\The.Fast.and…",3060.55,"""Unknown"""
"""6.Fast.And.Furious.6.2013.BDRi…","""G:\Movies\Veiksmo\The.Fast.and…",3579.78,"""Unknown"""


In [3]:
df.select(
    pl.col('filename').sort()
).group_by(
    'filename'
).len().sort('len').reverse()

filename,len
str,u32
"""DBGT - OP.mkv""",4
"""DBGT - ED2.mkv""",2
"""DBGT - ED3.mkv""",2
"""DBGT - ED1.mkv""",2
"""Dragon Ball Z - 080 - A Sudden…",1
…,…
"""Marvels.Daredevil.2018.S03E11.…",1
"""Kaleidoscope.S01E03.1080p.NF.W…",1
"""Dragon Ball Z - 265 - Buu comm…",1
"""WandaVision.S01E01.1080p.WEB-D…",1


In [4]:
df.filter(
    ~pl.col('path').str.contains('Animacija'),
    ~pl.col('path').str.contains('Serials'),
)['filename']

filename
str
"""arcane.s02e01.1080p.web.h264-s…"
"""arcane.s02e01.1080p.web.h264-s…"
"""arcane.s02e02.1080p.web.h264-s…"
"""arcane.s02e02.1080p.web.h264-s…"
"""arcane.s02e03.1080p.web.h264-s…"
…
"""3.Fast.And.The.Furious.Tokyo.D…"
"""4.Fast.And.Furious.2009.BDRip-…"
"""5.Fast Five.2011.BDRip-AVC.LT.…"
"""6.Fast.And.Furious.6.2013.BDRi…"


In [5]:
df.filter(
    pl.col('filename').str.contains('Andreas')
)

filename,path,size_mb,duration
str,str,f64,str
"""San.Andreas.2015.720p.WEBDL.x2…","""G:\Movies\Disasters\San.Andrea…",3189.89,"""Unknown"""


In [10]:
df.sort('size_mb').reverse()

filename,path,size_mb,duration
str,str,f64,str
"""Titanic 1997.1080p.BD-Remux.AV…","""G:\Movies\Veiksmo\Titanic 1997…",51881.95,"""Unknown"""
"""Godzilla.Vs.Kong.2021.BD-Remux…","""G:\Movies\Fantastika\Godzilla.…",44848.62,"""Unknown"""
"""The.Hateful.Eight.2015.BDRemux…","""G:\Movies\Veiksmo\The.Hateful.…",40816.14,"""Unknown"""
"""The.Day.After.Tomorrow.2004.BD…","""G:\Movies\Disasters\The.Day.Af…",33448.19,"""Unknown"""
"""The.Chronicles.Of.Narnia.The.L…","""G:\Movies\Fantastika\The.Chron…",33244.98,"""Unknown"""
…,…,…,…
"""DBGT - ED1.mkv""","""G:\Movies\Animacija\Dragon.Bal…",24.22,"""Unknown"""
"""DBGT - ED2.mkv""","""G:\Movies\Animacija\Dragon.Bal…",23.35,"""Unknown"""
"""DBGT - ED2.mkv""","""G:\Movies\Animacija\Dragon.Bal…",23.35,"""Unknown"""
"""DBGT - ED4.mkv""","""G:\Movies\Animacija\Dragon.Bal…",22.08,"""Unknown"""
