In [None]:
import subprocess
import matplotlib.pyplot as plt
from collections import OrderedDict
import pandas as pd

## 获取文件夹大小

In [None]:
def get_folder_sizes_windows(base_path):
    """
    Measure the total size of every sub-folder below ``base_path`` via PowerShell.

    One PowerShell process lists all directories under ``base_path``
    recursively and, for each of them, sums the ``Length`` of every file found
    beneath it. Each result is printed as a single ``<bytes>|<full path>``
    line, so a malformed line only affects itself and cannot shift the records
    that follow it.

    Args:
        base_path: Root directory to scan. It is passed with ``-LiteralPath``
            and its single quotes are doubled, so quotes and wildcard
            characters such as ``[`` / ``]`` in the path are taken literally.

    Returns:
        dict mapping each sub-folder's full path (str) to its size in bytes
        (int). Folders without files map to 0. Anything PowerShell writes to
        stderr is printed, and unparsable output lines are printed and skipped.

    Raises:
        OSError: if the ``powershell`` executable cannot be started.
    """
    folder_sizes = {}
    # Inside a single-quoted PowerShell string the only character that needs
    # escaping is the quote itself, which is escaped by doubling it.
    quoted_path = str(base_path).replace("'", "''")
    powershell_command = (
        "[Console]::OutputEncoding = [Text.UTF8Encoding]::UTF8; "
        f"Get-ChildItem -LiteralPath '{quoted_path}' -Directory -Recurse | ForEach-Object {{"
        "  $folderPath = $_.FullName; "
        "  $files = Get-ChildItem -LiteralPath $folderPath -Recurse -File -ErrorAction SilentlyContinue; "
        # Folders without files yield no measurement, hence the explicit 0.
        "  $size = if ($files) { ($files | Measure-Object -Property Length -Sum).Sum } else { 0 }; "
        # [long] prints the size as plain digits; '|' cannot occur in a Windows path.
        "  '{0}|{1}' -f ([long]$size), $folderPath }"
    )
    # -NoProfile keeps user profile scripts from writing into the parsed output.
    command = [
        "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass",
        "-Command", powershell_command,
    ]
    completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if completed.stderr:
        print(f"错误: {completed.stderr.decode('utf-8', errors='replace')}")

    # utf-8-sig drops a leading byte-order mark should PowerShell emit one.
    for line in completed.stdout.decode('utf-8-sig', errors='replace').splitlines():
        if not line.strip():
            continue
        # Split on the first '|' only: the size comes first, the rest is the path.
        size_text, separator, folder = line.partition('|')
        try:
            if not separator or not folder:
                raise ValueError(line)
            folder_sizes[folder] = int(size_text)
        except ValueError:
            print(f"跳过行由于解析错误: {line!r}")

    return folder_sizes

def convert_size_to_gb(size_bytes):
    """
    Express a byte count in gigabytes, with 1 GB taken as 1024 ** 3 bytes.
    """
    bytes_per_gb = 1024 * 1024 * 1024
    return size_bytes / bytes_per_gb

def plot_bar_chart(data):
    """
    Draw a horizontal bar chart with one bar per entry of ``data``.

    ``data`` is a mapping of label -> value. Bars follow the mapping's
    iteration order and the y-axis is inverted so the first entry is drawn at
    the top. As a side effect, matplotlib's global ``font.sans-serif`` setting
    is switched to SimHei so that the Chinese labels can be rendered.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']  # SimHei is used to display the Chinese text
    plt.figure(figsize=(120, 80))
    axes = plt.gca()
    axes.barh(list(data.keys()), list(data.values()), color='skyblue')
    axes.set_xlabel('大小 (GB)')
    axes.set_ylabel('文件夹')
    axes.set_title('各文件夹大小')
    axes.invert_yaxis()  # put the first (largest, when pre-sorted) entry on top
    plt.tight_layout()
    plt.show()

# Change this to the root directory you want to analyse
base_path = r'your/path'
folder_sizes_bytes = get_folder_sizes_windows(base_path)

# Convert every size to GB, then order the folders from largest to smallest
folder_sizes_gb = {
    folder: convert_size_to_gb(num_bytes)
    for folder, num_bytes in folder_sizes_bytes.items()
}
sorted_folder_sizes = OrderedDict(
    sorted(folder_sizes_gb.items(), key=lambda entry: entry[1], reverse=True)
)

# Draw the bar chart
# plot_bar_chart(sorted_folder_sizes)

In [None]:
# Display the folder -> size (GB) mapping.
sorted_folder_sizes

In [None]:
# Split the mapping into two parallel lists: folder paths and their sizes.
key = list(sorted_folder_sizes)
value = [size_gb for size_gb in sorted_folder_sizes.values()]

In [None]:
def plot_bar_chart(data):
    """
    Draw a horizontal bar chart of folder sizes.

    Args:
        data: Mapping of folder label -> size; sizes are plotted on the
            x-axis, which is labelled as GB. Bars follow the mapping's
            iteration order and the y-axis is inverted so the first entry is
            drawn at the top.

    Side effects:
        Sets matplotlib's global ``font.sans-serif`` to SimHei, opens a new
        120 x 80 inch figure at 100 dpi and shows it.
    """
    labels = list(data.keys())
    sizes = list(data.values())

    # The axis labels and title are Chinese, so select a font that provides
    # those glyphs, as the other plot_bar_chart definitions in this notebook
    # do. Without this the text relies on whatever default font is configured.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.figure(figsize=(120, 80), dpi=100)
    plt.barh(labels, sizes, color='skyblue')
    plt.xlabel('大小 (GB)')
    plt.ylabel('文件夹')
    plt.title('各文件夹大小')
    plt.gca().invert_yaxis()  # put the first (largest, when pre-sorted) entry on top
    plt.tight_layout()
    plt.show()


# Plot the per-folder sizes as a horizontal bar chart.
plot_bar_chart(sorted_folder_sizes)

In [None]:
# Tabulate the folder sizes: one row per folder, path column then size in GB.
df = pd.DataFrame(
    data=[*sorted_folder_sizes.items()],
    columns=['文件夹路径', '大小 (GB)'],
)

# Export the table to an Excel workbook without the index column.
df.to_excel("文件夹大小.xlsx", index=False)

## 获取每个文件的大小

In [None]:
import subprocess
from collections import OrderedDict
import matplotlib.pyplot as plt
import pandas as pd

def get_files_sizes_windows(base_path):
    """
    List every file below ``base_path`` with its size, using PowerShell.

    One PowerShell process enumerates all files under ``base_path``
    recursively and prints each as a single ``<bytes>|<full path>`` line, so a
    malformed line only affects itself and cannot shift the records that
    follow it.

    Args:
        base_path: Root directory to scan. It is passed with ``-LiteralPath``
            and its single quotes are doubled, so quotes and wildcard
            characters such as ``[`` / ``]`` in the path are taken literally.

    Returns:
        list of ``(file_path, size_in_bytes)`` tuples in the order PowerShell
        reported them. Anything PowerShell writes to stderr is printed, and
        unparsable output lines are printed and skipped.

    Raises:
        OSError: if the ``powershell`` executable cannot be started.
    """
    file_sizes = []
    # Inside a single-quoted PowerShell string the only character that needs
    # escaping is the quote itself, which is escaped by doubling it.
    quoted_path = str(base_path).replace("'", "''")
    powershell_command = (
        "[Console]::OutputEncoding = [Text.UTF8Encoding]::UTF8; "
        f"Get-ChildItem -LiteralPath '{quoted_path}' -Recurse -File | ForEach-Object {{"
        # Size first, then the path; '|' cannot occur in a Windows path.
        "  '{0}|{1}' -f $_.Length, $_.FullName }"
    )
    # -NoProfile keeps user profile scripts from writing into the parsed output.
    command = [
        "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass",
        "-Command", powershell_command,
    ]
    completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if completed.stderr:
        print(f"错误: {completed.stderr.decode('utf-8', errors='replace')}")

    # utf-8-sig drops a leading byte-order mark should PowerShell emit one.
    for line in completed.stdout.decode('utf-8-sig', errors='replace').splitlines():
        if not line.strip():
            continue
        # Split on the first '|' only: the size comes first, the rest is the path.
        size_text, separator, file_path = line.partition('|')
        try:
            if not separator or not file_path:
                raise ValueError(line)
            file_sizes.append((file_path, int(size_text)))
        except ValueError:
            print(f"跳过行由于解析错误: {line!r}")

    return file_sizes

def convert_size_to_gb(size_bytes):
    """
    Convert a size given in bytes to gigabytes (1 GB = 1024 ** 3 bytes here).
    """
    gigabyte = 2 ** 30
    return size_bytes / gigabyte

def plot_bar_chart(data):
    """
    Draw a horizontal bar chart with one bar per file in ``data``.

    ``data`` is a mapping of label -> value. Bars follow the mapping's
    iteration order and the y-axis is inverted so the first entry is drawn at
    the top. As a side effect, matplotlib's global ``font.sans-serif`` setting
    is switched to SimHei so that the Chinese labels can be rendered.
    """
    bar_labels, bar_values = list(data.keys()), list(data.values())
    plt.rcParams['font.sans-serif'] = ['SimHei']  # SimHei is used to display the Chinese text
    plt.figure(figsize=(120, 80))
    chart = plt.gca()
    chart.barh(bar_labels, bar_values, color='skyblue')
    chart.set_xlabel('大小 (GB)')
    chart.set_ylabel('文件')
    chart.set_title('各文件大小')
    chart.invert_yaxis()  # put the first (largest, when pre-sorted) entry on top
    plt.tight_layout()
    plt.show()

# Change this to the root directory you want to analyse
base_path = r'your/path'

file_sizes_bytes = get_files_sizes_windows(base_path)

# Convert every size to GB, then order the files from largest to smallest
file_sizes_gb = {
    file_path: convert_size_to_gb(num_bytes)
    for file_path, num_bytes in file_sizes_bytes
}
sorted_file_sizes = OrderedDict(
    sorted(file_sizes_gb.items(), key=lambda entry: entry[1], reverse=True)
)
sorted_file_sizes
# Draw the bar chart
# plot_bar_chart(sorted_file_sizes)


In [None]:
# filtered_df = df[df['文件夹路径'].str.contains('20240809', na=False)]

# filtered_df

In [None]:
# Export the current `df` to an Excel workbook without the index column.
# NOTE(review): on a top-to-bottom run `df` is still the folder-size table
# built in an earlier cell; the file-size table is only created in the cell
# after this one. Confirm which table "51.xlsx" is meant to contain.
df.to_excel("51.xlsx", index=False)

In [None]:
# Tabulate the per-file sizes: one row per file, path column then size in GB.
# NOTE(review): the first column holds file paths but is headed '文件夹路径'
# ("folder path"), presumably carried over from the folder table. Confirm
# whether anything reads this header before renaming it.
df = pd.DataFrame(list(sorted_file_sizes.items()), columns=['文件夹路径', '大小 (GB)'])


# Export the table to CSV without the index column.
df.to_csv("111.csv", index=False)

##### 另一种方法，没有上面的快

In [None]:
from pathlib import Path

def get_folder_sizes(base_path, max_depth=3):
    """
    Compute folder sizes with pathlib, down to ``max_depth`` levels.

    Args:
        base_path: Root directory (str or Path). It counts as depth 1.
        max_depth: Deepest level to report. With the default of 3 the root,
            its sub-folders and their sub-folders are reported. A value below
            1 reports nothing.

    Returns:
        dict mapping each reported folder's path (str) to the combined size in
        bytes of all regular files anywhere beneath it.
    """
    folder_sizes = {}
    base_path = Path(base_path)

    def get_size(path):
        """Return the combined size in bytes of all regular files below ``path``."""
        return sum(entry.stat().st_size for entry in path.rglob('*') if entry.is_file())

    def process_folder(path, depth=1):
        """Record the size of ``path`` and recurse into its sub-directories."""
        if depth > max_depth:
            return
        folder_sizes[str(path)] = get_size(path)
        if depth < max_depth:
            # Filter with is_dir() rather than relying on a trailing-slash
            # pattern: before Python 3.11 ``glob('*/')`` also yields plain
            # files, which would then be reported as zero-size "folders".
            for child in path.glob('*'):
                if child.is_dir():
                    process_folder(child, depth + 1)

    process_folder(base_path)
    return folder_sizes


# Root directory to analyse with the pathlib-based scan
base_path = r'your/path'
folder_sizes_bytes = get_folder_sizes(base_path)

# Convert every size to GB, then order the folders from largest to smallest
folder_sizes_gb = {
    folder: convert_size_to_gb(num_bytes)
    for folder, num_bytes in folder_sizes_bytes.items()
}
sorted_folder_sizes1 = OrderedDict(
    sorted(folder_sizes_gb.items(), key=lambda entry: entry[1], reverse=True)
)

In [None]:
# Display the folder -> size (GB) mapping produced by the pathlib-based scan.
sorted_folder_sizes1