In [7]:
import pandas as pd

df = pd.read_csv("kills_2021.csv", nrows=100000, encoding="latin1")
df.to_csv("kills_2021.csv", index=False)

print("Done.")


Done.


In [5]:
import pandas as pd

df = pd.read_csv("kills_2022.csv", nrows=100000, encoding="latin1")
df.to_csv("kills_2022.csv", index=False)

print("Done.")


Done.


In [9]:
import os
import pandas as pd

# ===== 配置区域 =====
FOLDER = "."           # 当前文件夹，如需指定其他路径可以改成 "./data"
THRESHOLD_MB = 25      # 大于多少 MB 才会被裁剪
MAX_ROWS = 80000       # 保留的行数
BACKUP_EXT = ".bak"    # 备份文件后缀

# 将 MB 转成字节
THRESHOLD_BYTES = THRESHOLD_MB * 1024 * 1024

# 常见编码尝试列表
ENCODINGS_TO_TRY = ["utf-8", "latin1", "ISO-8859-1", "cp1252"]

print(f"扫描文件夹: {os.path.abspath(FOLDER)}")
print(f"阈值: > {THRESHOLD_MB} MB 的 CSV 将被裁剪为前 {MAX_ROWS} 行\n")

for fname in os.listdir(FOLDER):
    fpath = os.path.join(FOLDER, fname)
    
    # 只处理普通文件 + .csv 后缀
    if not os.path.isfile(fpath):
        continue
    if not fname.lower().endswith(".csv"):
        continue

    size_bytes = os.path.getsize(fpath)
    size_mb = size_bytes / (1024 * 1024)

    if size_bytes <= THRESHOLD_BYTES:
        print(f"[跳过] {fname}  ({size_mb:.2f} MB ≤ {THRESHOLD_MB} MB)")
        continue

    print(f"\n[处理] {fname}  ({size_mb:.2f} MB > {THRESHOLD_MB} MB)")
    df = None
    used_encoding = None

    # 尝试不同编码读取前 MAX_ROWS 行
    for enc in ENCODINGS_TO_TRY:
        try:
            print(f"  尝试编码: {enc} ...", end="")
            df = pd.read_csv(fpath, nrows=MAX_ROWS, encoding=enc)
            used_encoding = enc
            print(" 成功")
            break
        except Exception as e:
            print(f"  失败: {e}")

    if df is None:
        print(f"  ❌ 所有编码尝试失败，跳过该文件：{fname}")
        continue

    # 备份原文件
    backup_path = fpath + BACKUP_EXT
    if not os.path.exists(backup_path):
        os.replace(fpath, backup_path)
        print(f"  已备份原始文件为: {backup_path}")
    else:
        print(f"  警告: 备份文件已存在 ({backup_path})，将覆盖原文件而不再额外备份。")

    # 将裁剪后的数据写回原文件名
    df.to_csv(fpath, index=False)
    new_size_bytes = os.path.getsize(fpath)
    new_size_mb = new_size_bytes / (1024 * 1024)

    print(f"  使用编码: {used_encoding}")
    print(f"  ✅ 已写回裁剪后的文件: {fname}  (约 {new_size_mb:.2f} MB)")
    
print("\n全部处理完成 ✅")


扫描文件夹: /Users/ha0s/Desktop/valorant-dashboard/public/data
阈值: > 25 MB 的 CSV 将被裁剪为前 80000 行

[跳过] scores_2024.csv  (0.05 MB ≤ 25 MB)
[跳过] maps_played_2022.csv  (0.91 MB ≤ 25 MB)
[跳过] kills_2023.csv  (11.14 MB ≤ 25 MB)

[处理] rounds_kills_2021.csv  (120.40 MB > 25 MB)
  尝试编码: utf-8 ... 成功
  已备份原始文件为: ./rounds_kills_2021.csv.bak
  使用编码: utf-8
  ✅ 已写回裁剪后的文件: rounds_kills_2021.csv  (约 11.99 MB)
[跳过] kills_2022.csv  (15.69 MB ≤ 25 MB)
[跳过] maps_played_2023.csv  (0.07 MB ≤ 25 MB)
[跳过] maps_scores_2021.csv  (2.08 MB ≤ 25 MB)
[跳过] scores_2025.csv  (0.05 MB ≤ 25 MB)
[跳过] win_loss_methods_round_number_2024.csv  (5.37 MB ≤ 25 MB)
[跳过] agents_pick_rates_2024.csv  (1.77 MB ≤ 25 MB)
[跳过] maps_played_2021.csv  (1.45 MB ≤ 25 MB)
[跳过] maps_scores_2023.csv  (0.11 MB ≤ 25 MB)
[跳过] team_mapping_2025.csv  (0.00 MB ≤ 25 MB)

[处理] rounds_kills_2022.csv  (120.14 MB > 25 MB)
  尝试编码: utf-8 ... 成功
  已备份原始文件为: ./rounds_kills_2022.csv.bak
  使用编码: utf-8
  ✅ 已写回裁剪后的文件: rounds_kills_2022.csv  (约 12.33 MB)
[跳过] all_team