In [4]:
import os
import pandas as pd

path = r"D:\Dataset\UCMerced_LandUse_Dataset\Images"

image_name_list = []
ground_truth = []
for filename in os.listdir(path):
    for image_name in os.listdir(os.path.join(path, filename)):
        name = image_name.split(".")[0]
        image_name_list.append(name)
        ground_truth.append(filename)
data = pd.DataFrame({
    "image_name": image_name_list,
    "ground_truth": ground_truth
})
data.to_csv("docs/data.csv", index=False, encoding="utf_8_sig")
data.head()

Unnamed: 0,image_name,ground_truth
0,agricultural00,agricultural
1,agricultural01,agricultural
2,agricultural02,agricultural
3,agricultural03,agricultural
4,agricultural04,agricultural


In [2]:
data.shape

(2100, 2)

In [12]:
import random

train_size, test_size = 0.5, 0.5
train_data = pd.DataFrame()
data_group = data.groupby("ground_truth")
for kind, value in data_group:
    data_sample_train = value.sample(int(100 * train_size))
    train_data = pd.concat([train_data, data_sample_train])
test_data = pd.concat([data, train_data, train_data]).drop_duplicates(keep=False)
train_data.to_csv("docs/train.csv", index=False, encoding="utf_8_sig")
test_data.to_csv("docs/test.csv", index=False, encoding="utf_8_sig")

## 生成配置文件

In [79]:
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap

# 示例数据
kind = data["ground_truth"].unique()

# 创建 YAML 对象并配置格式
yaml = YAML()
yaml.indent(mapping=2, sequence=4, offset=2)  # 设置缩进

# 创建带注释的数据结构
yaml_data = CommentedMap()

# 添加顶层注释
yaml_data.yaml_set_start_comment("task: classification\nVersion: 1.0")

# 添加数据项和注释
yaml_data['num_classes'] = len(kind)
yaml_data.yaml_set_comment_before_after_key('num_classes', '\nNumber of classes', indent=0)
yaml_data['epochs'] = 10
yaml_data.yaml_set_comment_before_after_key('epochs', 'Training configuration root keys below:\n- epochs\n- batch_size', indent=0)
yaml_data['batch_size'] = 128
yaml_data.yaml_set_comment_before_after_key('batch_size', 'Batch size for training', indent=0)

# 创建 optimizer 配置
optimizer = CommentedMap()
optimizer['name'] = "adam"
optimizer['learning_rate'] = 1e-4
optimizer.yaml_set_comment_before_after_key('learning_rate', 'Learning rate', indent=2)
optimizer['step_size'] = 50
optimizer['gamma'] = 0.8
yaml_data['optimizer'] = optimizer
yaml_data.yaml_set_comment_before_after_key('optimizer', '\n', indent=0)
# 创建 model 配置
model = CommentedMap()
model["name"] = "vit_small"
model["pretrain"] = False
model["pretrain_weight_path"] = ""
yaml_data['model'] = model
yaml_data.yaml_set_comment_before_after_key('model', '\n', indent=0)
# 其他配置项
yaml_data['connected_csv_path'] = ""
yaml_data['train_valid_dir'] = ""
yaml_data['save_dir'] = ""

# 创建 label 配置并添加注释
label = CommentedMap()
label.update({i: kind[i] for i in range(len(kind))})  # 动态生成字典内容
yaml_data['label'] = label  # 赋值给 YAML 数据
yaml_data.yaml_set_comment_before_after_key('label', '\nlabel info:\n- Keys are class indices\n- Values are class names', indent=0)

# 写入 YAML 文件
path = 'docs/config_kimi.yaml'
try:
    with open(path, 'w', encoding='utf-8') as f:
        yaml.dump(yaml_data, f)
    print("YAML文件生成成功！")
except Exception as e:
    print(f"写入文件时出错: {e}")


YAML文件生成成功！


# 配置文件读取

# 方法一

In [80]:
# 读取 YAML 文件
try:
    with open(path, 'r') as file:
        data = yaml.load(file)
    print("YAML文件读取成功！")
    print(data)
except Exception as e:
    print(f"读取文件时出错: {e}")

YAML文件读取成功！
{'num_classes': 21, 'epochs': 10, 'batch_size': 128, 'optimizer': {'name': 'adam', 'learning_rate': 0.0001, 'step_size': 50, 'gamma': 0.8}, 'model': {'name': 'vit_small', 'pretrain': False, 'pretrain_weight_path': ''}, 'connected_csv_path': '', 'train_valid_dir': '', 'save_dir': '', 'label': {0: 'agricultural', 1: 'airplane', 2: 'baseballdiamond', 3: 'beach', 4: 'buildings', 5: 'chaparral', 6: 'denseresidential', 7: 'forest', 8: 'freeway', 9: 'golfcourse', 10: 'harbor', 11: 'intersection', 12: 'mediumresidential', 13: 'mobilehomepark', 14: 'overpass', 15: 'parkinglot', 16: 'river', 17: 'runway', 18: 'sparseresidential', 19: 'storagetanks', 20: 'tenniscourt'}}


# 方法2

In [7]:
import yaml
with open("D:\pycharm\yolov5-master\data\coco128.yaml", 'r', encoding='utf-8') as f:
    result = yaml.load_all(f.read(), Loader=yaml.FullLoader)
label_dict = {}
for item in result:
    print(item)

{'path': 'D:/pycharm/yolov5-master/coco128', 'train': 'images/train2017', 'val': 'images/train2017', 'test': None, 'names': {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'd

In [8]:
import os
import pandas as pd
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap


path = r"D:\Dataset\UCMerced_LandUse_Dataset\Images"

image_name_list = []
ground_truth = []
for filename in os.listdir(path):
    for image_name in os.listdir(os.path.join(path, filename)):
        name = image_name.split(".")[0]
        image_name_list.append(name)
        ground_truth.append(filename)
data = pd.DataFrame({
    "image_name": image_name_list,
    "ground_truth": ground_truth
})
data.to_csv("docs/data.csv", index=False, encoding="utf_8_sig")
# data.head()
# 创建YAML对象并配置格式
yaml = YAML()
yaml.indent(mapping=2, sequence=4, offset=2)  # 设置缩进

# 创建带注释的数据结构
data = CommentedMap()

# 添加顶层注释
data.yaml_set_start_comment(
    "task: classification"
)

# 添加数据项和注释
app_info = CommentedMap()
data['app'] = app_info
app_info['name'] = '我的应用'
app_info.yaml_set_comment_before_after_key('name', '应用名称', indent=0)

app_info['version'] = '1.0.0'
app_info.yaml_set_comment_before_after_key('version', '当前版本号')

app_info['author'] = {'name': '张三', 'email': 'zhangsan@example.com'}
app_info.yaml_set_comment_before_after_key('author', '作者信息，包含姓名和邮箱')

# 标签信息
data['tags'] = ['web', 'python', 'backend']
data.yaml_set_comment_before_after_key('tags', '应用相关的标签列表\n- 按重要性排序\n- 最多5个标签', indent=0)

# 数据库配置
data['database'] = db_config = CommentedMap()
db_config['host'] = 'localhost'
db_config['port'] = 3306
db_config['credentials'] = {'username': 'admin', 'password': 'secret'}
db_config.yaml_set_comment_before_after_key('credentials', '数据库认证信息 (敏感内容请勿提交)')

try:
    with open('docs/config.yml', 'w', encoding='utf-8') as f:
        yaml.dump(data, f)
    print("YAML文件生成成功！")
except Exception as e:
    print(f"写入文件时出错: {e}")

YAML文件生成成功！


In [66]:
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap
# 创建YAML对象并配置格式
yaml = YAML()
# yaml.indent(mapping=2, sequence=4, offset=2)  # 设置缩进

# 创建带注释的数据结构
data = CommentedMap()

# 添加顶层注释
data.yaml_set_start_comment(
    "task: classification"
)

# 添加数据项和注释
app_info = CommentedMap()
data['num_classes'] = len(kind)
data['epochs'] = 10
data.yaml_set_comment_before_after_key('epochs', 'training parameters', indent=0)
data['batch_size'] = 128
# data.yaml_set_start_comment("batch_size", "这是第一行注释\n这是第二行注释\n第三行注释")
data['optimizer'] = CommentedMap()
data['optimizer']['name'] = "adam"
data['optimizer']['learning_rate'] = 1e-4
data['optimizer']['step_size'] = 50
data['optimizer']['gamma'] = 0.8

data['model'] = CommentedMap()
data['model']["pretrain"] = False
data['model']["pretrain_weight_path"] = ""

data['connected_csv_path'] = ""
data['train_valid_dir'] = ""
data['save_dir'] = ""

# data['label'] = CommentedMap()
data['label'] = {i: kind[i] for i in range(len(kind))}, 
data.yaml_set_comment_before_after_key('label', 'label info', indent=0)
path = 'docs/config_yaogan.yaml'
try:
    with open(path, 'w', encoding='utf-8') as f:
        yaml.dump(data, f)
    print("YAML文件生成成功！")
except Exception as e:
    print(f"写入文件时出错: {e}")
try:
    yaml = YAML()
    with open(path, 'r') as file:
        data = yaml.load(file)
    print("YAML文件读取成功！")
    print(data)
except Exception as e:
    print(f"读取文件时出错: {e}")

eventtt <class 'ruamel.yaml.events.MappingStartEvent'> MappingStartEvent(tag='tag:yaml.org,2002:map', implicit=True, flow_style=False, comment=[None, [CommentToken('# batch_size\n', col: 这是第一行注释
这是第二行注释
第三行注释)], []])写入文件时出错: '<' not supported between instances of 'str' and 'int'
YAML文件读取成功！
None


In [15]:
import yaml

with open("docs\config.yml", 'r', encoding='utf-8') as f:
    result = yaml.load_all(f.read(), Loader=yaml.FullLoader)
label_dict = {}
for item in result:
    print(item)

{'app': {'name': '我的应用', 'version': '1.0.0', 'author': {'name': '张三', 'email': 'zhangsan@example.com'}}, 'tags': ['web', 'python', 'backend'], 'database': {'host': 'localhost', 'port': 3306, 'credentials': {'username': 'admin', 'password': 'secret'}}}


In [2]:
import os
import pandas as pd

path = r"D:\Dataset\UCMerced_LandUse_Dataset\Images"

image_name_list = []
ground_truth = []
for filename in os.listdir(path):
    for image_name in os.listdir(os.path.join(path, filename)):
        name = image_name.split(".")[0]
        image_name_list.append(name)
        ground_truth.append(filename)
data = pd.DataFrame({
    "image_name": image_name_list,
    "ground_truth": ground_truth
})
# data.to_csv("docs/data.csv", index=False, encoding="utf_8_sig")
data.head()

Unnamed: 0,image_name,ground_truth
0,agricultural00,agricultural
1,agricultural01,agricultural
2,agricultural02,agricultural
3,agricultural03,agricultural
4,agricultural04,agricultural


In [5]:
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap
import os
import pandas as pd

path = r"D:\Dataset\UCMerced_LandUse_Dataset\Images"

path_list, image_name_list = [], []
ground_truth = []
for filename in os.listdir(path):
    for image_name in os.listdir(os.path.join(path, filename)):
        path_list.append(os.path.join(path, filename, image_name))
        name = image_name.split(".")[0]
        image_name_list.append(name)
        ground_truth.append(filename)
data = pd.DataFrame({
    "image_path": path_list,
    "image_name": image_name_list,
    "ground_truth": ground_truth
})
data.to_csv("docs/data.csv", index=False, encoding="utf_8_sig")
# 示例数据
kind = data["ground_truth"].unique()

# 创建 YAML 对象并配置格式
yaml = YAML()
yaml.indent(mapping=2, sequence=4, offset=2)  # 设置缩进

# 创建带注释的数据结构
yaml_data = CommentedMap()

# 添加顶层注释
yaml_data.yaml_set_start_comment("task: classification\nVersion: 1.0")
yaml_data['data_path'] = path
yaml_data.yaml_set_comment_before_after_key('data path', indent=0)
yaml_data['table_path'] = "docs/data.csv"
yaml_data.yaml_set_comment_before_after_key('table path', indent=0)
yaml_data['index_name'] = "image_name"
yaml_data.yaml_set_comment_before_after_key('index_name', '', indent=0)
yaml_data['target_label'] = "ground_truth"
yaml_data.yaml_set_comment_before_after_key('target_label', '', indent=0)
yaml_data['train_radio'] = 0.5
yaml_data.yaml_set_comment_before_after_key('train_radio', '', indent=0)

# 添加数据项和注释
yaml_data['num_classes'] = len(kind)
yaml_data.yaml_set_comment_before_after_key('num_classes', '\nNumber of classes', indent=0)
yaml_data['epochs'] = 10
yaml_data.yaml_set_comment_before_after_key('epochs', 'Training configuration root keys below:\n- epochs\n- batch_size', indent=0)
yaml_data['batch_size'] = 128
yaml_data.yaml_set_comment_before_after_key('batch_size', 'Batch size for training', indent=0)

# 创建 optimizer 配置
optimizer = CommentedMap()
optimizer['name'] = "adam"
optimizer['learning_rate'] = 1e-4
optimizer.yaml_set_comment_before_after_key('learning_rate', 'Learning rate', indent=2)
optimizer['step_size'] = 50
optimizer['gamma'] = 0.8
yaml_data['optimizer'] = optimizer
yaml_data.yaml_set_comment_before_after_key('optimizer', '\n', indent=0)
# 创建 model 配置
model = CommentedMap()
model["name"] = "vit_small"
model["pretrain"] = False
model["pretrain_weight_path"] = ""
yaml_data['model'] = model
yaml_data.yaml_set_comment_before_after_key('model', '\n', indent=0)
# 其他配置项
yaml_data['connected_csv_path'] = ""
yaml_data['train_valid_dir'] = ""
yaml_data['save_dir'] = ""

# 创建 label 配置并添加注释
label = CommentedMap()
label.update({i: kind[i] for i in range(len(kind))})  # 动态生成字典内容
yaml_data['label'] = label  # 赋值给 YAML 数据
yaml_data.yaml_set_comment_before_after_key('label', '\nlabel info:\n- Keys are class indices\n- Values are class names', indent=0)

# 写入 YAML 文件
path = 'docs/config_kimi.yaml'
try:
    with open(path, 'w', encoding='utf-8') as f:
        yaml.dump(yaml_data, f)
    print("YAML文件生成成功！")
except Exception as e:
    print(f"写入文件时出错: {e}")

# 读取 YAML 文件
try:
    with open(path, 'r') as file:
        data = yaml.load(file)
    print("YAML文件读取成功！")
    print(data)
except Exception as e:
    print(f"读取文件时出错: {e}")

YAML文件生成成功！
YAML文件读取成功！
{'data_path': 'D:\\Dataset\\UCMerced_LandUse_Dataset\\Images', 'table_path': 'docs/data.csv', 'index_name': 'image_name', 'target_label': 'ground_truth', 'train_radio': 0.5, 'num_classes': 21, 'epochs': 10, 'batch_size': 128, 'optimizer': {'name': 'adam', 'learning_rate': 0.0001, 'step_size': 50, 'gamma': 0.8}, 'model': {'name': 'vit_small', 'pretrain': False, 'pretrain_weight_path': ''}, 'connected_csv_path': '', 'train_valid_dir': '', 'save_dir': '', 'label': {0: 'agricultural', 1: 'airplane', 2: 'baseballdiamond', 3: 'beach', 4: 'buildings', 5: 'chaparral', 6: 'denseresidential', 7: 'forest', 8: 'freeway', 9: 'golfcourse', 10: 'harbor', 11: 'intersection', 12: 'mediumresidential', 13: 'mobilehomepark', 14: 'overpass', 15: 'parkinglot', 16: 'river', 17: 'runway', 18: 'sparseresidential', 19: 'storagetanks', 20: 'tenniscourt'}}
