In [3]:
import mlcroissant as mlc

In [None]:


# Files that make up the dataset; every FileObject created below is
# appended to this list.
distribution = []

# Network definition files, one folder per network (net_1 .. net_5).
# NOTE(review): no checksum (sha256/md5) is supplied for these files --
# confirm whether the Croissant validator in use requires one.
for net_number in range(1, 6):
    net_id = f"net_{net_number}"
    folder_url = f"https://github.com/yourusername/yourrepo/raw/main/network/{net_id}/"

    # Register each file of this network individually.
    for filename in ("additional.xml", "net.xml", "od.xml", "routes.csv", "taz.xml"):
        # Everything that is not an .xml file in this list is a CSV.
        if filename.endswith(".xml"):
            mime_type = "application/xml"
        else:
            mime_type = "text/csv"

        network_file = mlc.FileObject(
            id=f"{net_id}-{filename}",
            name=filename,
            description=f"{filename} file for {net_id}.",
            content_url=f"{folder_url}{filename}",
            encoding_formats=[mime_type],
        )
        distribution.append(network_file)

# Sensor data: one CSV FileObject plus one RecordSet per network.
# Despite its name, `sensor_files` collects the RecordSet objects; the
# FileObjects themselves go into `distribution`.
sensor_files = []
for net_number in range(1, 6):
    sensor_id = f"sensor-{net_number}"
    file_name = f"sensor_net_{net_number}.csv"

    # FileObject describing the raw sensor CSV.
    sensor_csv = mlc.FileObject(
        id=sensor_id,
        name=file_name,
        description=f"Sensor flow data for net_{net_number}.",
        content_url=f"https://github.com/yourusername/yourrepo/raw/main/sensor_data/{file_name}",
        encoding_formats=["text/csv"],
    )
    distribution.append(sensor_csv)

    # Each field reads one column of this network's sensor CSV. Field ids
    # carry the network number so they stay distinct across record sets.
    edge_field = mlc.Field(
        id=f"edge_id-{net_number}",
        name="edge_id",
        description="Edge ID where the sensor is located.",
        data_types=mlc.DataType.TEXT,
        source=mlc.Source(
            file_object=sensor_id,
            extract=mlc.Extract(column="edge_id"),
        ),
    )
    flow_field = mlc.Field(
        id=f"flow-{net_number}",
        name="interval_nVehContrib",
        description="Flow (number of vehicles) through the sensor on this edge.",
        data_types=mlc.DataType.INTEGER,
        source=mlc.Source(
            file_object=sensor_id,
            extract=mlc.Extract(column="interval_nVehContrib"),
        ),
    )

    # RecordSet for this network (kept separate per network so the field
    # definitions could differ between networks if needed).
    sensor_files.append(
        mlc.RecordSet(
            id=f"sensor-records-{net_number}",
            name=f"sensor_net_{net_number}",
            fields=[edge_field, flow_field],
        )
    )

# Assemble the top-level Croissant metadata for the whole dataset from the
# FileObjects in `distribution` and the RecordSets in `sensor_files`.
metadata = mlc.Metadata(
    name="Multi-Network Traffic Simulation Dataset",
    description="Dataset containing SUMO simulation files and sensor-based flow data for 5 different traffic networks.",
    license="https://creativecommons.org/licenses/by/4.0/",
    # mlcroissant exposes the schema.org "creator" property as the
    # `creators` keyword and expects a list of Person/Organization nodes;
    # a `creator=` keyword with a raw JSON-LD dict is not an accepted
    # constructor argument.
    creators=[mlc.Person(name="Your Name")],
    url="https://github.com/yourusername/yourrepo",
    distribution=distribution,
    record_sets=sensor_files,
)
