In [6]:
import polars as pl
from sentence_transformers import SentenceTransformer, util

In [29]:
# Load the sentence-embedding model, read the sensor CSV, and keep its column names.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
df = pl.read_csv("./csv_data//deoksan.csv")
col_lis = df.columns

In [33]:
# Reshape wide -> long: "time" stays as the identifier column and every other
# column is folded into (sensor_key, sensor_value) rows.
long_df = df.unpivot(
    index=["time"], 
    variable_name="sensor_key", 
    value_name="sensor_value"
)

In [35]:
# Row count of the long-format frame.
len(long_df)

54181864

In [30]:
# Show how many columns the CSV has, followed by their names.
print(len(col_lis))
print(col_lis)

12
['time', 'curr', 'currR', 'currS', 'currT', 'Ground', 'PT100', 'Vibra', 'Volt', 'VoltR', 'VoltS', 'VoltT']


In [None]:
# Candidate target terms that the raw column names are compared against below.
test_lis = ["time", "curr", "open", "high", "low", "close", "volume"]

In [None]:
# Encode both name lists with the sentence-transformer model.
embeddings_col = model.encode(col_lis)
embeddings_test = model.encode(test_lis)

In [None]:
# Pairwise cosine similarity: one row per entry of col_lis, one column per entry of test_lis.
# Uses util.cos_sim (the same helper map_filenames_to_classes calls) instead of the
# legacy pytorch_cos_sim alias, so the notebook relies on a single similarity function.
cos_sim = util.cos_sim(embeddings_col, embeddings_test)

In [None]:
# Display the raw similarity matrix (rows follow col_lis, columns follow test_lis).
cos_sim

tensor([[1.0000, 0.2815, 0.3500, 0.3599, 0.2940, 0.3690, 0.2843],
        [0.2815, 1.0000, 0.2494, 0.2326, 0.1609, 0.2941, 0.1359],
        [0.2526, 0.8962, 0.2469, 0.1903, 0.1107, 0.2960, 0.1507],
        [0.2837, 0.7859, 0.2376, 0.1964, 0.1306, 0.2623, 0.1399],
        [0.3048, 0.8718, 0.2288, 0.1948, 0.1153, 0.3028, 0.1356],
        [0.2937, 0.1676, 0.2247, 0.3562, 0.3964, 0.2598, 0.2770],
        [0.2539, 0.1860, 0.1843, 0.2260, 0.2490, 0.2050, 0.2276],
        [0.1850, 0.2069, 0.2003, 0.1871, 0.1757, 0.2397, 0.1827],
        [0.3318, 0.2287, 0.2352, 0.3520, 0.3484, 0.3218, 0.5543],
        [0.2524, 0.2876, 0.2477, 0.3130, 0.2403, 0.2996, 0.4205],
        [0.3014, 0.2136, 0.2043, 0.3438, 0.3655, 0.2958, 0.5674],
        [0.2648, 0.2697, 0.2373, 0.3036, 0.2599, 0.3133, 0.3714]])

In [None]:
import pandas as pd

# Wrap the similarity matrix in a labelled frame: raw columns as the index,
# candidate terms as the column headers.
df_sim = pd.DataFrame(cos_sim.numpy(), index=col_lis, columns=test_lis)

In [None]:
# Display the labelled similarity table.
df_sim

Unnamed: 0,time,curr,open,high,low,close,volume
time,1.0,0.281514,0.34997,0.359886,0.294038,0.369033,0.284349
curr,0.281514,1.0,0.249423,0.232571,0.160885,0.294118,0.135923
currR,0.252592,0.896221,0.246941,0.1903,0.110683,0.296023,0.150738
currS,0.283679,0.785856,0.237553,0.196365,0.130628,0.262291,0.139895
currT,0.304782,0.871761,0.228802,0.194801,0.115335,0.302783,0.135615
Ground,0.293742,0.167581,0.224695,0.356201,0.396437,0.259835,0.276982
PT100,0.253947,0.186023,0.184275,0.226033,0.248988,0.204997,0.227554
Vibra,0.185005,0.206896,0.200322,0.187099,0.175703,0.239717,0.182725
Volt,0.33178,0.228741,0.235239,0.351962,0.348422,0.3218,0.554284
VoltR,0.252428,0.287573,0.247696,0.313022,0.240265,0.2996,0.420484


In [None]:
# For each raw column, keep the candidate term with the highest cosine similarity.
results = []
for i, src in enumerate(col_lis):
    best_idx = cos_sim[i].argmax().item()  # position of the best-scoring candidate in this row
    best_score = cos_sim[i][best_idx].item()
    best_target = test_lis[best_idx]
    
    results.append({
        "Raw_Column": src,
        "Mapped_Ontology": best_target,
        "Score": round(best_score, 2),
        "Assessment": "‚úÖ Good" if best_score > 0.5 else "‚ö†Ô∏è Check" # Adjust the threshold as the situation requires
    })

pd.DataFrame(results)

Unnamed: 0,Raw_Column,Mapped_Ontology,Score,Assessment
0,time,time,1.0,‚úÖ Good
1,curr,curr,1.0,‚úÖ Good
2,currR,curr,0.9,‚úÖ Good
3,currS,curr,0.79,‚úÖ Good
4,currT,curr,0.87,‚úÖ Good
5,Ground,low,0.4,‚ö†Ô∏è Check
6,PT100,time,0.25,‚ö†Ô∏è Check
7,Vibra,close,0.24,‚ö†Ô∏è Check
8,Volt,volume,0.55,‚úÖ Good
9,VoltR,volume,0.42,‚ö†Ô∏è Check


In [None]:
# Embed only the column names, then print just the embedding shape and the
# first vector rather than the whole matrix.
column_names = list(df.schema.keys())
embeddings = model.encode(column_names)
print("Embeddings shape:", embeddings.shape)
print("First vector:", embeddings[0])


In [18]:
# 2. List of messy filenames (input data)
filenames = [
    "Inj_Machine_Log_01.csv",       # uses an abbreviation
    "Plastic_Molding_Data.csv",     # uses a synonym
    "Robot_Arm_Axis_X.csv",         # names a specific component
    "Auto_Welder_Final.csv",        # welder
    "Factory_Pump_Vib.csv",         # pump
    "M01_Unknown.csv",               # meaning unclear (mapping expected to fail)
    "CNC_Machine_Data.csv",
    "cnc.csv",
    "cnc_data.csv",
    "cnc_data_01.csv",
    "cnc_data_02.csv",
    "cnc_data_03.csv",
    "cnc_data_04.csv",
    
]

# 3. Standard ontology class definitions (target classes)
# The model looks at a filename and has to pick one of these.
ontology_classes = [
    "Injection_Molding_Machine", # injection molding machine
    "Welding_Robot",             # welding robot
    "Industrial_Pump",           # pump
    "CNC_Machine",               # CNC
    "Conveyor_Belt"              # conveyor
]

In [19]:
def preprocess_filename(fname):
    """Turn a CSV filename into a space-separated phrase suitable for embedding.

    Args:
        fname: File name such as "Inj_Machine_Log_01.csv".

    Returns:
        The name without a trailing ".csv" extension (matched case-insensitively),
        with every underscore and hyphen replaced by a space.
    """
    # Strip only a trailing ".csv": str.replace would also delete ".csv" appearing
    # in the middle of the name and would leave an upper-case ".CSV" in place.
    stem = fname[:-4] if fname.lower().endswith('.csv') else fname
    return stem.replace('_', ' ').replace('-', ' ')

In [20]:
import pandas as pd
def map_filenames_to_classes(files, classes, threshold=0.4):
    """Assign each filename to the most similar ontology class by embedding similarity.

    Filenames are cleaned with ``preprocess_filename``, encoded together with the
    class names by the notebook-level ``model``, and compared with cosine similarity.

    Args:
        files: Iterable of filenames to classify.
        classes: List of candidate class names.
        threshold: A file is labelled with its best class only when the best score
            is strictly greater than this value; otherwise it is "Unclassified".

    Returns:
        pandas.DataFrame with columns Filename, Interpreted_As, Mapped_Class and
        Confidence (best score rounded to 3 decimals), one row per input file.
        An empty ``files`` yields an empty frame with the same columns.
    """
    columns = ["Filename", "Interpreted_As", "Mapped_Class", "Confidence"]
    files = list(files)
    if not files:
        # Nothing to classify: skip encoding altogether and keep the result schema stable.
        return pd.DataFrame(columns=columns)

    # Embed the cleaned filenames and the class names.
    clean_names = [preprocess_filename(f) for f in files]
    embeddings_files = model.encode(clean_names)
    embeddings_classes = model.encode(classes)

    # Similarity matrix: one row per file, one column per class.
    scores = util.cos_sim(embeddings_files, embeddings_classes)

    results = []
    for i, fname in enumerate(files):
        best_idx = scores[i].argmax().item()
        best_score = scores[i][best_idx].item()

        # Hold back the classification when the best match is not above the threshold.
        category = classes[best_idx] if best_score > threshold else "Unclassified"

        results.append({
            "Filename": fname,
            "Interpreted_As": clean_names[i],
            "Mapped_Class": category,
            "Confidence": round(best_score, 3)
        })

    return pd.DataFrame(results, columns=columns)

In [None]:
import polars as pl
import os
from rdflib import Graph, Literal, RDF, Namespace, XSD
import urllib.parse
from datetime import datetime

# 1. Settings
INPUT_FOLDER = "./factory_data"  # folder where the data files are stored
META_FILE = "metadata_ontology.ttl"

# Namespace setup
BASE_URI = "http://factory.org/meta/"
META = Namespace(BASE_URI)
g = Graph()  # module-level graph that build_metadata_dataset() adds triples to
g.bind("meta", META)

def build_metadata_dataset():
    """Scan INPUT_FOLDER for CSV files and record their schemas as RDF metadata.

    For every CSV file a ``META.Dataset`` node (file name and absolute path) is
    added to the module-level graph ``g``, plus one ``META.DataColumn`` node per
    column carrying the column name and its Polars dtype as a string. The graph
    is then serialized to META_FILE in Turtle format.

    If INPUT_FOLDER does not exist it is created and the function returns without
    writing META_FILE. Files whose schema cannot be read are reported and skipped.

    Returns:
        None.
    """
    print("üßê Î©îÌÉÄ Îç∞Ïù¥ÌÑ∞ÏÖã Íµ¨Ï∂ïÏùÑ ÏãúÏûëÌï©ÎãàÎã§...")

    # Look for files in the input folder
    if not os.path.exists(INPUT_FOLDER):
        os.makedirs(INPUT_FOLDER)
        print(f"   [ÏïàÎÇ¥] '{INPUT_FOLDER}' Ìè¥ÎçîÍ∞Ä ÏÉùÏÑ±ÎêòÏóàÏäµÎãàÎã§. Îç∞Ïù¥ÌÑ∞Î•º ÎÑ£Í≥† Îã§Ïãú Ïã§ÌñâÌï¥ Ï£ºÏÑ∏Ïöî.")
        return

    # Match the extension case-insensitively (".CSV" files are data too) and sort
    # the listing: os.listdir returns entries in arbitrary order, which would make
    # the processing order differ between runs.
    files = sorted(f for f in os.listdir(INPUT_FOLDER) if f.lower().endswith(".csv"))

    for filename in files:
        file_path = os.path.join(INPUT_FOLDER, filename)

        # 1. [Key step] Resolve only the schema through the Polars lazy API
        #    instead of reading the file eagerly.
        try:
            schema = pl.scan_csv(file_path).collect_schema()  # column info only
        except Exception as e:
            print(f"   [Ï£ºÏùò] ÌååÏùºÏùÑ ÏùΩÏùÑ Ïàò ÏóÜÏäµÎãàÎã§: {filename} ({e})")
            continue

        # 2. Create the dataset (file) node, using the file stem as its ID
        dataset_name = os.path.splitext(filename)[0]
        dataset_uri = META[urllib.parse.quote(dataset_name)]

        g.add((dataset_uri, RDF.type, META.Dataset))
        g.add((dataset_uri, META.hasFilePath, Literal(os.path.abspath(file_path))))  # physical path
        g.add((dataset_uri, META.hasFileName, Literal(filename)))

        # 3. Register the column information (metadata)
        print(f"   Process: {filename} (Ïª¨Îüº {len(schema)}Í∞ú Î∞úÍ≤¨)")

        for col_name, data_type in schema.items():
            # Column node ID has the form <dataset>_<column>
            col_id = f"{dataset_name}_{col_name}"
            col_uri = META[urllib.parse.quote(col_id)]

            # Relation: dataset --[hasColumn]--> column
            g.add((col_uri, RDF.type, META.DataColumn))
            g.add((dataset_uri, META.hasColumn, col_uri))

            # Column attributes (name, data type)
            g.add((col_uri, META.columnName, Literal(col_name)))
            g.add((col_uri, META.dataType, Literal(str(data_type))))

    # Save the result
    g.serialize(destination=META_FILE, format="turtle")
    print(f"‚úÖ Î©îÌÉÄ Îç∞Ïù¥ÌÑ∞ÏÖã Íµ¨Ï∂ïÏù¥ ÏôÑÎ£åÎêòÏóàÏäµÎãàÎã§! ({META_FILE})")

# Run
if __name__ == "__main__":
    # Create a dummy file for testing (only when the input folder does not exist yet)
    if not os.path.exists(INPUT_FOLDER):
        os.makedirs(INPUT_FOLDER)
        with open(f"{INPUT_FOLDER}/Motor_Log_2024.csv", "w") as f:
            f.write("time,currR,currS,currT,Unknown_X\n10:00,10,10,10,0.1")
            
    build_metadata_dataset()

In [21]:
# Run: classify the sample filenames against the ontology classes.
df_result = map_filenames_to_classes(filenames, ontology_classes)
print(df_result)

                    Filename        Interpreted_As               Mapped_Class  \
0     Inj_Machine_Log_01.csv    Inj Machine Log 01               Unclassified   
1   Plastic_Molding_Data.csv  Plastic Molding Data  Injection_Molding_Machine   
2       Robot_Arm_Axis_X.csv      Robot Arm Axis X              Welding_Robot   
3      Auto_Welder_Final.csv     Auto Welder Final              Welding_Robot   
4       Factory_Pump_Vib.csv      Factory Pump Vib            Industrial_Pump   
5            M01_Unknown.csv           M01 Unknown               Unclassified   
6       CNC_Machine_Data.csv      CNC Machine Data                CNC_Machine   
7                    cnc.csv                   cnc                CNC_Machine   
8               cnc_data.csv              cnc data                CNC_Machine   
9            cnc_data_01.csv           cnc data 01                CNC_Machine   
10           cnc_data_02.csv           cnc data 02                CNC_Machine   
11           cnc_data_03.csv

In [22]:
# Display the mapping table.
df_result

Unnamed: 0,Filename,Interpreted_As,Mapped_Class,Confidence
0,Inj_Machine_Log_01.csv,Inj Machine Log 01,Unclassified,0.337
1,Plastic_Molding_Data.csv,Plastic Molding Data,Injection_Molding_Machine,0.437
2,Robot_Arm_Axis_X.csv,Robot Arm Axis X,Welding_Robot,0.406
3,Auto_Welder_Final.csv,Auto Welder Final,Welding_Robot,0.557
4,Factory_Pump_Vib.csv,Factory Pump Vib,Industrial_Pump,0.622
5,M01_Unknown.csv,M01 Unknown,Unclassified,0.233
6,CNC_Machine_Data.csv,CNC Machine Data,CNC_Machine,0.806
7,cnc.csv,cnc,CNC_Machine,0.756
8,cnc_data.csv,cnc data,CNC_Machine,0.713
9,cnc_data_01.csv,cnc data 01,CNC_Machine,0.663


In [23]:
import os
import urllib.parse

from rdflib import Graph, RDF, Namespace, Literal

# Fresh graph holding the file -> machine-class mapping.
g = Graph()
FACT = Namespace("http://factory.org/")

for index, row in df_result.iterrows():
    # Files that could not be classified get no triples.
    if row['Mapped_Class'] == "Unclassified":
        continue

    # 1. Create the dataset node from the file stem. The stem is percent-encoded
    #    so that names containing spaces or other unsafe characters still yield
    #    a valid IRI (same approach as build_hybrid_ontology).
    dataset_name = os.path.splitext(row['Filename'])[0]
    dataset_uri = FACT[urllib.parse.quote(dataset_name)]

    # 2. Look up the mapped class, e.g. the Injection_Molding_Machine class
    class_uri = FACT[urllib.parse.quote(row['Mapped_Class'])]

    # 3. [Key step] Declare the dataset to be data of that class.
    #    Meaning: "Inj_Machine_Log_01.csv is a kind of injection-molding-machine data".
    g.add((dataset_uri, FACT.isDataOf, class_uri))

    # Extra: keep the confidence score as metadata (for later verification).
    # float() hands rdflib a plain Python float rather than a pandas/numpy scalar.
    g.add((dataset_uri, FACT.mappingConfidence, Literal(float(row['Confidence']))))

In [25]:
# Write the mapping graph to disk in Turtle format.
g.serialize(destination='factory_data.ttl', format='turtle')

<Graph identifier=Nd1fe5ecce7be4316b3e2b38348211c4e (<class 'rdflib.graph.Graph'>)>

- mapped_machine -> mapped_col


In [28]:
# NOTE(review): the recorded run of this cell raised NameError, i.e. `df` was not
# defined in the kernel at that point; it is assigned in the CSV-loading cell.
df

NameError: name 'df' is not defined

In [38]:
import polars as pl
import pandas as pd
import os
import re
from sentence_transformers import SentenceTransformer, util
from rdflib import Graph, Literal, RDF, Namespace, XSD
import urllib.parse
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass

# ============================================
# 1. Rule-based mapping
# ============================================
def build_hybrid_ontology(
    input_folder: str,
    ontology_classes: List[str],
    output_file: str = "metadata_ontology.ttl",
    model_name: str = 'sentence-transformers/all-MiniLM-L6-v2'
) -> "Tuple[pd.DataFrame, Graph]":
    """
    Build the metadata ontology using the hybrid mapping approach.

    Each CSV file in ``input_folder`` has its column names read (schema only),
    is mapped to an ontology class by ``HybridMapper``, and — unless it ends up
    "Unclassified" — is written into an RDF graph as a dataset node with its
    class, mapping confidence, mapping method and columns.

    Args:
        input_folder: Folder containing the CSV files. Created if it is missing.
        ontology_classes: List of standard ontology class names.
        output_file: Name of the Turtle (TTL) file to write.
        model_name: SentenceTransformer model name passed to HybridMapper.

    Returns:
        ``(mapping_df, graph)``. When the folder had to be created or holds no
        CSV files, an empty DataFrame and an empty graph are returned and no
        file is written.
    """
    print("üßê ÌïòÏù¥Î∏åÎ¶¨Îìú Ïò®ÌÜ®Î°úÏßÄ Íµ¨Ï∂ïÏùÑ ÏãúÏûëÌï©ÎãàÎã§...")

    # Namespace setup
    BASE_URI = "http://factory.org/meta/"
    META = Namespace(BASE_URI)
    FACT = Namespace("http://factory.org/")
    g = Graph()
    g.bind("meta", META)
    g.bind("fact", FACT)

    # Initialise the hybrid mapper
    mapper = HybridMapper(ontology_classes, model_name)

    # Look for files in the input folder
    if not os.path.exists(input_folder):
        os.makedirs(input_folder)
        print(f"   [ÏïàÎÇ¥] '{input_folder}' Ìè¥ÎçîÍ∞Ä ÏÉùÏÑ±ÎêòÏóàÏäµÎãàÎã§.")
        # Return an empty DataFrame and graph
        return pd.DataFrame(), g

    # Case-insensitive extension match (also catches e.g. ".Csv"); sorted because
    # os.listdir returns entries in arbitrary order, which would make the mapping
    # table and triple order vary between runs.
    files = sorted(f for f in os.listdir(input_folder) if f.lower().endswith(".csv"))

    if not files:
        print(f"   [Í≤ΩÍ≥†] '{input_folder}' Ìè¥ÎçîÏóê CSV ÌååÏùºÏù¥ ÏóÜÏäµÎãàÎã§.")
        # Return an empty DataFrame and graph
        return pd.DataFrame(), g

    # Collect the column names of every file (schema scan only)
    file_columns = {}
    for filename in files:
        file_path = os.path.join(input_folder, filename)
        try:
            schema = pl.scan_csv(file_path).collect_schema()
            file_columns[filename] = list(schema.keys())
        except Exception as e:
            print(f"   [Ï£ºÏùò] ÌååÏùºÏùÑ ÏùΩÏùÑ Ïàò ÏóÜÏäµÎãàÎã§: {filename} ({e})")
            # Unreadable files are still passed to the mapper, just without column hints.
            file_columns[filename] = []

    # Run the hybrid mapping
    mapping_df = mapper.map_files(files, file_columns)

    print("\nüìä Îß§Ìïë Í≤∞Í≥º:")
    print(mapping_df.to_string(index=False))

    # Build the ontology
    for _, row in mapping_df.iterrows():
        filename = row['Filename']
        mapped_class = row['Mapped_Class']
        confidence = row['Confidence']
        method = row['Method']

        if mapped_class == "Unclassified":
            print(f"   ‚ö†Ô∏è  {filename}: Îß§Ìïë Ïã§Ìå® (ÏàòÎèô Í≤ÄÌÜ† ÌïÑÏöî)")
            continue

        # File path and URIs
        file_path = os.path.join(input_folder, filename)
        dataset_name = os.path.splitext(filename)[0]
        dataset_uri = FACT[urllib.parse.quote(dataset_name)]
        class_uri = FACT[urllib.parse.quote(mapped_class)]

        # Dataset node
        g.add((dataset_uri, RDF.type, META.Dataset))
        g.add((dataset_uri, META.hasFileName, Literal(filename)))
        g.add((dataset_uri, META.hasFilePath, Literal(os.path.abspath(file_path))))

        # Class mapping
        g.add((dataset_uri, FACT.isDataOf, class_uri))
        g.add((dataset_uri, META.mappingConfidence, Literal(float(confidence), datatype=XSD.float)))
        g.add((dataset_uri, META.mappingMethod, Literal(method)))

        # Column information; .get covers a mapper that reports a filename
        # that was not in the scanned set.
        for col_name in file_columns.get(filename, []):
            col_id = f"{dataset_name}_{col_name}"
            col_uri = META[urllib.parse.quote(col_id)]

            g.add((col_uri, RDF.type, META.DataColumn))
            g.add((dataset_uri, META.hasColumn, col_uri))
            g.add((col_uri, META.columnName, Literal(col_name)))

        print(f"   ‚úÖ {filename} ‚Üí {mapped_class} ({method}, confidence: {confidence:.2f})")

    # Save the result
    if len(mapping_df) > 0:  # only write when at least one file was mapped
        g.serialize(destination=output_file, format="turtle")
        print(f"\n‚úÖ Ïò®ÌÜ®Î°úÏßÄ Íµ¨Ï∂ï ÏôÑÎ£å! ({output_file})")

        # Print per-method statistics
        method_counts = mapping_df['Method'].value_counts()
        print("\nüìà Îß§Ìïë Î∞©Î≤ï ÌÜµÍ≥Ñ:")
        for method, count in method_counts.items():
            print(f"   {method}: {count}Í∞ú")

    return mapping_df, g

In [40]:
if __name__ == "__main__":
    # Define the ontology classes
    ontology_classes = [
        "Injection_Molding_Machine",
        "Welding_Robot",
        "Industrial_Pump",
        "CNC_Machine",
        "Conveyor_Belt",
        "Motor"
    ]
    
    # Run the ontology build
    mapping_df, graph = build_hybrid_ontology(
        input_folder="./csv_data",
        ontology_classes=ontology_classes,
        output_file="metadata_ontology_hybrid.ttl"
    )
    
    # Inspect the result
    print("\n" + "="*50)
    print("Îß§Ìïë Í≤∞Í≥º ÏÉÅÏÑ∏:")
    print("="*50)
    print(mapping_df)

üßê ÌïòÏù¥Î∏åÎ¶¨Îìú Ïò®ÌÜ®Î°úÏßÄ Íµ¨Ï∂ïÏùÑ ÏãúÏûëÌï©ÎãàÎã§...

üìä Îß§Ìïë Í≤∞Í≥º:
   Filename Interpreted_As Mapped_Class  Confidence           Method                              Column_Hint
deoksan.csv        deoksan        Motor         0.8 column_inference Detected from columns: time, curr, currR
   ‚úÖ deoksan.csv ‚Üí Motor (column_inference, confidence: 0.80)

‚úÖ Ïò®ÌÜ®Î°úÏßÄ Íµ¨Ï∂ï ÏôÑÎ£å! (metadata_ontology_hybrid.ttl)

üìà Îß§Ìïë Î∞©Î≤ï ÌÜµÍ≥Ñ:
   column_inference: 1Í∞ú

Îß§Ìïë Í≤∞Í≥º ÏÉÅÏÑ∏:
      Filename Interpreted_As Mapped_Class  Confidence            Method  \
0  deoksan.csv        deoksan        Motor         0.8  column_inference   

                                Column_Hint  
0  Detected from columns: time, curr, currR  
