In [1]:
import json
import re
import requests
from tqdm import tqdm
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
MAX_THREADS = 10 
WIKIDATA_SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
WIKIDATA_API_ENDPOINT = "https://www.wikidata.org/w/api.php"

def query_wikidata(sparql_query):
    """Truy v·∫•n Wikidata v√† tr·∫£ v·ªÅ k·∫øt qu·∫£ ph√π h·ª£p v·ªõi t·ª´ng lo·∫°i truy v·∫•n"""
    headers = {"User-Agent": "Mozilla/5.0", "Accept": "application/sparql-results+json"}
    response = requests.get(WIKIDATA_SPARQL_ENDPOINT, params={"query": sparql_query, "format": "json"}, headers=headers)

    if response.status_code == 200:
        data = response.json()

        # X·ª≠ l√Ω truy v·∫•n ASK (boolean)
        if "boolean" in data:
            return data["boolean"]

        # X·ª≠ l√Ω truy v·∫•n SELECT (tr·∫£ v·ªÅ bindings)
        results = data.get("results", {}).get("bindings", [])
        answers = []

        for result in results:
            for var in result:  # Duy·ªát qua t·∫•t c·∫£ c√°c bi·∫øn tr·∫£ v·ªÅ
                value = result[var]["value"]
                answers.append(value)  # Ch·∫•p nh·∫≠n t·∫•t c·∫£ gi√° tr·ªã, kh√¥ng ch·ªâ th·ª±c th·ªÉ Wikidata

        return answers  # Tr·∫£ v·ªÅ to√†n b·ªô danh s√°ch k·∫øt qu·∫£

    return None  # Tr·∫£ v·ªÅ None n·∫øu l·ªói x·∫£y ra
 # Tr·∫£ v·ªÅ danh s√°ch r·ªóng n·∫øu c√≥ l·ªói


def get_wikidata_label(entity_id):
    """ L·∫•y nh√£n (label) ti·∫øng Vi·ªát ho·∫∑c ti·∫øng Anh c·ªßa m·ªôt th·ª±c th·ªÉ/quan h·ªá """
    params = {
        "action": "wbgetentities",
        "ids": entity_id,
        "languages": "vi|en",
        "format": "json",
    }
    response = requests.get(WIKIDATA_API_ENDPOINT, params=params)
    if response.status_code == 200:
        data = response.json()
        labels = data.get("entities", {}).get(entity_id, {}).get("labels", {})
        return labels.get("vi", {}).get("value") or labels.get("en", {}).get("value") or entity_id
    return entity_id

def extract_entities_relations(sparql_query):
    """ Tr√≠ch xu·∫•t c√°c th·ª±c th·ªÉ (QID) v√† quan h·ªá (PID) t·ª´ SPARQL """
    entities = set(re.findall(r"wd:(Q\d+)", sparql_query))
    relations = set(re.findall(r"wdt:(P\d+)|p:(P\d+)|ps:(P\d+)|pq:(P\d+)", sparql_query))
    relations = {r for t in relations for r in t if r}  # X·ª≠ l√Ω tuple match
    return list(entities), list(relations)

def convert_s_expression_to_nor_s_expr(s_expression, entity_map, relation_map):
    """ Chuy·ªÉn s_expression th√†nh nor_s_expr b·∫±ng c√°ch thay th·∫ø m√£ Q/P b·∫±ng nh√£n """
    def replace_match(match):
        code = match.group(0)
        return f"[ {entity_map.get(code, relation_map.get(code, code))} ]"

    return re.sub(r"Q\d+|P\d+", replace_match, s_expression)

def preprocess_question(question):
    """ Lo·∫°i b·ªè d·∫•u `{}` trong c√¢u h·ªèi """
    return re.sub(r"[{}]", "", question)



In [None]:
def process_test_file(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    processed_data = []
    question_id = 1  # Bi·∫øn ƒë·∫øm t·ª± ƒë·ªông

    print(f"üîÑ B·∫Øt ƒë·∫ßu x·ª≠ l√Ω {len(data)} ƒëi·ªÉm d·ªØ li·ªáu v·ªõi {MAX_THREADS} lu·ªìng...\n")
    start_time = time.time()  # B·∫Øt ƒë·∫ßu ƒëo th·ªùi gian

    with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
        future_to_entry = {executor.submit(query_wikidata, entry.get("sparql_wikidata", "")): entry for entry in data}
        
        for future in tqdm(as_completed(future_to_entry), total=len(data), desc="‚è≥ ƒêang x·ª≠ l√Ω", unit="c√¢u"):
            entry = future_to_entry[future]
            try:
                answers = future.result()
                if not answers:
                    continue  # B·ªè qua ƒëi·ªÉm d·ªØ li·ªáu kh√¥ng c√≥ c√¢u tr·∫£ l·ªùi

                # X·ª≠ l√Ω ti·∫øp n·∫øu c√≥ c√¢u tr·∫£ l·ªùi
                sparql_query = entry.get("sparql_wikidata", "")
                question = preprocess_question(entry.get("question", ""))
                s_expression = entry.get("s_expression", "")
                entities, relations = extract_entities_relations(sparql_query)

                # Truy v·∫•n song song ƒë·ªÉ l·∫•y nh√£n th·ª±c th·ªÉ v√† quan h·ªá
                entity_map = {}
                relation_map = {}

                entity_futures = {executor.submit(get_wikidata_label, qid): qid for qid in entities}
                relation_futures = {executor.submit(get_wikidata_label, pid): pid for pid in relations}

                for future in as_completed(entity_futures):
                    entity_map[entity_futures[future]] = future.result()

                for future in as_completed(relation_futures):
                    relation_map[relation_futures[future]] = future.result()

                # Chuy·ªÉn s_expression th√†nh nor_s_expr
                nor_s_expr = convert_s_expression_to_nor_s_expr(s_expression, entity_map, relation_map) if s_expression else ""

                processed_data.append({
                    "question_id": question_id,
                    "question": question,
                    "sparql": sparql_query,
                    "answer": answers,
                    "s_expr": s_expression,
                    "nor_s_expr": nor_s_expr,
                    "gold_entity_map": entity_map,
                    "gold_relation_map": relation_map
                })
                question_id += 1  # TƒÉng s·ªë th·ª© t·ª±

            except Exception as e:
                print(f"‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi {question_id}: {e}")

    end_time = time.time()  # K·∫øt th√∫c ƒëo th·ªùi gian
    total_time = end_time - start_time

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(processed_data, f, indent=4, ensure_ascii=False)

    print(f"\n‚úÖ X·ª≠ l√Ω ho√†n t·∫•t! K·∫øt qu·∫£ ƒë∆∞·ª£c l∆∞u v√†o {output_file}")
    print(f"‚è≥ T·ªïng th·ªùi gian x·ª≠ l√Ω: {total_time:.2f} gi√¢y ({total_time/60:.2f} ph√∫t)\n")

# Ch·∫°y ch∆∞∆°ng tr√¨nh
input_file = "s_expression/test_with_s_expression.json"
output_file = "filter/test_filtered.json"
process_test_file(input_file, output_file)


üîÑ B·∫Øt ƒë·∫ßu x·ª≠ l√Ω 5590 ƒëi·ªÉm d·ªØ li·ªáu v·ªõi 10 lu·ªìng...



‚è≥ ƒêang x·ª≠ l√Ω:  29%|‚ñà‚ñà‚ñâ       | 1617/5590 [26:16<59:09,  1.12c√¢u/s]  

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 1274: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  38%|‚ñà‚ñà‚ñà‚ñä      | 2112/5590 [32:49<1:00:45,  1.05s/c√¢u]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 1656: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 2754/5590 [41:22<39:32,  1.20c√¢u/s]  

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 2175: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 3165/5590 [46:42<28:01,  1.44c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 2501: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 3205/5590 [47:09<43:57,  1.11s/c√¢u]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 2529: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 3211/5590 [47:14<38:42,  1.02c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 2534: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 3635/5590 [52:42<26:13,  1.24c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 2868: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 4018/5590 [58:13<26:55,  1.03s/c√¢u]  

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3188: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 4367/5590 [1:02:59<20:02,  1.02c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3470: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 4506/5590 [1:04:48<16:40,  1.08c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3581: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 4613/5590 [1:06:09<12:13,  1.33c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3660: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 4655/5590 [1:06:43<14:34,  1.07c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3693: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 4705/5590 [1:07:23<10:23,  1.42c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3731: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 5012/5590 [1:11:23<08:34,  1.12c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3983: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5590/5590 [1:19:04<00:00,  1.18c√¢u/s]



‚úÖ X·ª≠ l√Ω ho√†n t·∫•t! K·∫øt qu·∫£ ƒë∆∞·ª£c l∆∞u v√†o test_filtered.json
‚è≥ T·ªïng th·ªùi gian x·ª≠ l√Ω: 4744.77 gi√¢y (79.08 ph√∫t)



In [None]:
input_file2 = "s_expression/train_with_s_expression.json"
output_file2 = "filter/train_filtered.json"
process_test_file(input_file2, output_file2)

üîÑ B·∫Øt ƒë·∫ßu x·ª≠ l√Ω 22462 ƒëi·ªÉm d·ªØ li·ªáu v·ªõi 10 lu·ªìng...



‚è≥ ƒêang x·ª≠ l√Ω:   2%|‚ñè         | 501/22462 [21:54<5:33:06,  1.10c√¢u/s]  

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 385: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:   3%|‚ñé         | 576/22462 [22:51<4:10:25,  1.46c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 442: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:   3%|‚ñé         | 717/22462 [24:46<5:31:42,  1.09c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 559: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:   5%|‚ñç         | 1013/22462 [28:36<6:00:32,  1.01s/c√¢u]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 787: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:   5%|‚ñå         | 1211/22462 [31:10<4:24:48,  1.34c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 942: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  11%|‚ñà         | 2484/22462 [47:13<5:11:03,  1.07c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 1929: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  17%|‚ñà‚ñã        | 3860/22462 [1:05:38<3:38:23,  1.42c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3019: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  17%|‚ñà‚ñã        | 3891/22462 [1:05:58<3:40:16,  1.41c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3040: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  21%|‚ñà‚ñà‚ñè       | 4817/22462 [1:18:29<3:24:39,  1.44c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3789: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  22%|‚ñà‚ñà‚ñè       | 4931/22462 [1:19:56<3:39:28,  1.33c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3879: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  22%|‚ñà‚ñà‚ñè       | 4976/22462 [1:20:31<4:19:47,  1.12c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 3916: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  23%|‚ñà‚ñà‚ñé       | 5256/22462 [1:24:19<3:52:43,  1.23c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 4132: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  25%|‚ñà‚ñà‚ñç       | 5527/22462 [1:28:01<3:43:40,  1.26c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 4348: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  27%|‚ñà‚ñà‚ñã       | 5978/22462 [1:34:01<4:33:07,  1.01c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 4712: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  29%|‚ñà‚ñà‚ñâ       | 6463/22462 [1:40:18<4:22:54,  1.01c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 5082: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  32%|‚ñà‚ñà‚ñà‚ñè      | 7258/22462 [1:50:43<4:03:38,  1.04c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 5716: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  35%|‚ñà‚ñà‚ñà‚ñå      | 7888/22462 [1:59:26<2:12:00,  1.84c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 6210: Response ended prematurely


‚è≥ ƒêang x·ª≠ l√Ω:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 9624/22462 [2:22:49<3:05:01,  1.16c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 7595: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 11052/22462 [2:42:12<1:40:12,  1.90c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 8731: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 11292/22462 [2:45:20<2:20:06,  1.33c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 8909: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 11475/22462 [2:47:46<2:38:29,  1.16c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 9052: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 11674/22462 [2:50:28<2:50:13,  1.06c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 9201: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 12819/22462 [3:06:43<1:59:38,  1.34c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 10101: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 14774/22462 [3:34:11<1:56:05,  1.10c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 11658: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 15053/22462 [3:37:39<2:05:24,  1.02s/c√¢u]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 11858: Response ended prematurely


‚è≥ ƒêang x·ª≠ l√Ω:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 16002/22462 [3:50:27<1:06:04,  1.63c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 12618: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 18278/22462 [4:21:21<46:37,  1.50c√¢u/s]  

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 14438: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 18598/22462 [4:25:20<49:59,  1.29c√¢u/s]  

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 14682: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 18991/22462 [4:30:16<49:05,  1.18c√¢u/s]  

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 14987: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 19835/22462 [4:40:39<37:43,  1.16c√¢u/s]  

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 15617: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 20158/22462 [4:44:39<43:00,  1.12s/c√¢u]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 15854: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 20825/22462 [4:53:09<23:31,  1.16c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 16367: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 20972/22462 [4:54:47<12:20,  2.01c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 16473: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 21288/22462 [4:59:09<16:59,  1.15c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 16718: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 21545/22462 [5:02:32<13:35,  1.12c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 16923: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 22030/22462 [5:08:51<05:10,  1.39c√¢u/s]

‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 17302: expected string or bytes-like object


‚è≥ ƒêang x·ª≠ l√Ω: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 22462/22462 [5:14:06<00:00,  1.19c√¢u/s]


‚ùå L·ªói khi x·ª≠ l√Ω c√¢u h·ªèi 17614: Expecting ':' delimiter: line 15270484 column 17 (char 360513536)

‚úÖ X·ª≠ l√Ω ho√†n t·∫•t! K·∫øt qu·∫£ ƒë∆∞·ª£c l∆∞u v√†o train_filtered.json
‚è≥ T·ªïng th·ªùi gian x·ª≠ l√Ω: 18846.94 gi√¢y (314.12 ph√∫t)

