In [None]:
# List every distinct LAT (language) code that occurs in MRCONSO.RRF.
file_path = "MRCONSO.RRF"
unique_lats = set()

# UMLS distributes its RRF files as UTF-8, so decode them as such.
# errors="replace" keeps the scan running on a damaged byte sequence
# while leaving a visible U+FFFD marker instead of silently mis-decoding.
with open(file_path, "r", encoding="utf-8", errors="replace") as f:
    for line in f:
        parts = line.rstrip("\n").split("|")
        # LAT is the second pipe-delimited field; skip lines that lack it.
        if len(parts) > 1:
            unique_lats.add(parts[1])

print("Einzigartige LAT-Codes:")
for lat in sorted(unique_lats):
    print(" -", lat)

In [None]:
# Recorded result of the ENG import in the next cell -- Total: 10492333, ENG: 10492333

In [None]:
from pymongo import MongoClient
import os

# Load all English (LAT == "ENG") rows of MRCONSO.RRF into the MongoDB
# collection umls.mrconso-eng, then index the collection and print row counts.

# 1) Install pymongo if needed:
#    %pip install pymongo

# 2) Connect to MongoDB
client = MongoClient("mongodb://localhost:27018/")
db = client["umls"]
collection = db["mrconso-eng"]
collection.drop()  # start from an empty collection so a re-run does not duplicate rows

# 3) Path to the RRF file
file_path = "MRCONSO.RRF"
assert os.path.exists(file_path), "MRCONSO.RRF nicht gefunden!"

# 4) Insert ENG rows only
batch_size = 10000
batch = []
# Names for the first 16 pipe-delimited fields of a row; any fields after
# SRL are not stored.
cols = [
    "CUI","LAT","TS","LUI","STT","SUI","ISPREF",
    "AUI","SAUI","SCUI","SDUI","SAB","TTY","CODE","STR","SRL"
]

# UMLS distributes its RRF files as UTF-8. Decoding them as latin-1 never
# fails but stores mojibake for every non-ASCII character in STR.
# errors="replace" keeps the import running on a damaged byte sequence and
# leaves a visible U+FFFD marker in the affected value.
with open(file_path, "r", encoding="utf-8", errors="replace") as f:
    for line in f:
        parts = line.rstrip("\n").split("|")
        if len(parts) < len(cols):
            continue  # malformed / truncated row
        lat = parts[1]
        # Exact comparison: `lat not in ("ENG")` was a substring test against
        # the string "ENG" and would also have accepted "", "E", "NG", ...
        if lat != "ENG":
            continue  # keep English rows only

        # Build the document only for rows that passed the filter;
        # zip stops after the last name in cols.
        doc = dict(zip(cols, parts))
        batch.append(doc)
        if len(batch) >= batch_size:
            collection.insert_many(batch)
            batch = []
    # Flush the final, partially filled batch (insert_many rejects an empty list).
    if batch:
        collection.insert_many(batch)

# 5) Indexes for fast lookup
collection.create_index("LAT")
collection.create_index("CUI")
collection.create_index([("STR", "text")])

# 6) Sanity check: both counts must match because only ENG rows were inserted
total = collection.count_documents({})
eng = collection.count_documents({"LAT": "ENG"})
print(f"Import abgeschlossen. Total: {total}, ENG: {eng}")

In [None]:
# Recorded result of the German (LAT code GER) import in the next cell -- Total: 280301, DEU: 280301

In [None]:
from pymongo import MongoClient
import os

# Load all German (LAT == "GER") rows of MRCONSO.RRF into the MongoDB
# collection umls.mrconso-ger, then index the collection and print row counts.

# 1) Install pymongo if needed:
#    %pip install pymongo

# 2) Connect to MongoDB
client = MongoClient("mongodb://localhost:27018/")
db = client["umls"]
collection = db["mrconso-ger"]
collection.drop()  # start from an empty collection so a re-run does not duplicate rows

# 3) Path to the RRF file
file_path = "MRCONSO.RRF"
assert os.path.exists(file_path), "MRCONSO.RRF nicht gefunden!"

# 4) Insert GER rows only
batch_size = 10000
batch = []
# Names for the first 16 pipe-delimited fields of a row; any fields after
# SRL are not stored.
cols = [
    "CUI","LAT","TS","LUI","STT","SUI","ISPREF",
    "AUI","SAUI","SCUI","SDUI","SAB","TTY","CODE","STR","SRL"
]

# UMLS distributes its RRF files as UTF-8. Decoding them as latin-1 never
# fails but turns every umlaut / sharp s in STR into two garbage characters,
# which breaks text search on German terms. errors="replace" keeps the import
# running on a damaged byte sequence and leaves a visible U+FFFD marker.
with open(file_path, "r", encoding="utf-8", errors="replace") as f:
    for line in f:
        parts = line.rstrip("\n").split("|")
        if len(parts) < len(cols):
            continue  # malformed / truncated row
        lat = parts[1]
        if lat != "GER":
            continue  # keep German rows only (the LAT code filtered on is "GER", not "DEU")

        # Build the document only for rows that passed the filter;
        # zip stops after the last name in cols.
        doc = dict(zip(cols, parts))
        batch.append(doc)
        if len(batch) >= batch_size:
            collection.insert_many(batch)
            batch = []
    # Flush the final, partially filled batch (insert_many rejects an empty list).
    if batch:
        collection.insert_many(batch)

# 5) Indexes for fast lookup
collection.create_index("LAT")
collection.create_index("CUI")
# MongoDB text indexes default to English stemming and stop words; this
# collection holds German strings, so declare the language explicitly.
collection.create_index([("STR", "text")], default_language="german")

# 6) Sanity check: both counts must match because only GER rows were inserted
total = collection.count_documents({})
ger = collection.count_documents({"LAT": "GER"})
print(f"Import abgeschlossen. Total: {total}, DEU: {ger}")