<a href="https://colab.research.google.com/github/bkristensen/Colabs/blob/main/Teo/Hent_synonymer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installér moduler

In [None]:
!pip install gspread oauth2client pyppeteer nest_asyncio




# Importér moduler

In [6]:
import gspread
import sqlite3

import asyncio
import nest_asyncio

import pyppeteer

import google.auth

# from google.colab import auth

# from pyppeteer import launch
# from pyppeteer import chromium_downloader

# Config

In [7]:
# Locations on the mounted Google Drive, looked up by key in later cells.
# NOTE: despite its name, "database_dir" is the path of the SQLite file itself.
conf = dict(
    drive_dir="/content/drive",
    data_dir="/content/drive/MyDrive/Data",
    database_dir="/content/drive/MyDrive/Data/Synonyms.db",
)

print(conf)

{'drive_dir': '/content/drive', 'data_dir': '/content/drive/MyDrive/Data', 'database_dir': '/content/drive/MyDrive/Data/Synonyms.db'}


# Init

In [8]:
# Patch asyncio so that run_until_complete() can be called from code that is
# already running inside the notebook's event loop (used when storing words).
nest_asyncio.apply()
# Optional, currently disabled: explicitly download the Chromium build for pyppeteer.
# await pyppeteer.chromium_downloader.download_chromium()

### Test Pyppeteer browser

In [9]:

async def test_browser():
    """Smoke-test that pyppeteer can start headless Chromium and load a page.

    Launches a browser, navigates to https://example.com, reads the page
    content and prints a confirmation message. The browser is closed in a
    ``finally`` block, so a failed navigation does not leave a Chromium
    process running; the original exception still propagates.
    """
    browser = await pyppeteer.launch(headless=True, args=["--no-sandbox"])
    try:
        page = await browser.newPage()
        await page.goto("https://example.com")
        # Reading the content is part of the check; the HTML itself is not needed.
        await page.content()
    finally:
        await browser.close()
    print("Browser virker!")

# Top-level await: run the smoke test directly from the notebook cell.
await test_browser()

Browser virker!


### Opret adgang til Google drev

In [10]:
# google.colab exists only inside a Colab runtime. Import it explicitly rather
# than relying on the kernel having loaded it as a side effect.
from google.colab import auth, drive

# Authorise this runtime to act on behalf of the signed-in Google user.
auth.authenticate_user()

# Build the gspread client from the default credentials
# (google.auth is used here instead of oauth2client).
creds, _ = google.auth.default()
gc = gspread.authorize(creds)

# Mount Google Drive at the configured mount point.
drive.mount(conf["drive_dir"])

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Hent Pyppeteer browser pakke

# SQLite handlinger

### Opret SQLite database

In [11]:
# Open (or create) the SQLite database file. `conn` and `cursor` are
# module-level handles shared with the cells that store words.
conn = sqlite3.connect(conf['database_dir'])
cursor = conn.cursor()

# Create the tables if they are missing:
#   Tags        - one row per tag word
#   Synonyms    - one row per distinct synonym
#   TagSynonyms - many-to-many link between Tags and Synonyms
#   TagIds      - ids from the spreadsheet that belong to a tag
# The word columns are declared TEXT. The previous declaration STRING(100)
# is not a text type in SQLite: it gets NUMERIC affinity, so a numeric-looking
# word would be stored as a number. Because of IF NOT EXISTS, a database that
# already exists keeps whatever column types it was created with.
# NOTE: SQLite only enforces the FOREIGN KEY clauses when
# `PRAGMA foreign_keys = ON` is set on the connection; it is not set here.
cursor.executescript("""
CREATE TABLE IF NOT EXISTS Tags (
    Id INTEGER PRIMARY KEY AUTOINCREMENT,
    Tag TEXT UNIQUE NOT NULL
);

CREATE TABLE IF NOT EXISTS Synonyms (
    Id INTEGER PRIMARY KEY AUTOINCREMENT,
    Synonym TEXT UNIQUE NOT NULL
);

CREATE TABLE IF NOT EXISTS TagSynonyms (
    TId INTEGER NOT NULL,
    SId INTEGER NOT NULL,
    PRIMARY KEY (TId, SId),
    FOREIGN KEY (TId) REFERENCES Tags(Id),
    FOREIGN KEY (SId) REFERENCES Synonyms(Id)
);

CREATE TABLE IF NOT EXISTS TagIds (
    TId INTEGER NOT NULL,
    TagId INTEGER NOT NULL,
    PRIMARY KEY (TId, TagId),
    FOREIGN KEY (TId) REFERENCES Tags(Id)
);
""")

conn.commit()


### Gem ord og synonym

In [12]:
def gem_ord_med_synonymer(ordet, tagids):
    """Store a tag word, link it to its ids and, for new words, fetch synonyms.

    Uses the module-level ``cursor`` / ``conn`` SQLite handles.

    Args:
        ordet: The tag word, stored in ``Tags.Tag``.
        tagids: Iterable of ids to link to the word in ``TagIds``; may be
            empty or ``None``.

    Behaviour:
        * The word is inserted into ``Tags`` if it is not there yet.
        * Every id in ``tagids`` is linked to the word (duplicates ignored).
        * Only when the word was newly inserted, synonyms are fetched with
          ``hent_synonymer`` and stored capitalised in ``Synonyms`` /
          ``TagSynonyms``.
        * All changes are committed together. If anything fails, they are
          rolled back and the exception is re-raised, so a word whose lookup
          crashed is treated as new again on the next run.
    """
    try:
        cursor.execute("SELECT Id FROM Tags WHERE Tag = ?", (ordet,))
        row = cursor.fetchone()
        new_tag = row is None
        if new_tag:
            cursor.execute("INSERT OR IGNORE INTO Tags (Tag) VALUES (?)", (ordet,))
            cursor.execute("SELECT Id FROM Tags WHERE Tag = ?", (ordet,))
            row = cursor.fetchone()
        # fetchone() returns a 1-tuple; unpack it for both new and existing
        # tags so that an integer (not a tuple) is bound as parameter below.
        tag_id = row[0]

        for tagid in tagids or ():
            cursor.execute("""
                INSERT OR IGNORE INTO TagIds (TId, TagId)
                VALUES (?, ?)
            """, (tag_id, tagid))

        if new_tag:
            print(f"Henter synonymer for: {ordet}")
            # Called from inside the notebook's running loop.
            syns = asyncio.get_event_loop().run_until_complete(hent_synonymer(ordet))
            for syn in syns or ():
                n = f"{syn}".capitalize()
                cursor.execute("INSERT OR IGNORE INTO Synonyms (Synonym) VALUES (?)", (n,))
                cursor.execute("SELECT Id FROM Synonyms WHERE Synonym = ?", (n,))
                synonym_id = cursor.fetchone()[0]

                # The composite primary key makes a repeated link a no-op.
                cursor.execute("""
                    INSERT OR IGNORE INTO TagSynonyms (TId, SId)
                    VALUES (?, ?)
                """, (tag_id, synonym_id))
    except BaseException:
        # Also covers a manual interrupt of the long-running loop: do not leave
        # a half-stored word behind for the next commit to persist.
        conn.rollback()
        raise

    conn.commit()



In [13]:

async def hent_synonymer(ordet):
    """Scrape synonymet.dk for the synonyms of a word.

    Args:
        ordet: The word to look up. It is percent-encoded before being put
            into the URL path, so spaces, ``/``, ``?`` or ``#`` in the word
            cannot change which page is requested.

    Returns:
        A list with the stripped, non-empty text of every ``.wordcloud-span``
        element on the page. An empty list is returned when the selector does
        not appear within 3 seconds or when reading the elements fails.

    Raises:
        Whatever ``pyppeteer`` raises while opening the page or navigating.
        The browser is closed before the exception propagates.
    """
    from urllib.parse import quote

    url = f"https://synonymet.dk/ord/{quote(ordet, safe='')}"
    browser = await pyppeteer.launch(headless=True, args=["--no-sandbox"])
    try:
        page = await browser.newPage()
        await page.goto(url)

        synonymer = []
        try:
            await page.waitForSelector(".wordcloud-span", timeout=3000)
            for el in await page.querySelectorAll(".wordcloud-span"):
                tekst = (await page.evaluate('(el) => el.textContent', el)).strip()
                if tekst:
                    synonymer.append(tekst)
        except Exception:
            # Best effort: a missing word cloud or a failed read means
            # "no synonyms". Any partial result is discarded. Cancellation and
            # KeyboardInterrupt are not Exception subclasses and still propagate.
            synonymer = []
        return synonymer
    finally:
        # Always release the Chromium process, including on navigation errors.
        await browser.close()

In [14]:
# Åbn arket og vælg det første ark
sheet = gc.open("Synonymer").get_worksheet(3)
print(sheet.spreadsheet_id)
# Hent alle ord fra kolonne A, start fra række 2
ordliste_ids = sheet.col_values(1)[1:]
ordliste_tags = sheet.col_values(2)[1:]

print(ordliste_ids)
print(ordliste_tags)

1366Aj-KBdx6Xvt4rjMvpnPjQVf_Hkepyn3JnF1KQodU
['311', '84', '322', '264', '5', '335', '308', '373', '104', '202', '183', '308', '74', '33', '173', '32', '102', '365', '217', '294', '50', '76', '342', '33', '114', '109', '24', '56', '56', '199', '299', '5', '100', '2', '364', '105', '17', '46', '52', '75', '151', '82', '372', '267', '290', '266', '261', '351', '265', '159', '178', '3', '123', '226', '78', '20', '20', '273', '273', '335', '195', '61', '317', '208', '331', '120', '118', '95', '114', '301', '330', '116', '116', '116', '189', '52', '53', '293', '44', '70', '310', '73', '164', '340', '235', '163', '48', '267', '31', '286', '226', '173', '217', '221', '23', '31', '68', '368', '100', '47', '332', '254', '265', '111', '111', '304', '188', '20', '326', '183', '313', '327', '300', '138', '138', '306', '364', '4', '4', '196', '314', '275', '67', '79', '313', '89', '112', '326', '343', '326', '343', '181', '296', '184', '184', '282', '272', '133', '21', '47', '23', '21', '51', '61',

In [15]:
# Kept because other cells may read it; nothing in this cell fills it.
resultater = []

# Store every word from the sheet together with the id found on the same row.
# gem_ord_med_synonymer() fetches synonyms itself the first time it sees a
# word, so the loop only has to hand over the word and its id. The shared
# `conn` is left open on purpose so the cell can be re-run.
for i, ordet in enumerate(ordliste_tags):
    ordet = ordet.strip()
    if not ordet:
        # Blank cell in the word column: nothing to store or look up.
        continue

    # The id column is read separately and can be shorter than the word
    # column; a missing or blank id means the word is stored without a link.
    tagid = ordliste_ids[i].strip() if i < len(ordliste_ids) else ""
    gem_ord_med_synonymer(ordet.capitalize(), [tagid] if tagid else [])

Henter synonymer for: Abort
Henter synonymer for: Adfærd
Henter synonymer for: Adlyde
Henter synonymer for: Advarsel
Henter synonymer for: Afgud
Henter synonymer for: Afkom
Henter synonymer for: Afstand
Henter synonymer for: Aftale
Henter synonymer for: Agressiv
Henter synonymer for: Alderdom
Henter synonymer for: Alkohol
Henter synonymer for: Anderledes
Henter synonymer for: Anger
Henter synonymer for: Angst
Henter synonymer for: Ansvar
Henter synonymer for: Arbejdspunkt
Henter synonymer for: Artikel
Henter synonymer for: Arve
Henter synonymer for: Bagtaler
Henter synonymer for: Barmhjertig
Henter synonymer for: Begivenhed
Henter synonymer for: Begærlig
Henter synonymer for: Behov
Henter synonymer for: Bekymring
Henter synonymer for: Beskeden
Henter synonymer for: Beskyttelse
Henter synonymer for: Betydning
Henter synonymer for: Bibelen
Henter synonymer for: Bibellæsning
Henter synonymer for: Bidrag
Henter synonymer for: Billedanalyse
Henter synonymer for: Billeddyrkelse
Henter synony