# Import seen links into DB (preliminary stage)

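This notebook reads the seen-links file configured as `config.SEEN_LISTINGS_FILE` (`data/seen_links.txt`), de-duplicates its URLs, and inserts every URL that is not already present in the CRM database as a `preliminary` listing.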

In [1]:
from config import SEEN_LISTINGS_FILE
from webapp.database.database import SessionLocal
from webapp.database.models import Listing
from sqlalchemy import func
import os

# Stage assigned to every listing created by this import.
STAGE_PRELIMINARY = "preliminary"

# Maximum number of URLs bound into a single ``IN (...)`` clause. Database
# backends cap the number of bound parameters per statement (older SQLite
# builds default to 999), so the existence check is issued in chunks instead
# of one query whose size grows with the seen-links file.
IN_CLAUSE_CHUNK_SIZE = 500

# Fail fast with a clear message when the path is unset or does not exist.
if not SEEN_LISTINGS_FILE or not os.path.exists(SEEN_LISTINGS_FILE):
    raise FileNotFoundError(f"Seen links file not found: {SEEN_LISTINGS_FILE}")

# One URL per line; surrounding whitespace is stripped and blank lines are skipped.
with open(SEEN_LISTINGS_FILE, "r", encoding="utf-8") as handle:
    urls = [line.strip() for line in handle if line.strip()]

# dict.fromkeys drops duplicates while keeping first-seen order, so positions
# assigned below follow the order of the file.
unique_urls = list(dict.fromkeys(urls))
print(f"Loaded {len(urls)} URLs ({len(unique_urls)} unique) from {SEEN_LISTINGS_FILE}")

db = SessionLocal()
try:
    # Collect the URLs that already have a Listing row, one chunk at a time.
    # With an empty file the loop body never runs and no query is sent.
    existing = []
    for start in range(0, len(unique_urls), IN_CLAUSE_CHUNK_SIZE):
        chunk = unique_urls[start:start + IN_CLAUSE_CHUNK_SIZE]
        existing.extend(
            db.query(Listing.idealista_url)
            .filter(Listing.idealista_url.in_(chunk))
            .all()
        )
    existing_urls = {row[0] for row in existing}

    # New listings are appended after the highest position currently used in
    # the preliminary stage (0 when that stage has no rows yet).
    max_pos = db.query(func.max(Listing.position)).filter(Listing.stage == STAGE_PRELIMINARY).scalar() or 0

    created = 0
    for url in unique_urls:
        if url in existing_urls:
            continue
        max_pos += 1
        db.add(Listing(
            title="Seen listing",
            idealista_url=url,
            stage=STAGE_PRELIMINARY,
            position=max_pos,
            source="seen_import",
        ))
        existing_urls.add(url)
        created += 1

    # Skip the commit when nothing was added.
    if created:
        db.commit()
    print(f"Imported {created} new URLs into preliminary stage")
except Exception:
    # Discard any partially staged rows before propagating the error.
    db.rollback()
    raise
finally:
    db.close()


Loaded 177 URLs (144 unique) from /Users/max/Documents/Documents_max/Projects/IdealistaBot/data/seen_links.txt
Imported 144 new URLs into preliminary stage
