In [3]:
import unicodedata
import pytz
import requests
from ics import Calendar
from datetime import datetime
import json
import re

# üîó Liens ICS et fichiers de sortie
CALENDARS = [
    {
        "name": "UNIV",
        "url": "http://planning.univ-lemans.fr/jsp/custom/modules/plannings/anonymous_cal.jsp?resources=7209&projectId=08&calType=ical&nbWeeks=52",
        "output": "univ_events.json"
    },
    {
        "name": "UNIV2",
        "url": "http://planning.univ-lemans.fr/jsp/custom/modules/plannings/anonymous_cal.jsp?resources=809&projectId=08&calType=ical&nbWeeks=52",
        "output": "univ_events2.json"
    }
]

TIMEZONE = pytz.timezone("Europe/Paris")
jours_fr = ["Lundi", "Mardi", "Mercredi", "Jeudi", "Vendredi", "Samedi", "Dimanche"]

formations_regex = r"""
    (?:M\d?\s+)?                  
    (MOSAE|URBANISTIC|MATTERRE|MIDEC|
     VILLE\s+ET\s+ENVIRONNEMENT[S]*\s*URBAIN[S]*|
     GEOGRAPHIE.*?DEVELOPPEMENT|
     HISTOIRE.*?PATRIMOINE|
     DDL.*?|LEA.*?|MEEF.*?|
     LP.*?|UEO.*?)
"""

# ----------------------------------------------------------
# UTILITAIRES
# ----------------------------------------------------------

def clean_text(text):
    if not text:
        return ""
    text = unicodedata.normalize("NFC", text)
    return text.replace("\n", " ").replace("\r", "").strip()

def extract_title(raw_title):
    line = raw_title.split("\\n")[0].strip()
    line = re.sub(r"^\s*\d+(?=\s*(TD|TP|CM)\b)", "", line, flags=re.IGNORECASE)
    line = re.sub(r"^\s*G\d+\s+", "", line, flags=re.IGNORECASE)
    line = re.sub(formations_regex, "", line, flags=re.IGNORECASE | re.VERBOSE).strip()
    return line

# ----------------------------------------------------------
# EXTRACTION ENSEIGNANT + SALLE DESCRIPTIONS ADE
# ----------------------------------------------------------

def extract_enseignant_and_salle(description, fallback_title=None):
    """
    Analyse ligne par ligne :
      HIVERT      ‚Üê enseignant
      C100        ‚Üê salle
      MOSAE...
    """
    if not description:
        return "non renseign√©", "non renseign√©e"

    # D√©coupe en lignes propres
    lines = [l.strip() for l in description.splitlines() if l.strip()]

    enseignant = None
    salle = None

    for i, line in enumerate(lines):

        # Ligne = nom enseignant (MAJ, lettres, espaces et - )
        if re.fullmatch(r"[A-Z√â√à√ä√é√õ√Ä√Ñ√ñ√ú\- ]{3,}", line):
            enseignant = line.title()  # HIVERT ‚Üí Hivert
            # ligne suivante = salle ?
            if i + 1 < len(lines):
                next_line = lines[i+1]
                # Salle du style C100, B203, L12...
                if re.fullmatch(r"[A-Za-z]\d{2,4}", next_line):
                    salle = next_line.upper()
            break

    return enseignant or "non renseign√©", salle or "non renseign√©e"

# ----------------------------------------------------------
# FALLBACK : REGEX SALLE G√âN√âRIQUE
# ----------------------------------------------------------

def extract_salle_fallback(description):
    if not description:
        return "non renseign√©e"

    desc = description.strip()

    labels = [
        "Salle", "Amphi", "Amphith√©√¢tre",
        "B√¢timent", "Salle informatique"
    ]
    label_regex = "|".join([re.escape(l) for l in labels])

    # Format propre : Label + code court
    m = re.search(
        rf"\b(?P<label>{label_regex})\b\s*(?P<code>[A-Za-z0-9][A-Za-z0-9\-./_]{{0,8}})",
        desc,
        re.IGNORECASE
    )

    if m:
        label = m.group("label")
        code = m.group("code").upper()
        if label.lower() == "salle informatique":
            return f"Salle informatique {code}"
        return f"{label.capitalize()} {code}"

    # fallback plus large
    m2 = re.search(
        rf"\b({label_regex})\b\s*([^\n,();‚Äî-]{{1,80}})",
        desc,
        flags=re.IGNORECASE
    )
    if m2:
        segment = m2.group(0).strip()
        segment = re.split(r"[,(;‚Äî]|  {2,}", segment)[0].strip()
        segment = re.sub(r"\b([A-Z√â√à√ä√õ√é]{2,}(?:\s+[A-Z√â√à√ä√õ√é]{2,})*)", "", segment).strip()
        return segment[:40] or "non renseign√©e"

    return "non renseign√©e"

# ----------------------------------------------------------
# FORMATAGE FINAL
# ----------------------------------------------------------

def format_event(event, maj_str):
    dtstart_utc = event.begin.datetime.replace(tzinfo=pytz.UTC)
    dtend_utc = event.end.datetime.replace(tzinfo=pytz.UTC)

    dtstart = dtstart_utc.astimezone(TIMEZONE)
    dtend = dtend_utc.astimezone(TIMEZONE)

    day = jours_fr[dtstart.weekday()]
    date_str = dtstart.strftime("%d/%m/%Y")
    start_str = dtstart.strftime("%H:%M")
    end_str = dtend.strftime("%H:%M")

    raw_title = clean_text(event.name or "Sans titre")
    description = event.description or ""
    location = clean_text(event.location or "")

    title = extract_title(raw_title)

    # Extraction enseignant + salle ADE
    enseignant, salle_detectee = extract_enseignant_and_salle(description, title)

    # Salle fallback regex
    salle = salle_detectee
    if salle == "non renseign√©e":
        salle = extract_salle_fallback(description)

    # Cas sp√©cial ESGT
    if salle == "non renseign√©e" and "salle esgt" in location.lower():
        salle = "Salle ESGT"

    return {
        "day": day,
        "date": date_str,
        "start": start_str,
        "end": end_str,
        "title": title,
        "salle": salle,
        "enseignant": enseignant,
        "maj": maj_str
    }

# ----------------------------------------------------------
# PROCESSUS GLOBAL
# ----------------------------------------------------------

def process_calendar(name, url, output_file):
    print(f"üì° T√©l√©chargement du calendrier {name}...")
    response = requests.get(url)
    if response.status_code != 200:
        print(f"‚ùå Erreur {response.status_code} pour {name}")
        return

    calendar = Calendar(response.text)
    events = []

    maj_str = datetime.now(TIMEZONE).strftime("%d-%m-%Y %H:%M:%S")

    for event in calendar.events:
        if event.begin and event.end:
            dtstart = event.begin.datetime.astimezone(TIMEZONE)
            if dtstart.weekday() < 5:  # Lundi ‚Üí Vendredi
                evt = format_event(event, maj_str)
                events.append(evt)
                print(f"‚úîÔ∏è {name} : {evt['title']} ({evt['date']} {evt['start']}-{evt['end']})")
            else:
                print(f"‚è≠Ô∏è {name} : Ignor√© (weekend) : {event.name}")

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(events, f, indent=2, ensure_ascii=False)

    print(f"‚úÖ {len(events)} √©v√©nements extraits pour {name}.")
    print(f"üìÑ Fichier g√©n√©r√© : {output_file}\n")

def main():
    for cal in CALENDARS:
        process_calendar(cal["name"], cal["url"], cal["output"])

if __name__ == "__main__":
    main()


üì° T√©l√©chargement du calendrier UNIV...
‚úîÔ∏è UNIV : CONFERENCE COP - Territoire Durable et Neutralit√© Carbone (08/12/2025 09:30-17:30)
‚úîÔ∏è UNIV : TD LANGUE (15/12/2025 13:30-15:30)
‚úîÔ∏è UNIV : TD LANGUE (16/12/2025 13:30-15:30)
‚úîÔ∏è UNIV : 1 Jumeau num√©rique des territoires (23/01/2026 08:30-12:45)
‚úîÔ∏è UNIV : 1 Jumeau num√©rique des territoires (30/01/2026 08:30-12:45)
‚úîÔ∏è UNIV : TD Concevoir un observatoire de Territoires S.C. (09/12/2025 15:30-17:00)
‚úîÔ∏è UNIV : TD Concevoir un observatoire de Territoires S.C. (02/12/2025 15:30-17:00)
‚úîÔ∏è UNIV : 1 Initiation au Drone (14/01/2026 14:00-16:45)
‚úîÔ∏è UNIV : 1 Initiation au Drone (05/02/2026 08:30-12:45)
‚úîÔ∏è UNIV : 1 Initiation au Drone (05/02/2026 14:00-18:15)
‚úîÔ∏è UNIV : 1 Initiation au Drone (06/02/2026 14:00-18:15)
‚úîÔ∏è UNIV : 1 Initiation au Drone (06/02/2026 08:30-12:45)
‚úîÔ∏è UNIV : 1EXAMEN  - ESGT (04/02/2026 08:30-12:45)
‚úîÔ∏è UNIV : 1 Initiation au Drone (14/01/2026 08:30-12:45)
‚úîÔ∏è UNIV :