In [None]:
# Scraping events from the CAP Danse festival
# Last scraping run: Sept 2025 (check the last modification dates in git)

In [None]:
# Note: the events must already have been scraped from the festival website.
# You can use the "Table Capture" browser extension to export the table to CSV.

In [None]:
# Import libs
import sys
import os
import git

# Add the "api" folder of the git repository to sys.path so the `script` package can be imported
git_root = git.Repo(search_parent_directories=True).working_tree_dir
sys.path.insert(0,   os.path.abspath(  os.path.join(  git_root,'api' ) ) )

import script.libs.utils as utils
import script.libs.scraping_utils as scraping_utils
import script.libs.HttpRequests as HttpRequests
from script.configuration import config, oa
from script.libs.getOaLocation import get_or_create_oa_location

from slugify import slugify
import json
from pprint import pprint

In [None]:
# OA access token, fetched once via oa.getToken() and reused by the cells below
# (location lookup, event creation and event deletion all receive it).
access_token = oa.getToken()

In [None]:
# Read the existing CSV export of the festival events.
# The path is relative to the notebook's working directory.
file_name="./Cap_Danse_2025.csv"
# Presumably a list of dict rows (each item is later passed to
# create_CAPDanse_OA_event, which calls .get on it) — confirm against utils.read_csv.
all_events = utils.read_csv(file_name)

In [None]:

def create_CAPDanse_OA_event(event: dict, year: int = 2025, duration: str = "1h") -> dict:
    """Build an OpenAgenda (OA) event payload from one row of the CAP Danse CSV.

    Args:
        event: One CSV row as a dict. Keys read here: 'Id', 'title',
            'description', 'mention', 'keywords', 'links', 'location', 'date'.
        year: Year forced onto the parsed start date. The day and month come
            from the 'date' text; forcing the year avoids assigning a wrong one.
        duration: Duration handed to ``utils.get_end_date`` to derive the end
            of the single timing ("1h" is the default value).

    Returns:
        A dict using OA keys: 'uid-externe', 'title', 'description',
        'locationUid', 'links', 'longDescription', 'keywords', 'timings',
        'attendanceMode' and 'onlineAccessLink'.

    Raises:
        TypeError: if 'title' or 'Id' is missing (None), since they are
            concatenated into the description and the external uid.
    """
    title = event.get('title')
    keyword = event.get('keywords')
    event_url = event.get('links')
    # Optional text columns: fall back to an empty string so a missing column
    # does not break the string concatenations below.
    mention = event.get('mention') or ""
    details = event.get('description') or ""

    # Default locationId : 11634941
    location_uid = get_or_create_oa_location(
        event.get('location'),
        access_token,
        oa.public_key,
        f"{config.OA_API_URL}/locations",
    )

    description = title + ". " + details
    long_description = mention + os.linesep + description

    # Keeps day & month from the scraped text but avoids assigning the wrong year.
    # NOTE(review): assumes get_datetime_from_text returns a datetime-like object
    # (it must support .replace and .isoformat) — confirm it never returns None.
    date_begin = scraping_utils.get_datetime_from_text(event.get('date')).replace(year=year)
    date_end = utils.get_end_date(date_begin, duration)

    # Only send the CSV keyword when there is one, so the list never
    # contains None or an empty string.
    keywords = ["CapDanse"]
    if keyword:
        keywords.append(keyword)

    return {
        "uid-externe": "scrap-" + event.get('Id') + "-" + slugify(title),
        "title": {"fr": title},
        "description": {"fr": description},
        "locationUid": int(location_uid),
        "links": event_url,
        # NOTE(review): sent as a plain string while title/description are
        # {"fr": ...} objects — confirm this is what the OA API expects.
        "longDescription": long_description,
        "keywords": {"fr": keywords},
        "timings": [
            {
                "begin": date_begin.isoformat(),
                "end": date_end.isoformat(),
            },
        ],
        "attendanceMode": 3,  # 1=offline, 2=online, 3=hybrid
        "onlineAccessLink": event_url,
    }


In [None]:
# First create a JSON file with all valid events.
OAEvents = []
for event in all_events:
    oa_event = create_CAPDanse_OA_event(event)
    if oa_event:
        OAEvents.append(oa_event)

# Write the file once, after the loop: this avoids rewriting the whole list for
# every event, and guarantees the file is (re)created even when there are no
# events, so the posting cell never reads a stale file from a previous run.
utils.save_dict_to_json_file(OAEvents, "eventsCapDanse2ToPost.json")

In [None]:
# Then post the events and save them in a JSON file, keyed by the unique ID
# attributed by OA, so they can be updated later or the run can be restarted
# from the last success if something fails.
saved_events_capv2 = {}
with open('eventsCapDanse2ToPost.json') as json_file:
    eventsv2 = json.load(json_file)

for event in eventsv2:
    try:
        response = HttpRequests.create_event(access_token, event=event)
        created = response['event']
        # Key by the OA uid when the response carries the created event,
        # otherwise fall back to our own external uid.
        uid = created['uid'] if created else event['uid-externe']
        saved_events_capv2[uid] = event
        # Saved after every success on purpose: a later failure must not
        # lose the record of what was already created.
        utils.save_dict_to_json_file(saved_events_capv2, "eventsCapDanse2Created.json")
    except Exception as e:
        # Best effort: report the failure and move on to the next event.
        print(f"Error creating event {event.get('title')} : {e}")
    

In [None]:
# CORRECTIONS IF NEEDED.
# DEFAULT = False
# Intended to update the events with online access link and attendance mode.
# WARNING: as written, the loop ends by calling HttpRequests.delete_event on the
# matched event — review the body before switching `correction` to True.
# NOTE(review): `allMatchEvents` is not defined in this notebook; it must be
# defined (list of dicts with a "title" key) before enabling corrections.
correction = False
if not correction:
    # Checked before touching `allMatchEvents`, so the cell runs on a fresh kernel.
    print("Corrections not executed")
else:
    for event in [allMatchEvents[0]]:
        # print(event.get("title"))
        try:
            response = HttpRequests.search_events(oa.public_key, event.get("title"))
            if not response.get("events"):
                raise ValueError(f"No events return for {event.get('title')} found in OA {response}")
            # From here on `event` is the first record returned by the search,
            # no longer the local item we iterated over.
            event = response.get("events")[0]
            if not event.get("uid"):
                raise ValueError(f"Event returned for {event.get('title')} is empty {response.get('events')}")
        except Exception as e:
            print(f"Error in event search: {e}")
            continue
        # pprint(event)
        eventUID = event.get("uid")
        eventTitle = event.get("title")
        print(eventUID, eventTitle)

        # EXAMPLE OF CORRECTION
        # event["onlineAccessLink"] = event.get("lien")
        # event["attendanceMode"] = 3  # 1=offline, 2=online, 3=hybrid
        # event["keywords"] = { "fr" : event.get("keywords")}

        HttpRequests.delete_event(
            access_token,
            eventUID
        )
        print("--------")