In [10]:
# Cell 1: Import Libraries and Set Configuration
# ----------------------------------------------
# This cell imports the necessary libraries and sets up the configuration for the script.
# It defines the URL for downloading the official Excel file, a fallback CSV file in case the download fails,
# and the column names for filtering the data later.

# Import necessary libraries
import pandas as pd
import requests
from io import BytesIO
from datetime import datetime
import sys  # To potentially exit if files can't be loaded

# --- Configuration ---
# Address of the official Excel workbook to download
ater_url = "https://www.galaxie.enseignementsup-recherche.gouv.fr/ensup/ATERListesOffresPubliees/ATEROffres_publiees_annee.xls"
# Local CSV consulted only when the download above fails
fallback_csv = "ATER.csv"

# Column headers the filtering cells rely on
profile_col = 'Profil appel à candidatures'  # job profile text, searched by keyword
date_col = 'Date cloture candidature'        # application deadline, compared to the target date

# Echo the settings in one call; sep="\n" yields the same five output lines
print(
    "Libraries imported and configuration set.",
    f"Configured URL: {ater_url}",
    f"Fallback CSV: {fallback_csv}",
    f"Profile column: {profile_col}",
    f"Date column: {date_col}",
    sep="\n",
)

Libraries imported and configuration set.
Configured URL: https://www.galaxie.enseignementsup-recherche.gouv.fr/ensup/ATERListesOffresPubliees/ATEROffres_publiees_annee.xls
Fallback CSV: ATER.csv
Profile column: Profil appel à candidatures
Date column: Date cloture candidature


In [16]:
# Columns kept from the source table, in the order they are shown per position
columns_of_interest = [
    # Institution
    'Etablissement de rattachement',
    # Job profile
    'Profil appel à candidatures',
    # Application deadline
    'Date cloture candidature',
    # Location
    'Localisation appel à candidatures',
    # Workload
    'Quotité du poste',
    # Status
    'Etat du poste',
    # Type of position
    'Type du poste',
]
def _select_columns_of_interest(data):
    """Strip whitespace from the headers of `data` and return only `columns_of_interest`.

    Terminates the run through `sys.exit` when an expected column is missing.
    """
    # Clean column names in case the headers carry leading/trailing whitespace
    data.columns = data.columns.str.strip()
    try:
        return data[columns_of_interest]
    except KeyError as e:
        print(f"Error: {e}")
        print("Please verify that the column names in 'columns_of_interest' match the actual column names.")
        sys.exit("Exiting due to column mismatch.")


def load_and_filter_data(timeout=30):
    """
    Load the ATER offers and keep only the columns of interest.

    The Excel file at `ater_url` is tried first. If downloading or parsing it
    fails for any reason, the local `fallback_csv` file is read instead.

    Args:
        timeout: Seconds to wait for the HTTP server before giving up and
            switching to the fallback CSV.

    Returns:
        A DataFrame restricted to the columns listed in `columns_of_interest`.

    Raises:
        SystemExit: if neither source can be loaded, or if an expected column
            is missing from the loaded table.
    """
    try:
        # Without a timeout a stalled server would block this cell forever
        response = requests.get(ater_url, timeout=timeout)
        response.raise_for_status()
        # The header is read from the 3rd row of the sheet (index 2)
        data = pd.read_excel(BytesIO(response.content), header=2)
        print("Data successfully loaded from URL.")

        # Discard the first two rows that follow the header (labels 0 and 1), then renumber.
        # NOTE(review): confirm these two rows are never real offers.
        data = data.drop(index=[0, 1]).reset_index(drop=True)

        # Debug: print column names to verify
        print("Columns in the loaded data after dropping rows:")
        print(data.columns)

        return _select_columns_of_interest(data)
    except Exception as e:
        # SystemExit is not an Exception subclass, so a column mismatch above still exits
        print(f"Failed to load data from URL: {e}")
        print(f"Attempting to load fallback CSV: {fallback_csv}")
        try:
            data = pd.read_csv(fallback_csv)
            print("Data successfully loaded from fallback CSV.")
        except Exception as csv_error:
            print(f"Failed to load fallback CSV: {csv_error}")
            sys.exit("Unable to load data. Exiting.")

        # NOTE(review): assumes the CSV has its header on the first line and no
        # extra rows to drop — adjust if the file mirrors the Excel layout.
        return _select_columns_of_interest(data)

# Run the loader once and preview the first five rows of the selection.
# NOTE(review): later cells read a variable named `df` that no visible cell
# assigns — confirm where it is expected to come from.
filtered_results = load_and_filter_data()
print(filtered_results.head(n=5))

Data successfully loaded from URL.
Columns in the loaded data after dropping rows:
Index(['Année réf', 'UAI', 'Etablissement de rattachement',
       'Numéro de l'appel à candidatures', 'Section 1', 'Section 2',
       'Section 3', 'Section 4', 'Section 5', 'Section 6',
       'Date prise de fonction', 'Date ouverture candidature',
       'Date cloture candidature', 'Profil appel à candidatures',
       'Job profile', 'Research fiels EURAXESS',
       'Localisation appel à candidatures', 'Quotité du poste',
       'Etat du poste', 'Type du poste'],
      dtype='object')
  Etablissement de rattachement Profil appel à candidatures  \
0        UNIVERSITE COTE D'AZUR         ATER en Droit privé   
1        UNIVERSITE COTE D'AZUR   ATER en Science politique   
2        UNIVERSITE COTE D'AZUR  ATER en analyse économique   
3        UNIVERSITE COTE D'AZUR       ATER en microéconomie   
4        UNIVERSITE COTE D'AZUR   ATER en macroéconomie\r\n   

  Date cloture candidature Localisation appe

In [17]:
# --- User Input ---
# Collects the two filter criteria used by the filtering cell:
#   keyword     - text searched in the profile column (empty string if nothing was entered)
#   target_date - pandas Timestamp; positions closing on or after it are kept (None if not asked)
# NOTE(review): `df` is not assigned in any visible cell — it must already exist in the kernel.
keyword = ""
target_date_str = ""
target_date = None

if df is not None:  # Only ask for input if data is loaded
    print("\n--- Filtering Criteria ---")
    # Get keyword input
    keyword = input(f"Enter a keyword to search in '{profile_col}': ").strip()

    # Keep asking until the date parses
    while target_date is None:
        # The filtering cell keeps rows with closing date >= target_date, so the prompt says "on or after"
        target_date_str = input("Enter a closing date (format DD/MM/YYYY) to see positions closing on or after this date: ").strip()
        try:
            parsed_date = pd.to_datetime(target_date_str, dayfirst=True, errors='raise')
            # Empty input parses to NaT instead of raising; reject it explicitly
            if pd.isna(parsed_date):
                raise ValueError("no date entered")
        except ValueError:
            print("Invalid date format. Please use DD/MM/YYYY.")
            continue  # target_date is still None, so the loop asks again
        target_date = parsed_date
        print(f"Target date set to: {target_date.strftime('%d/%m/%Y')}")
else:
    print("Skipping user input because data loading failed.")


--- Filtering Criteria ---


Enter a keyword to search in 'Profil appel à candidatures':  informatique
Enter a closing date (format DD/MM/YYYY) to see positions closing on or before this date:  20/03/2025


Target date set to: 20/03/2025


Target date set to: 22/03/2025


In [23]:
# --- Filtering and Display ---
# Filters `df` by keyword and closing date, prints every match, and optionally
# exports the matches to an Excel workbook with one sheet per institution.
import pandas as pd  # Ensure pandas is imported


def _excel_sheet_name(label, taken):
    """Return a sheet name derived from `label` that Excel accepts and that is not in `taken`.

    Excel limits sheet names to 31 characters, forbids the characters
    [ ] : * ? / and backslash as well as a leading or trailing apostrophe,
    and compares names case-insensitively. `taken` is a set of lower-cased
    names already used; the returned name is added to it.
    """
    cleaned = "".join("_" if ch in "[]:*?/\\" else ch for ch in str(label)).strip("' ")
    base = cleaned[:31].strip("' ") or "Sheet"
    candidate = base
    counter = 2
    # Two institutions can collapse to the same name after truncation; number the later ones
    while candidate.lower() in taken:
        suffix = f"_{counter}"
        candidate = base[:31 - len(suffix)] + suffix
        counter += 1
    taken.add(candidate.lower())
    return candidate


if df is not None and keyword and target_date:
    print(f"\n--- Filtering Results ---")
    print(f"Filtering for keyword '{keyword}' in '{profile_col}'")
    print(f"Filtering for closing date on or after {target_date.strftime('%d/%m/%Y')} in '{date_col}'")

    # Apply keyword filter (case-insensitive).
    # regex=False matches the keyword literally, so input such as "c++" or "(" cannot raise a regex error.
    # na=False excludes rows with NaN in the profile column.
    keyword_filter = df[profile_col].str.contains(keyword, case=False, na=False, regex=False)

    # Apply date filter.
    # The column is converted in place; errors='coerce' turns unparseable values into NaT.
    # NOTE(review): if this column holds DD/MM/YYYY strings, dayfirst=True may be needed — confirm the dtype.
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
    date_filter = (df[date_col] >= target_date) & (df[date_col].notna())

    # Combine filters
    filtered_df = df[keyword_filter & date_filter]

    # --- Display Results ---
    if not filtered_df.empty:
        print(f"\nFound {len(filtered_df)} matching positions:")
        # The set of displayable columns is the same for every row, so compute it once
        display_cols = [col for col in columns_of_interest if col in filtered_df.columns]
        for index, row in filtered_df.iterrows():
            print("\n-------------------- Position --------------------")
            for col in display_cols:
                print(f"{col}: {row[col]}")
        print("\n-------------------- End of Results --------------------")

        # --- Option to Save as Excel grouped by Institution ---
        save_excel = input("\nDo you want to save these results to an Excel file grouped by institution? (yes/no): ").strip().lower()
        if save_excel in ('yes', 'y'):
            output_filename = input("Enter the filename for the Excel file (e.g., ater_positions.xlsx): ").strip()
            try:
                grouped = filtered_df.groupby('Etablissement de rattachement')
                used_sheet_names = set()
                with pd.ExcelWriter(output_filename) as writer:
                    for institution, group in grouped:
                        # Raw institution names can exceed Excel's sheet-name rules; sanitise them first
                        sheet_name = _excel_sheet_name(institution, used_sheet_names)
                        group.to_excel(writer, sheet_name=sheet_name, index=False)
                print(f"\nResults saved to '{output_filename}', grouped by institution.")
            except Exception as e:
                print(f"\nError saving to Excel: {e}")
    else:
        print("\nNo positions found matching your criteria.")

elif df is None:
    print("\nCannot filter because data was not loaded.")
else:
    print("\nFiltering skipped because user input was not provided or was invalid.")


--- Filtering Results ---
Filtering for keyword 'informatique' in 'Profil appel à candidatures'
Filtering for closing date on or after 20/03/2025 in 'Date cloture candidature'

Found 83 matching positions:

-------------------- Position --------------------
Etablissement de rattachement: UNIVERSITE COTE D'AZUR
Profil appel à candidatures: ATER en Informatique 
Date cloture candidature: 2025-04-24 00:00:00
Localisation appel à candidatures: nan
Quotité du poste: P
Etat du poste: V
Type du poste: ATER

-------------------- Position --------------------
Etablissement de rattachement: UNIVERSITE COTE D'AZUR
Profil appel à candidatures: ATER en Informatique 
Date cloture candidature: 2025-04-24 00:00:00
Localisation appel à candidatures: nan
Quotité du poste: P
Etat du poste: V
Type du poste: ATER

-------------------- Position --------------------
Etablissement de rattachement: UNIVERSITE COTE D'AZUR
Profil appel à candidatures: ATER en Informatique 
Date cloture candidature: 2025-04-24 0


Do you want to save these results to an Excel file grouped by institution? (yes/no):  yes
Enter the filename for the Excel file (e.g., ater_positions.xlsx):  ater2025.xlsx



Results saved to 'ater2025.xlsx', grouped by institution.


