In [None]:
# Start a wall-clock timer; the last cell prints the time elapsed since this point.
from time import perf_counter
start = perf_counter()

In [None]:
# Short alias -> full model tag. The trailing comment on each entry gives the
# model's size. "-full" aliases point at the fp16 variants, the others at
# q5_K_M quantized variants.
# NOTE(review): the tags look like Ollama model names — confirm against the
# backend used by webapp.process.
MODELS = {
    "llama-full": "llama3:8b-instruct-fp16",          # 16 GB
    "llama": "llama3:8b-instruct-q5_K_M",             # 5.7 GB

    "mistral-full": "mistral:7b-instruct-fp16",       # 14 GB
    "mistral": "mistral:7b-instruct-q5_K_M",          # 5.1 GB

    "phi": "phi3:14b-medium-4k-instruct-q5_K_M",      # 10 GB
    "phi-mini": "phi3:3.8b-mini-4k-instruct-q5_K_M",  # 2.8 GB
}

In [None]:
# Choose the model by alias; it must be a key of MODELS (an unknown alias raises KeyError).
# model_id is also reused as the suffix of the application folder name.
model_id = "llama"
MODEL = MODELS[model_id]

# Data pipeline

In [None]:
from webapp.process import *

## 0 | URL of the position to apply for

In [None]:
# Job offer page that the pipeline below is run against.
url = "https://emploi.cnrs.fr/Offres/CDD/FR636-MARCAS-009/Default.aspx"

## 1 | Fetching data

In [None]:
# Fetch the job offer page for `url`.
# NOTE(review): fetch_data is expected to come from the webapp.process star import and
# presumably returns the page's HTML source (going by the variable name) — confirm.
source_code_html = fetch_data(url)

## 2 | Cleaning data

### 2.1 Regex

In [None]:
# Reduce the fetched page to the content used by the later steps.
# NOTE(review): html_extract_content presumably strips markup with regexes (per the
# section title) — confirm in webapp.process.
final_content = html_extract_content(source_code_html)

### 2.2 AI summary

In [None]:
# Inspect the extracted content before it is handed to the model for summarization.
print(final_content)

In [None]:
# Summarize the extracted content with the selected model. The result is printed here,
# saved as JSON further down, and passed on to the cover-letter generation.
# NOTE(review): language="Francais" presumably selects the output language — confirm.
position_data = summarize_position_data(final_content, language="Francais", model=MODEL)
print(position_data)

## 3 | Saving position's data

### 3.1 Create application's folder

In [None]:
import os
from datetime import datetime, timedelta
import time

def create_timestamped_folder(base_dir="data/applications", suffix=None):
    """Create and return a new, uniquely named application folder.

    The folder is named ``<base_dir>/<YYYY-MM-DD_HH-MM>_<suffix>``. When a
    folder with that name already exists, the function sleeps until the start
    of the next minute and retries with the new timestamp.

    Args:
        base_dir: Parent directory of the application folders.
        suffix: Label appended after the timestamp. Defaults to the
            notebook-level ``model_id``, looked up at call time.

    Returns:
        The path of the newly created folder, as a string.
    """
    if suffix is None:
        suffix = model_id

    while True:
        current_timestamp = datetime.now()
        timestamp_str = current_timestamp.strftime("%Y-%m-%d_%H-%M")
        folder_name = f"{base_dir}/{timestamp_str}_{suffix}"

        try:
            # Create directly rather than checking os.path.exists() first: the
            # check-then-create sequence can race with another process, whereas
            # makedirs() itself reports an existing target via FileExistsError.
            os.makedirs(folder_name)
        except FileExistsError:
            print(f"Folder {folder_name} already exists. Waiting for the next minute...")
            # Aim for the minute after the timestamp that collided. Measuring
            # from a fresh clock reading alone could overshoot by almost a full
            # minute if the clock rolled over since the timestamp was taken.
            next_minute = current_timestamp.replace(second=0, microsecond=0) + timedelta(minutes=1)
            wait_time = (next_minute - datetime.now()).total_seconds()
            # time.sleep() rejects negative values, so clamp at zero.
            time.sleep(max(0.0, wait_time))
        else:
            print(f"New folder created: {folder_name}")
            return folder_name

# Create the folder for this run; the path is reused below when saving the position
# data and when exporting the cover letter.
application_folder = create_timestamped_folder()

### 3.2 Position's data

In [None]:
# Imported explicitly: no `import json` is visible elsewhere in the notebook, so the
# name would otherwise only be in scope if the webapp.process star import leaks it.
import json

# Persist the summarized position data inside this application's folder.
with open(f'{application_folder}/position_data.json', 'w', encoding='utf-8') as file:
    json.dump(position_data, file, indent=4)

## 4 | User data

In [None]:
# Imported explicitly: no `import json` is visible elsewhere in the notebook, so the
# name would otherwise only be in scope if the webapp.process star import leaks it.
import json

# Load the applicant profile. The context manager closes the file handle (the bare
# json.load(open(...)) form left it open), and the explicit UTF-8 encoding avoids
# decoding with a platform-dependent default.
with open("data/user.json", encoding="utf-8") as user_file:
    user_data = json.load(user_file)

## 5 | Cover letter

### 5.1 Content

In [None]:
# Generate the cover letter from the position summary and the user profile with the
# selected model, then print it for review.
cover_letter = generate_cover_letter(position_data=position_data, user_data=user_data, language="Francais", model=MODEL)
print(cover_letter)

### 5.2 Typst to PDF

In [None]:
# Export the cover letter for this application.
# NOTE(review): presumably renders a Typst document into application_folder, with the
# last argument ("pdf") selecting the output format — confirm in webapp.process.
export_cover_letter_for_pipeline(user_data, position_data, cover_letter, application_folder, "pdf")

In [None]:
# Total wall-clock time, in seconds, since `start` was recorded in the first cell.
print(perf_counter() - start)