In [6]:
import json
import os
from pymongo import MongoClient

from agents.Server.db import get_db

In [7]:
def insert_jobs_from_json(file_path):
    """Insert the job documents stored in a JSON array file into ``db.jobs``.

    The file is parsed with ``json.load``; the top-level value must be a list.
    Every element gets a ``"FROM"`` key holding the source name (the file's
    base name with each ``'.json'`` occurrence removed) before the whole list
    is handed to ``insert_many`` on the ``jobs`` collection of the database
    returned by ``get_db()``.

    Nothing is raised to the caller: any exception (missing file, invalid
    JSON, database failure, ...) is caught and reported with ``print``.

    Args:
        file_path (str): Path of the JSON file to import.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    try:
        db = get_db()
        jobs_collection = db.jobs
        with open(file_path, 'r', encoding='utf-8') as file:
            jobs = json.load(file)

        if not isinstance(jobs, list):
            print("The JSON file must contain a list of job objects.")
            return

        # Kept as str.replace (not splitext) so files such as
        # "name.json.json" keep producing the same FROM tag as before.
        source_name = os.path.basename(file_path).replace('.json', '')

        # insert_many() rejects an empty document list, so an empty file is
        # reported as "nothing to do" instead of surfacing as an error.
        if not jobs:
            print(f"No jobs found in {source_name}; nothing to insert.")
            return

        # Add the source field to each job
        for job in jobs:
            job["FROM"] = source_name

        result = jobs_collection.insert_many(jobs)
        print(f"{len(result.inserted_ids)} jobs successfully inserted from {source_name}!")
    except Exception as e:
        print(f"Error inserting jobs from {file_path}: {e}")


In [None]:
def insert_jobs_from_json_updated(file_path):
    """Import jobs from a JSON array or JSON Lines file, de-duplicated in memory.

    The format is detected from the first non-blank line: if it starts with
    ``{`` the file is read as JSON Lines (one object per non-blank line),
    otherwise the whole file is parsed as a single JSON document, which must
    be a list.

    Every job gets a ``"FROM"`` key holding the source name (the file's base
    name with each ``'.json'`` occurrence removed). Jobs are then
    de-duplicated on the ``(job['source'], job['job_title'])`` pair, keeping
    the first occurrence. De-duplication only looks at the jobs in this file;
    documents already stored in the collection are not consulted.

    Exceptions raised while reading, parsing, filtering or inserting are
    caught and reported with ``print`` (a job missing ``'source'`` or
    ``'job_title'`` therefore aborts the whole import). ``get_db()`` is called
    outside the ``try`` block, so its exceptions propagate to the caller.

    Args:
        file_path (str): Path of the JSON / JSON Lines file to import.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    db = get_db()
    # NOTE(review): this writes to the collection literally named
    # 'jobs_collection', while insert_jobs_from_json and delete_jobs_by_source
    # use 'jobs' - confirm which collection is intended.
    jobs_collection = db['jobs_collection']
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            # Sniff the format from the first line that has content, so a
            # JSON Lines file with leading blank lines is still recognised.
            first_content = ""
            for line in iter(file.readline, ""):
                first_content = line.strip()
                if first_content:
                    break
            file.seek(0)  # Rewind so the real parse starts at the beginning

            if first_content.startswith("{"):
                # JSON Lines: one document per non-blank line
                jobs = [json.loads(line) for line in file if line.strip()]
            else:
                # JSON Array: parse the file as a whole
                jobs = json.load(file)

        if not isinstance(jobs, list):
            print("The JSON file must contain a list of job objects.")
            return

        # Kept as str.replace (not splitext) so files such as
        # "name.json.json" keep producing the same FROM tag as before.
        source_name = os.path.basename(file_path).replace('.json', '')

        # Keep the first job seen for each (source, job_title) pair
        jobs_filtered = []
        seen_jobs = set()

        for job in jobs:
            job["FROM"] = source_name
            job_key = (job['source'], job['job_title'])

            if job_key not in seen_jobs:
                seen_jobs.add(job_key)
                jobs_filtered.append(job)

        print(f"After filtering: {len(jobs_filtered)} jobs to be inserted.")

        # insert_many() rejects an empty list, so only call it with content
        if jobs_filtered:
            result = jobs_collection.insert_many(jobs_filtered)
            print(f"{len(result.inserted_ids)} jobs successfully inserted from {source_name}!")
        else:
            print("No new jobs to insert after filtering.")
    except Exception as e:
        print(f"Error inserting jobs from {file_path}: {e}")


# Load the LinkedIn scrape (note the doubled ".json.json" extension in the file name).
insert_jobs_from_json_updated(
    file_path=r'C:\Users\אביב\PycharmProjects\jobFinder\agents\selenuim\json_jobs\linkedin_jobs.json.json',
)

In [9]:
from pymongo import MongoClient


def delete_jobs_by_source(source_value):
    """
    Deletes all documents from the MongoDB collection where the FROM field matches the given value.

    A client for ``mongodb://localhost:27017/`` is opened for the duration of the
    call and closed again before returning, whether or not the deletion succeeds.
    The documents are removed from the ``jobs`` collection of ``job_database``.

    Args:
        source_value (str): The value of the FROM field to filter and delete documents.

    Returns:
        int: The number of documents deleted, or 0 if any exception occurred
        (the exception is printed, not raised).
    """
    try:
        # NOTE(review): the connection string and database name are hard-coded
        # here, whereas the insert helpers obtain their database from get_db();
        # confirm both point at the same database.
        # Using the client as a context manager closes it on exit, so repeated
        # calls no longer leave open clients behind.
        with MongoClient('mongodb://localhost:27017/') as client:
            jobs_collection = client.job_database.jobs

            # Delete every document tagged with the requested source
            result = jobs_collection.delete_many({"FROM": source_value})

        # Log and return the result
        print(f"Deleted {result.deleted_count} documents where FROM='{source_value}'.")
        return result.deleted_count
    except Exception as e:
        print(f"An error occurred: {e}")
        return 0


In [11]:
# Import the LinkedIn scrape produced by the selenium agent.
insert_jobs_from_json_updated(
    file_path=r'C:\Users\אביב\PycharmProjects\jobFinder\agents\selenuim\json_jobs\linkedin_jobs.json',
)

Error inserting jobs from C:\Users\אביב\PycharmProjects\jobFinder\agents\selenuim\json_jobs\linke_jobs.json: [Errno 2] No such file or directory: 'C:\\Users\\אביב\\PycharmProjects\\jobFinder\\agents\\selenuim\\json_jobs\\linke_jobs.json'


In [33]:
# Remove every job tagged FROM='drushimIL_jobs' and report how many were deleted.
deleted_count = delete_jobs_by_source(source_value="drushimIL_jobs")
print(f"Number of documents deleted: {deleted_count}")

Deleted 0 documents where FROM='drushimIL_jobs'.
Number of documents deleted: 0


In [None]:
import os

# Where the kernel is currently running
current_path = os.getcwd()

# One level up: the head part of the (head, tail) split of the current path
parent_directory = os.path.split(current_path)[0]

print("Current directory:", current_path)
print("Parent directory:", parent_directory)
