In [1]:
from pymongo import MongoClient
from gridfs import GridFS
from bson import ObjectId
import io
import pandas as pd
from IPython.display import display, HTML

# MongoDB connection
client = MongoClient('mongodb://localhost:27017/')
db = client['job_applications']
fs = GridFS(db)
collection = db['applicants']

# Fetch all applicants data from MongoDB
applicants = collection.find()

# Prepare applicants data for display
applicants_data = []
for applicant in applicants:
    applicant_info = {
        "applicant_name": applicant.get("applicant_name"),
        "job_title": applicant.get("job_title"),
        "resume_url": None,
        "resume_attachment": applicant.get("resume_attachment")
    }

    # Check if resume_attachment_id exists
    if "resume_attachment_id" in applicant:
        resume_id = applicant["resume_attachment_id"]
        applicant_info["resume_url"] = f"View Resume for {applicant_info['applicant_name']}"

    applicants_data.append(applicant_info)

# Convert the data to a Pandas DataFrame for easy display
df = pd.DataFrame(applicants_data)

# Display the applicants data in the notebook
display(HTML(df.to_html()))

# Function to view resume
def view_resume(resume_id):
    try:
        # Convert resume_id from string to ObjectId
        file_id = ObjectId(resume_id)
        
        # Fetch the file from GridFS
        file = fs.get(file_id)
        
        # Display the file content (for simplicity, we'll show the first few bytes of the resume)
        file_content = file.read(500)  # Display first 500 bytes for preview
        return file_content
    except Exception as e:
        return f"Error occurred while fetching file: {str(e)}"

Unnamed: 0,applicant_name,job_title,resume_url,resume_attachment
0,SAYED MOHAMMAD FIRDOUSI,JO-2024-0011,View Resume for SAYED MOHAMMAD FIRDOUSI,
1,SAYED MOHAMMAD FIRDOUSI,JO-2024-0011,,
2,SAYED MOHAMMAD FIRDOUSI,JO-2024-0011,,Resume_765752.pdf
3,RISHABH TEST BY MOHAMMAD,006-assurance-service-engineer-ahmedabad,View Resume for RISHABH TEST BY MOHAMMAD,
4,RISHABH TEST BY MOHAMMAD,006-assurance-service-engineer-ahmedabad,,
5,RISHABH TEST BY MOHAMMAD,006-assurance-service-engineer-ahmedabad,,Sales_Order_Service_457692.pdf
6,AYUSH TIWARI,logistics-manager,View Resume for AYUSH TIWARI,
7,AYUSH TIWARI,logistics-manager,,
8,AYUSH TIWARI,logistics-manager,,Tablet_853759.pdf


In [2]:
df_resume_url = df.dropna(subset=['resume_url'])

df_resume_attachment = df.dropna(subset=['resume_attachment'])

df_merged = pd.merge(df_resume_url, df_resume_attachment, on=["applicant_name", "job_title"], how="outer")
df_merged = df_merged.drop(columns = ['resume_attachment_x'])
df_merged = df_merged.drop(columns = ['resume_url_y'])
df_merged.rename(columns={'resume_attachment_y': 'resume_attachment'}, inplace=True)
df_merged

Unnamed: 0,applicant_name,job_title,resume_url_x,resume_attachment
0,AYUSH TIWARI,logistics-manager,View Resume for AYUSH TIWARI,Tablet_853759.pdf
1,RISHABH TEST BY MOHAMMAD,006-assurance-service-engineer-ahmedabad,View Resume for RISHABH TEST BY MOHAMMAD,Sales_Order_Service_457692.pdf
2,SAYED MOHAMMAD FIRDOUSI,JO-2024-0011,View Resume for SAYED MOHAMMAD FIRDOUSI,Resume_765752.pdf


In [3]:
import requests
import pandas as pd
import time 
from datetime import datetime 


start_time = time.time()

base_url = 'https://erpv14.electrolabgroup.com/'
endpoint = 'api/resource/Job Applicant'
url = base_url + endpoint

params = {
    'fields': '["name","resume_attachment"]',
    'limit_start': 0, 
    'limit_page_length': 100000000000,
}

headers = {
    'Authorization': 'token 3ee8d03949516d0:6baa361266cf807'
}

response = requests.get(url, params=params, headers=headers)

if response.status_code == 200:
    data = response.json()
    print("Fields are correct.")
    job_df = pd.DataFrame(data['data'])
else:
    print(f"Failed to fetch data. Status code: {response.status_code}")

end_time = time.time()
print(f"Request executed in {end_time - start_time:.2f} seconds")


Fields are correct.
Request executed in 0.81 seconds


In [4]:
job_df = job_df.dropna(subset=['resume_attachment'])
job_df

Unnamed: 0,name,resume_attachment
11,aarifmnsri@gmail.com,/private/files/Naukri_ARIFMANSURI[7y_0m].pdf
26,abhishekkhandagale1718@gmail.com,/private/files/DOC-20240104-WA0004. (1).docx
69,akhileshpagare29@gmail.com,/private/files/AKHILESH RESUME 1.pdf
76,akshay.kurane17@gmail.com,/private/files/Akshay Kurane CV.pdf
78,akshay.panhalkar@gmail.com,/private/files/1Akshay Panahlkar.pdf
...,...,...
1598,vinitdagde1500@gmail.com-1,/private/files/Naukri_VinitBalasahebDagade[Fre...
1604,vipankumar91195@gmail.com,/private/files/Naukri_VipanKumar[7y_0m].pdf
1617,visheshtak2001@gmail.com,/private/files/Naukri_VisheshTak[Fresher] (1).pdf
1625,vksballa@gmail.com,/private/files/Naukri_VikasAshokBallal_4375501...


In [5]:
filtered_job_df = job_df[job_df['resume_attachment'].isin(df_merged['resume_attachment'])]
filtered_job_df

Unnamed: 0,name,resume_attachment
212,ayushstiwari@gmail.com,Tablet_853759.pdf
755,mohammadsayed722@gmail.com-1,Resume_765752.pdf
1517,test@gmail.com,Sales_Order_Service_457692.pdf


In [6]:
import os
from pymongo import MongoClient
from gridfs import GridFS
from bson import ObjectId

# MongoDB connection
client = MongoClient('mongodb://localhost:27017/')
db = client['job_applications']
fs = GridFS(db)
collection = db['applicants']

upload_folder = r"C:\Users\Sayadf\Another Quarter\HR Recuitment Form\Resume_Upload"

if not os.path.exists(upload_folder):
    os.makedirs(upload_folder)

for index, row in filtered_job_df.iterrows():
    # Get the resume filename from the 'resume_attachment' column
    resume_filename = row['resume_attachment']
    
    # Replace spaces with underscores in the resume filename
    resume_filename = resume_filename.replace(" ", "_")
    
    # Get the applicant's name from the 'name' column
    applicant_name = row['name']
    
    # Look for the applicant in the collection using the resume filename
    applicant = collection.find_one({'resume_attachment': resume_filename})
    
    if applicant:
        # Retrieve file metadata from GridFS using the resume filename
        file_metadata = db.fs.files.find_one({'filename': resume_filename})
        
        if file_metadata:            
            file_id = file_metadata['_id'] 
            
            try:
                # Fetch the file from GridFS
                file = fs.get(file_id)
                
                # Save the file with only the applicant's name and .pdf extension
                destination_path = os.path.join(upload_folder, f"{applicant_name}.pdf")
                
                with open(destination_path, 'wb') as f:
                    f.write(file.read())
                print(f"Downloaded: {applicant_name}.pdf")
            except Exception as e:
                print(f"Error downloading {resume_filename}: {e}")
        else:
            print(f"File metadata not found for: {resume_filename}")
    else:
        print(f"Resume not found for: {resume_filename}")


Downloaded: ayushstiwari@gmail.com.pdf
Downloaded: mohammadsayed722@gmail.com-1.pdf
Downloaded: test@gmail.com.pdf
