In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import pandas as pd

# Set up options for the webdriver
chrome_options = Options()
chrome_options.add_argument('--headless')  # Run Chrome in headless mode (no GUI)
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

# Set the path to the webdriver
webdriver_path = '/usr/local/bin/chromedriver'
driver = webdriver.Chrome(options=chrome_options)

# Create an empty list to store candidate information
candidates_data = []

# Loop through 10 pages
for page_number in range(1, 10):
    # Construct the URL for each page
    url = f'https://resume-search.com/resume-search/?q=&page={page_number}'

    # Open the webpage
    driver.get(url)

    # Loop through 50 candidates on each page
    for index in range(1, 50):
        try:
            # Wait for the page to load
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, f'(//div[@class="d-flex heading_colr_name"])[{index}]')))
            print(f"Candidate's name: {index} on Page: {page_number}")

            # Scroll into view
            candidate_xpath = f'(//div[@class="d-flex heading_colr_name"])[{index}]'
            candidate = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, candidate_xpath)))
            candidate_text = candidate.text
            print(f"Candidate's name: {candidate_text}")
            candidate = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, candidate_xpath)))
            driver.execute_script("arguments[0].scrollIntoView();", candidate)

            # Click on the candidate's name
            driver.execute_script("arguments[0].click();", candidate)

            # Wait for the page to load after clicking
            WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, '(//p[@class="publicView_para_fontsize fontsize_marginright"])[2]')))

            # Get the HTML content of the page after submitting the form
            candidate_profile_soup = BeautifulSoup(driver.page_source, 'html.parser')

            # Extract the skill set from the specified 'p' element
            skill_set_paragraph = candidate_profile_soup.select_one('p.publicView_para_fontsize.fontsize_marginright + p.publicView_para_fontsize.fontsize_marginright')
            role_element = candidate_profile_soup.select_one('div > h4')
            role_text = role_element.get_text(strip=True) if role_element else "Role not found"
            if skill_set_paragraph:
                # Extract the text content of the 'p' element
                skill_set_text = skill_set_paragraph.get_text(strip=True)

                # Extract the skill set from the text content
                skill_set_start_index = skill_set_text.find(':') + 1
                extracted_skills = skill_set_text[skill_set_start_index:].split(', ')

                print("Extracted Skills:")
                for skill in extracted_skills:
                    print(skill)

                # Append data to the list
                candidate_data = {
                    'Candidate Name': candidate_text,
                    'Role': role_text,
                    'Skills': extracted_skills,
                }
                candidates_data.append(candidate_data)
            else:
                print("Skill set paragraph not found.")

            # Go back to the search results page
            driver.back()

        except Exception as e:
            print(f"Error processing candidate {index} on Page {page_number}: {e}")

# Close the webdriver
driver.quit()

# Create a DataFrame from the list of candidate data
df = pd.DataFrame(candidates_data)

# Print the DataFrame
print(df)

Candidate's name: 1 on Page: 1
Candidate's name: Viswajith Muraleedharan
Extracted Skills:
MySQL
 Team Leadership
 Leadership
 Swagger API
 Maven
 Agile Methodologies
 Laravel
 Yii
 Hibernate
HMTL5
PHP
JAVASCRIPT
JAVA
 Semantic HTML
 SQL
 Spring Boot
 SASS
 Time Management
Candidate's name: 2 on Page: 1
Candidate's name: Sohit Gaurav
Extracted Skills:
JAVA
 Core Java
 Object-Oriented Programming (OOP)
 Postman API
 Spring Boot
 SQL
 Databases
Candidate's name: 3 on Page: 1
Candidate's name: Er. Ritik Chauhan
Extracted Skills:
MySQL
 Python (Programming Language)
 Machine Learning
 Mathematics Education
 Microsoft Power BI
 Django
 Big Data Analytics
 Data Analysis
C
 Data Structures
 Data Analytics
Candidate's name: 4 on Page: 1
Candidate's name: Rakshit mishra
Extracted Skills:
JAVASCRIPT
 SDKs
 Mobile Application Development
 Gradle
 Communication
 Mobile Applications
 Node.js
 Get Hired With GeeksforGeeks
JAVA
C++
 Software Systems
 Kotlin
 Full-Stack Development
 Android Developmen

In [3]:
import pandas as pd
import csv

# Create a DataFrame from the list of candidate data
df = pd.DataFrame(candidates_data)

# Create a dictionary from the DataFrame
candidates_dict = df.to_dict(orient='list')

# Save the dictionary to a CSV file with 'utf-8' encoding
csv_filename = 'candidates_data.csv'
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=candidates_dict.keys())
    writer.writeheader()
    for row in zip(*candidates_dict.values()):
        writer.writerow(dict(zip(candidates_dict.keys(), row)))

# Print the DataFrame
print(df)

              Candidate Name                                  Role  \
0    Viswajith Muraleedharan      Full Stack Web Developer Trainee   
1               Sohit Gaurav                            Apprentice   
2          Er. Ritik Chauhan                                Intern   
3             Rakshit mishra                     Android Developer   
4                Varun Verma                   Marketing Associate   
..                       ...                                   ...   
436            Anubhav Tyagi                   Wordpress Developer   
437               Niraj Bava                  Full-stack Developer   
438               Raju Kumar  Graduation in Mechanical Engineering   
439           Yogesh Gangwar              Senior Software Engineer   
440          Dhirendra Kumar                          UI Developer   

                                                Skills  
0    [MySQL,  Team Leadership,  Leadership,  Swagge...  
1    [JAVA,  Core Java,  Object-Oriented Prog