## Form Filling AI Agent using Selenium and OpenAI


### Requirements:

In [None]:
## Package and library resource requirements
!pip install selenium openai webdriver-manager nltk

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


### Setup:

In [333]:
## Importing all the required libraries
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

import json

import openai
from openai import OpenAI

import nltk

# Initialize the WebDriver
def initialize_webdriver():
    """Build and return a Chrome WebDriver.

    The chromedriver binary is resolved through ChromeDriverManager, and the
    browser is started with the command-line flags listed below.

    Returns:
        The ``webdriver.Chrome`` instance that was created.
    """
    chrome_flags = (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--ignore-certificate-errors',  # Ignore certificate errors
        '--ignore-ssl-errors=yes',      # Ignore SSL errors
    )

    chrome_options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    chromedriver_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chromedriver_service, options=chrome_options)

In [334]:
## Set up the OpenAI API key and invoke the GPT 3.5 model

# Read the key from the OPENAI_API_KEY environment variable instead of
# hard-coding a secret in the notebook, where it leaks through version control
# and shared copies. If the variable is unset the key is None.
# NOTE(review): the OpenAI client is expected to reject a missing key at
# construction time - confirm against the installed library version.
openai.api_key = os.environ.get("OPENAI_API_KEY")

client = OpenAI(
    api_key=openai.api_key,
)

def generate_data(prompt):
    """Send ``prompt`` to the gpt-3.5-turbo chat model and return its reply.

    Args:
        prompt: Text sent as the user message, after a fixed system message.

    Returns:
        The content of the first choice, with surrounding whitespace stripped
        and every single-quote character removed.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        messages=conversation,
        model="gpt-3.5-turbo",
    )
    reply_text = completion.choices[0].message.content
    return reply_text.strip().replace("'", "")

### Data Generation using OpenAI's GPT model with Prompts:


In [336]:
# Generating Full Name: 'First name', 'Middle name', and 'Last name'
def generate_fullname():
    """Ask the model for a first, middle and last name, one request each.

    Returns:
        A ``(first_name, middle_name, last_name)`` tuple of the raw replies
        from ``generate_data``.
    """
    # Prompts are sent in this order: first, middle, last.
    name_prompts = (
        "Generate a first name only. Do not generate any other words along with the first name. Do not ask for the Gender.",
        "Generate a middle name only. Do not generate any other words along with the middle name. Do not ask for the Gender, first name, and last name. ",
        "Generate a last name only. Do not generate any other words along with the last name. Do not ask for the Gender, first name, and middle name.",
    )

    first_name, middle_name, last_name = (generate_data(text) for text in name_prompts)
    return first_name, middle_name, last_name


# Generating Full Address: 'Street address line 1 & line 2', 'City', 'State', and 'Postal code'
def fulladdress():
    """Generate an Indian postal address with three model requests.

    The first two requests produce the two street-address lines. The third
    produces a single "City, State, Postal code" reply that is split on commas.

    Returns:
        A ``(line1, line2, city, state, postal)`` tuple of strings. ``city``,
        ``state`` and ``postal`` are the first three comma-separated parts of
        the third reply; any part the reply does not contain is returned as an
        empty string.
    """
    address_line1_prompt = (
        "Generate an Indian Street address line by mentioning:\n"
        "- Flat or Door number\n"
        "- Block or Apartment\n"
        "- Street or Colony only\n\n"
        "Examples:\n"
        "1. 'Flat No. 203, Block C, Medha Colony'\n"
        "2. '16/3, Kamesh Apartments, 62nd Street'\n\n"
        "Important:\n"
        "- Only provide the address line, do not include any introduction or explanation."
    )

    address_line2_prompt = (
        "Generate an Indian Street address line by mentioning:\n"
        "- Road name\n"
        "- Neighborhood or District\n"
        "- Landmarks or Notable buildings only\n\n"
        "Examples:\n"
        "1. 'Edulabad Road, Ghatkesar District, near Edulabad MRO and Ghatkesar Police Station'\n"
        "2. 'IT Lane, JM Road, Hitech city neighbourhood, across the IT park'\n\n"
        "Important:\n"
        "- Only provide the address line, do not include any introduction or explanation."
    )

    address_rest_prompt = (
        "Generate an Indian City, and State, and Postal/Zip code only.\n"
        "Examples:\n"
        "1. 'Hyderabad, Telangana, 501301'\n"
        "2. 'Mumbai, Maharashra, 400004'\n\n"
        "Important:\n"
        "- Only provide the address components, do not include any introduction or explanation."
    )

    line1 = generate_data(address_line1_prompt)
    line2 = generate_data(address_line2_prompt)
    rest = generate_data(address_rest_prompt)

    # The model does not always return exactly three comma-separated parts.
    # Pad with empty strings so a short reply yields blank fields instead of
    # an IndexError; parts beyond the third are ignored, as before.
    parts = [part.strip() for part in rest.split(",")]
    city, state, postal = (parts + ["", "", ""])[:3]

    return line1, line2, city, state, postal


# Generating Email w.r.t the Full Name
def generate_email(first_name, middle_name, last_name):
    """Ask the model for an email address derived from the given names.

    Args:
        first_name: First name inserted into the prompt.
        middle_name: Middle name inserted into the prompt.
        last_name: Last name inserted into the prompt.

    Returns:
        The reply from ``generate_data``; it is not validated as an email
        address.
    """
    prompt = (
        "Generate an email address based on the following names:\n"
        f"First Name: {first_name}\n"
        f"Middle Name: {middle_name}\n"
        f"Last Name: {last_name}\n\n"
        "Important:\n"
        # Each bullet ends with a newline; previously the first two bullets
        # were fused onto a single line of the prompt.
        "- email address should end with either @gmail.com or @msn.com or @hotmail.com\n"
        "- Include numbers in the email address to make it unique\n"
        "- Only provide the email address line, do not include any introduction or explanation."
    )
    return generate_data(prompt)


# Generating Phone Number
def generate_phone_number():
    """Ask the model for an Indian phone number in +91-XXXXXXXXXX form.

    Returns:
        The reply from ``generate_data``; the format is requested in the
        prompt but not checked here.
    """
    phone_prompt = "".join([
        "Generate an Indian phone number with the country code.\n",
        "The phone number should follow this format: +91-XXXXXXXXXX\n",
        "Replace XXXXXXXXXX with a 10-digit number.\n\n",
        "Important:\n",
        "- Only provide the phone number, and do not include any introduction or explanations.",
    ])
    return generate_data(phone_prompt)


# Generating LinkedIn Profile URL w.r.t the Full Name
def generate_linkedin_url(first_name, middle_name, last_name):
    """Ask the model for a LinkedIn profile URL derived from the given names.

    Args:
        first_name: First name inserted into the prompt.
        middle_name: Middle name inserted into the prompt.
        last_name: Last name inserted into the prompt.

    Returns:
        The reply from ``generate_data``; it is not validated as a URL.
    """
    prompt = (
        "Generate a LinkedIn profile URL based on the following names:\n"
        f"First Name: {first_name}\n"
        f"Middle Name: {middle_name}\n"
        f"Last Name: {last_name}\n\n"
        "The URL should follow the format 'https://www.linkedin.com/in/first-middle-last'.\n"
        "Convert all names to lowercase and use hyphens to separate the names.\n"
        # The trailing newlines keep this instruction from being fused with
        # the "Important:" header ("...end of the urlImportant:").
        "Include a random string of numbers and letters at the end of the url\n\n"
        "Important:\n"
        "- Only provide the LinkedIn profile URL, and do not include any introduction or explanations."
    )
    return generate_data(prompt)


# Generating Conceptual Answers for the Form's Technical Questions
def generate_concept(query):
    """Ask the model to answer a technical question from the form.

    Args:
        query: The question text inserted into the prompt.

    Returns:
        The reply from ``generate_data``.
    """
    prompt = (
        f"Generate a appropriate response for the query: {query}.\n"
        "Provide examples and usecases\n"
        # The trailing newlines keep this instruction from being fused with
        # the "Important:" header ("...related queriesImportant:").
        "Use Python for programming related queries\n\n"
        "Important:\n"
        "- Only provide the relevant response, do not include any introduction or explanation."
    )
    return generate_data(prompt)


# Generating Cover Letter w.r.t the Full Name, Email, Phone, LinkedIn Profile URL
def generate_cover_letter(first_name, middle_name, last_name, email, phone, linkedin_url):
    """Ask the model for a Data Science cover letter signed with the given details.

    Args:
        first_name: Applicant's first name.
        middle_name: Applicant's middle name.
        last_name: Applicant's last name.
        email: Applicant's email address.
        phone: Applicant's phone number.
        linkedin_url: Applicant's LinkedIn profile URL.

    Returns:
        The reply from ``generate_data``.
    """
    # Today's date in the local time zone, formatted like "June 13, 2024"
    today_date = time.strftime("%B %d, %Y")

    prompt = (
        "Generate an Cover Letter for the Data Science position:\n"
        f"First Name: {first_name}\n"
        f"Middle Name: {middle_name}\n"
        f"Last Name: {last_name}\n"
        f"Email: {email}\n"
        f"Phone number: {phone}\n"
        f"linkedin_url: {linkedin_url}\n"
        f"Date: {today_date}\n\n"
        "Example Cover Letter Pattern:\n"
        f"{today_date}\n\n"
        "Ambika Chawal,\n"
        "Microsoft\n"
        "Hyderabad\n\n"
        "Dear Ambika Chawal,\n\n"
        # The trailing newlines keep the body placeholder from being fused
        # with the closing ("...Cover letter hereSincerely,").
        "Generate the body of the Cover letter here\n\n"
        "Sincerely,\n"
        f"{first_name} {middle_name} {last_name}\n"
        f"{email}\n"
        f"{phone}\n"
        f"{linkedin_url}\n"
        "Important:\n"
        "- Hiring Manager's Name: Generate a realistic name\n"
        "- Date: Use today's date\n"
        "- Company Name: Generate a realistic company name\n"
        "- Company Address: Generate a realistic address\n\n"
        "- Use the first name, middle name, last name wherever necessary.\n"
        "- Include experiences in data science projects and achievements.\n"
        "- Only provide the cover letter, do not include any introduction or explanation."
    )
    return generate_data(prompt)


#### Post processing the generated Full name:

In [337]:
## Cleaning the generated sentence
def extract_name(sentence):
    """Extract a bare name from a model reply that may be a whole sentence.

    Sometimes the model generates a sentence instead of just a name, such as
    'Sure! How about the name "Emma"?' or 'how about the name: Ava?', or a
    name that ends with ".". The reply is tokenized and part-of-speech tagged
    with NLTK so that only the name is kept.

    Args:
        sentence: Raw text returned by the model.

    Returns:
        The first token that is tagged NNP (proper noun) or that directly
        follows a ":" token or a '``' token (what the NLTK tokenizer emits for
        an opening double quote). If the text has at most one token, or no
        token qualifies, ``sentence`` is returned unchanged.
    """
    # Tokenize the sentence into words
    words = nltk.word_tokenize(sentence)

    # A single token is already a bare name, and empty text has nothing to extract.
    if len(words) <= 1:
        return sentence

    # Mark each word with its part-of-speech tag
    tagged_words = nltk.pos_tag(words)

    for i, (word, tag) in enumerate(tagged_words):
        # Only consult the previous token when one exists: at i == 0 the index
        # i - 1 would wrap around to the LAST token of the sentence.
        follows_marker = i > 0 and tagged_words[i - 1][0] in (':', '``')
        if tag == 'NNP' or follows_marker:
            return word  # Extracting the name (proper noun)

    # Nothing looked like a name: hand the text back rather than returning None.
    return sentence

### Functions for Form Filling:

In [338]:
## Functions for filling the Text field and the Resume upload field
def fill_text_field(driver, field_name, value):
    """Set the value of the first element named ``field_name`` via JavaScript.

    Args:
        driver: WebDriver on which the script is executed.
        field_name: ``name`` attribute of the target form element.
        value: Value assigned to the element's ``value`` property.
    """
    # Pass the name and the value as script arguments instead of splicing them
    # into the JavaScript source, so quotes or backslashes in either one
    # cannot break (or inject into) the script.
    script = "document.getElementsByName(arguments[0])[0].value = arguments[1];"
    driver.execute_script(script, field_name, value)


def upload_file(driver, field_name, file_path):
    """Send ``file_path`` as keystrokes to the element named ``field_name``.

    Args:
        driver: WebDriver used to locate the element.
        field_name: ``name`` attribute of the file input.
        file_path: Path typed into the file input.
    """
    driver.find_element(By.NAME, field_name).send_keys(file_path)


In [383]:
## Filling each Form Field

# Filling form fields under 'Full Name' section
def fill_FullName(driver, form_Fields):
    """Fill the text inputs of the form's "Full Name" section.

    The section is the first element matching
    ``li.form-line[data-type='control_fullname']``. Each text input inside it
    is paired with the first ``<label>`` among its following siblings; when
    that label's text is a key of ``form_Fields``, the input receives the
    mapped value.

    Args:
        driver: WebDriver with the form page loaded.
        form_Fields: Mapping of label text to the value to enter.
    """
    name_section = driver.find_element(By.CSS_SELECTOR, "li.form-line[data-type='control_fullname']")

    for text_input in name_section.find_elements(By.CSS_SELECTOR, "input[type='text']"):
        input_name = text_input.get_attribute("name")
        sub_label = text_input.find_element(By.XPATH, "following-sibling::label").text
        print(f"Input Name: {input_name}, Label Text: {sub_label}")

        # Inputs whose label is not a known field are left untouched
        if sub_label not in form_Fields:
            continue
        fill_text_field(driver, input_name, form_Fields[sub_label])


# Filling form fields under 'Current Address' section
def fill_current_address(driver, form_Fields):
    """Fill the text inputs of the form's "Current Address" section.

    The section is the first element matching
    ``li.form-line[data-type='control_address']``. Each text input inside it
    is paired with the first ``<label>`` among its following siblings; when
    that label's text is a key of ``form_Fields``, the input receives the
    mapped value.

    Args:
        driver: WebDriver with the form page loaded.
        form_Fields: Mapping of label text to the value to enter.
    """
    section_selector = "li.form-line[data-type='control_address']"
    address_section = driver.find_element(By.CSS_SELECTOR, section_selector)
    address_inputs = address_section.find_elements(By.CSS_SELECTOR, "input[type='text']")

    for address_input in address_inputs:
        input_name = address_input.get_attribute("name")
        caption = address_input.find_element(By.XPATH, "following-sibling::label").text
        print(f"Input Name: {input_name}, Label Text: {caption}")

        # Inputs whose label is not a known field are left untouched
        if caption not in form_Fields:
            continue
        fill_text_field(driver, input_name, form_Fields[caption])


# Filling 'Email Address' form field 
def fill_email_field(driver, form_Fields):
    """Fill the email input(s) of the form's email section.

    The section is the first element matching
    ``li.form-line[data-type='control_email']``; the text of its first
    ``<label>`` is used as the lookup key for every email input it contains.

    Args:
        driver: WebDriver with the form page loaded.
        form_Fields: Mapping of label text to the value to enter.
    """
    # Locate the section that wraps the email field
    email_section = driver.find_element(By.CSS_SELECTOR, "li.form-line[data-type='control_email']")
    email_inputs = email_section.find_elements(By.CSS_SELECTOR, "input[type='email']")

    # One label is shared by all inputs of the section
    section_label = email_section.find_element(By.TAG_NAME, 'label').text

    for email_input in email_inputs:
        input_name = email_input.get_attribute("name")
        print(f"Input Name: {input_name}, Label Text: {section_label}")

        if section_label not in form_Fields:
            continue
        fill_text_field(driver, input_name, form_Fields[section_label])

# Filling 'Phone Number' form field 
def fill_phone_field(driver, form_Fields):
    """Fill the telephone input(s) of the form's phone section.

    The section is the first element matching
    ``li.form-line[data-type='control_phone']``; the text of its first
    ``<label>`` is used as the lookup key for every ``tel`` input it contains.

    Args:
        driver: WebDriver with the form page loaded.
        form_Fields: Mapping of label text to the value to enter.
    """
    # Locate the section that wraps the phone field
    phone_section = driver.find_element(By.CSS_SELECTOR, "li.form-line[data-type='control_phone']")
    phone_inputs = phone_section.find_elements(By.CSS_SELECTOR, "input[type='tel']")

    # One label is shared by all inputs of the section
    section_label = phone_section.find_element(By.TAG_NAME, 'label').text

    for phone_input in phone_inputs:
        input_name = phone_input.get_attribute("name")
        print(f"Input Name: {input_name}, Label Text: {section_label}")

        if section_label not in form_Fields:
            continue
        fill_text_field(driver, input_name, form_Fields[section_label])

# Filling 'LinkedIn' form field 
def fill_LinkedIn_field(driver, form_Fields):
    """Fill the text input(s) of the first textbox section on the form.

    Only the first element matching
    ``li.form-line[data-type='control_textbox']`` is used; the text of its
    first ``<label>`` is the lookup key for every text input it contains.
    NOTE(review): this presumably relies on LinkedIn being the first textbox
    section of the form - confirm against the form layout.

    Args:
        driver: WebDriver with the form page loaded.
        form_Fields: Mapping of label text to the value to enter.
    """
    # Locate the first textbox section
    textbox_section = driver.find_element(By.CSS_SELECTOR, "li.form-line[data-type='control_textbox']")
    text_inputs = textbox_section.find_elements(By.CSS_SELECTOR, "input[type='text']")

    # One label is shared by all inputs of the section
    section_label = textbox_section.find_element(By.TAG_NAME, 'label').text

    for text_input in text_inputs:
        input_name = text_input.get_attribute("name")
        print(f"Input Name: {input_name}, Label Text: {section_label}")

        if section_label not in form_Fields:
            continue
        fill_text_field(driver, input_name, form_Fields[section_label])

# Filling Conceptual (Technical Questions') form fields
def fill_concept_fields(driver, form_Fields):
    """Fill every textbox section except inputs whose name contains 'linkedin'.

    Each element matching ``li.form-line[data-type='control_textbox']`` is
    visited; the text of its first ``<label>`` is the lookup key for the text
    inputs it contains.

    Args:
        driver: WebDriver with the form page loaded.
        form_Fields: Mapping of label text to the value to enter.
    """
    textbox_sections = driver.find_elements(By.CSS_SELECTOR, "li.form-line[data-type='control_textbox']")

    for section in textbox_sections:
        for text_input in section.find_elements(By.CSS_SELECTOR, "input[type='text']"):
            input_name = text_input.get_attribute("name")
            question = section.find_element(By.TAG_NAME, 'label').text

            # Skip inputs named like the LinkedIn field and unknown questions
            if 'linkedin' in input_name or question not in form_Fields:
                continue

            print(f"Input Name: {input_name}, Label Text: {question}")
            fill_text_field(driver, input_name, form_Fields[question])


# Filling 'Cover Letter' form field 
def fill_CV_field(driver, form_Fields):
    """Fill the textarea(s) of the form's first textarea section.

    The section is the first element matching
    ``li.form-line[data-type='control_textarea']``; the text of its first
    ``<label>`` is the lookup key for every ``<textarea>`` it contains.

    Args:
        driver: WebDriver with the form page loaded.
        form_Fields: Mapping of label text to the value to enter.
    """
    # Locate the section that wraps the cover-letter textarea
    textarea_section = driver.find_element(By.CSS_SELECTOR, "li.form-line[data-type='control_textarea']")
    textareas = textarea_section.find_elements(By.TAG_NAME, "textarea")

    # One label is shared by all textareas of the section
    section_label = textarea_section.find_element(By.TAG_NAME, 'label').text

    for textarea in textareas:
        input_name = textarea.get_attribute("name")
        print(f"Input Name: {input_name}, Label Text: {section_label}")

        if section_label not in form_Fields:
            continue
        fill_text_field(driver, input_name, form_Fields[section_label])
# Filling 'Upload Your Resume' form field 
def fill_Upload_field(driver):
    """Return the ``name`` attribute of the form's file-upload input.

    The input is the first ``input[type='file']`` inside the first element
    matching ``li.form-line[data-type='control_fileupload']``. Nothing is
    uploaded here; the caller uses the returned name to do that.

    Args:
        driver: WebDriver with the form page loaded.

    Returns:
        The value of the file input's ``name`` attribute.
    """
    upload_section = driver.find_element(By.CSS_SELECTOR, "li.form-line[data-type='control_fileupload']")
    file_input = upload_section.find_element(By.CSS_SELECTOR, "input[type='file']")
    return file_input.get_attribute("name")

In [385]:
## Functions for Filling and Submitting the Form

def fill_form(driver, form_URL, form_Fields):
    """Open the form and fill each section in turn.

    Args:
        driver: WebDriver used to load and fill the form.
        form_URL: Address of the form page.
        form_Fields: Mapping of label text to the value to enter.
    """
    driver.get(form_URL)

    # Section fillers, applied in this order
    section_fillers = (
        fill_FullName,
        fill_current_address,
        fill_email_field,
        fill_phone_field,
        fill_LinkedIn_field,
        fill_concept_fields,
        fill_CV_field,
    )
    for fill_section in section_fillers:
        fill_section(driver, form_Fields)


def submit_form(driver, timeout=60):
    """Click the form's submit button once it is enabled.

    The button is polled once per second until it reports itself enabled,
    scrolled into view, and then clicked through JavaScript. Any failure,
    including the timeout, is reported with a printed message rather than
    raised.

    Args:
        driver: WebDriver with the filled-in form loaded.
        timeout: Maximum number of seconds to wait for the button to become
            enabled before giving up.
    """
    selector = "li.form-line[data-type='control_button'] button[type='submit']"

    try:
        submit_button = driver.find_element(By.CSS_SELECTOR, selector)

        # Wait until the button is enabled, but never forever: without a
        # deadline a permanently disabled button would hang the script.
        deadline = time.monotonic() + timeout
        while not submit_button.is_enabled():
            if time.monotonic() >= deadline:
                raise TimeoutError(f"submit button still disabled after {timeout} seconds")
            time.sleep(1)
            # Re-locate the button on each poll rather than reusing the old reference
            submit_button = driver.find_element(By.CSS_SELECTOR, selector)

        # Scroll into view
        driver.execute_script("arguments[0].scrollIntoView(true);", submit_button)

        # Click through JavaScript rather than a native WebDriver click
        driver.execute_script("arguments[0].click();", submit_button)
        print("Form submitted successfully using JavaScript.")

    except Exception as e:
        print(f"Could not submit the form due to: {str(e)}")

### Main Function:

In [387]:
# Define the main function to run the script
def main():
    """Generate applicant data with the model, then fill and submit the form.

    All form values are generated first. A Chrome browser is then started,
    the form is filled, the resume is attached, and the form is submitted
    after a 15-second pause. The browser is always closed at the end, whether
    or not an error occurred.
    """
    # Generate the data
    first_name, middle_name, last_name = generate_fullname()
    address_line1, address_line2, city, state, postal = fulladdress()
    email = generate_email(first_name, middle_name, last_name)
    phone = generate_phone_number()
    linkedin_url = generate_linkedin_url(first_name, middle_name, last_name)
    aiagent = generate_data("Write something interesting about AI Agents/ LLMs")
    automation = generate_data("Write something interesting about Web Automation")
    linkedlist = generate_data("Reverse a LinkedList")
    cv = generate_cover_letter(first_name, middle_name, last_name, email, phone, linkedin_url)

    # Map each form label text to the value that should be entered for it
    form_Fields = {
        'First Name' : first_name,
        'Middle Name' : middle_name,
        'Last Name' : last_name,
        'Street Address' : address_line1,
        'Street Address Line 2' : address_line2,
        'City' : city,
        'State / Province' : state,
        'Postal / Zip Code' : postal,
        'Email Address' : email,
        'Phone Number' : phone,
        'LinkedIn' : linkedin_url,
        'Write something interesting about AI Agents/ LLMs' : aiagent,
        'Write something interesting about Web Automation': automation,
        'Reverse a LinkedList' : linkedlist,
        'Cover Letter' : cv
    }

    # Paths for Form and Resume
    form_URL = 'https://form.jotform.com/241635027272149'
    #resume_path = os.path.join(os.getcwd(), 'Vipparla Dharan Teja Resume.pdf')
    resume_path = 'C:/Users/VDT/Documents/AI, DS, & DA Stuff/Projects/AI Agent/Vipparla Dharan Teja Resume.pdf'

    # Initialize WebDriver
    driver = initialize_webdriver()

    # Fill out and Submit the form
    try:
        fill_form(driver, form_URL, form_Fields)

        field_name = fill_Upload_field(driver)
        if field_name:
            upload_file(driver, field_name, resume_path)

        # Automatically submit after 15 seconds. Check the form during this time
        time.sleep(15)
        submit_form(driver)

        # Keep the window open for 5 more seconds to see if the form is submitted or not
        time.sleep(5)

    except Exception as e:
        print(f"An error occurred: {e}")

    finally:
        # Always release the browser: previously it was only closed on the
        # error path, leaving Chrome running after a successful run.
        driver.quit()
        print("Browser closed. Exiting script.")

# Run the main function only when this code is executed directly
# (module name "__main__"), not when it is imported from another module.
if __name__ == "__main__":
    main()


Input Name: q11_fullName[first], Label Text: First Name
Input Name: q11_fullName[middle], Label Text: Middle Name
Input Name: q11_fullName[last], Label Text: Last Name
Input Name: q16_currentAddress[addr_line1], Label Text: Street Address
Input Name: q16_currentAddress[addr_line2], Label Text: Street Address Line 2
Input Name: q16_currentAddress[city], Label Text: City
Input Name: q16_currentAddress[state], Label Text: State / Province
Input Name: q16_currentAddress[postal], Label Text: Postal / Zip Code
Input Name: q12_emailAddress, Label Text: Email Address
Input Name: q13_phoneNumber13[full], Label Text: Phone Number
Input Name: q19_linkedin, Label Text: LinkedIn
Input Name: q24_writeSomething, Label Text: Write something interesting about AI Agents/ LLMs
Input Name: q25_writeSomething25, Label Text: Write something interesting about Web Automation
Input Name: q23_reverseA, Label Text: Reverse a LinkedList
Input Name: q22_coverLetter, Label Text: Cover Letter
Form submitted successf