# Experiments on Web Form Detection and Auto-filling

This notebook demonstrates our approach for detecting web forms, extracting user‑fillable fields, mapping those fields to a set of user data attributes and automatically filling them using a headless browser.

We will use a simple local HTML file to illustrate the workflow because external websites are not accessible from this environment.

In [16]:
import sys
from pathlib import Path

PROJECT_ROOT = Path.cwd().parent
sys.path.append(str(PROJECT_ROOT))

print("Project root added:", PROJECT_ROOT)
from app.services.form_detector import detect_form
from app.services.form_analyzer import extract_form_fields
from app.services.field_mapper import match_field_to_user_key
from app.services.autofiller import autofill_form
from app.models.schemas import UserData

Project root added: d:\M2_MoSEF\testingform


In [18]:
user = UserData(first_name='Juan', last_name='Alonso', email='juan@example.com', phone='0706',full_name="jdalonsos")
results = autofill_form("https://www.facebook.com/r.php?entry_point=login", user, close_driver=False)
print('Auto-fill results:')
for r in results: 
    print(r)

Auto-fill results:
[AutofilledField(tag='input', type='hidden', name='jazoest', id='', placeholder='', label=None, matched_key=None, confidence=0.0, reason='No match found', filled=False), AutofilledField(tag='input', type='hidden', name='lsd', id='', placeholder='', label=None, matched_key=None, confidence=0.0, reason='No match found', filled=False), AutofilledField(tag='input', type='text', name='firstname', id='u_0_8_CV', placeholder='', label=None, matched_key='first_name', confidence=0.9, reason="Matched by token 'firstname' in field attributes", filled=True), AutofilledField(tag='input', type='text', name='lastname', id='u_0_a_nc', placeholder='', label=None, matched_key='last_name', confidence=0.9, reason="Matched by token 'lastname' in field attributes", filled=True), AutofilledField(tag='select', type='select-one', name='birthday_month', id='month', placeholder=None, label=None, matched_key='birth_day', confidence=0.9, reason="Matched by token 'day' in field attributes", fille

In [15]:
'''
from app.services.form_detector import detect_form
from app.services.form_analyzer import extract_form_fields
from app.services.field_mapper import match_field_to_user_key
from app.services.autofiller import autofill_form
from app.models.schemas import UserData

# Create a sample HTML file with a simple form
html_content = <!DOCTYPE html>
<html>
<head><title>Sample Form</title></head>
<body>
  <h1>Register</h1>
  <form action="/submit" method="post">
    <label for="firstname">First Name:</label>
    <input type="text" id="firstname" name="first_name" placeholder="John"><br><br>
    <label for="lastname">Last Name:</label>
    <input type="text" id="lastname" name="last_name" placeholder="Doe"><br><br>
    <label for="email">Email:</label>
    <input type="email" id="email" name="email" placeholder="john@example.com"><br><br>
    <label for="tel">Phone:</label>
    <input type="tel" id="tel" name="phone" placeholder="1234567890"><br><br>
    <input type="submit" value="Submit">
  </form>
</body>
</html>


# Save to a temporary file
file_path = r'D:\M2_MoSEF\testingform\sample_form.html'
with open(file_path, 'w') as f:
    f.write(html_content)

# Read HTML from the file
with open(file_path) as f:
    html = f.read()

# Detect form
print('Detect Form:')
print(detect_form(html))

# Extract fields
fields = extract_form_fields(html)
print('Extracted Fields:')
for field in fields:
    print(field)

# Map fields to user data keys
print('Mapping Results:')
for field in fields:
    mk, conf, reason = match_field_to_user_key(field)
    print(f"{field.name} -> {mk} (confidence={conf:.2f}) reason: {reason}")

# Prepare user data and auto-fill using Selenium on the local file
user = UserData(first_name='Juan', last_name='Alonso', email='juan@example.com', phone='5551234567')
results = autofill_form('file://' + file_path, user, close_driver=False)
print('Auto-fill results:')
for r in results: 
    print(r)
'''

  file_path = r'D:\M2_MoSEF\testingform\sample_form.html'


'\nfrom app.services.form_detector import detect_form\nfrom app.services.form_analyzer import extract_form_fields\nfrom app.services.field_mapper import match_field_to_user_key\nfrom app.services.autofiller import autofill_form\nfrom app.models.schemas import UserData\n\n# Create a sample HTML file with a simple form\nhtml_content = <!DOCTYPE html>\n<html>\n<head><title>Sample Form</title></head>\n<body>\n  <h1>Register</h1>\n  <form action="/submit" method="post">\n    <label for="firstname">First Name:</label>\n    <input type="text" id="firstname" name="first_name" placeholder="John"><br><br>\n    <label for="lastname">Last Name:</label>\n    <input type="text" id="lastname" name="last_name" placeholder="Doe"><br><br>\n    <label for="email">Email:</label>\n    <input type="email" id="email" name="email" placeholder="john@example.com"><br><br>\n    <label for="tel">Phone:</label>\n    <input type="tel" id="tel" name="phone" placeholder="1234567890"><br><br>\n    <input type="submit"

Exception ignored in: <function Service.__del__ at 0x0000011900CF1080>
Traceback (most recent call last):
  File "d:\M2_MoSEF\testingform\.venv\Lib\site-packages\selenium\webdriver\common\service.py", line 200, in __del__
    self.stop()
  File "d:\M2_MoSEF\testingform\.venv\Lib\site-packages\selenium\webdriver\common\service.py", line 156, in stop
    self.send_remote_shutdown_command()
  File "d:\M2_MoSEF\testingform\.venv\Lib\site-packages\selenium\webdriver\common\service.py", line 142, in send_remote_shutdown_command
    if not self.is_connectable():
           ^^^^^^^^^^^^^^^^^^^^^
  File "d:\M2_MoSEF\testingform\.venv\Lib\site-packages\selenium\webdriver\common\service.py", line 132, in is_connectable
    return utils.is_connectable(self.port)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\M2_MoSEF\testingform\.venv\Lib\site-packages\selenium\webdriver\common\utils.py", line 121, in is_connectable
    socket_ = socket.create_connection((host, port), 1)
              ^^^^^

ProtocolError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))