# Web Scraping and Automation of Processes with Python

Why is it important?
Allows efficient data collection without manual intervention.

Reduces time spent on repetitive tasks such as data entry or bulk uploads.

Facilitates integration with other tools and data analysis.


1️⃣ Practical cases, such as extracting data from a website and bulk data entry into platforms.


2️⃣ Process Automation using Selenium and RPA techniques.


In [3]:
# Initialize the WebDriver: launch Chrome using the driver path returned by
# ChromeDriverManager().install(), then maximize the browser window.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
driver.maximize_window()

# Open the RPA Challenge landing page.
url = 'https://www.rpachallenge.com/'
driver.get(url)

# Wait up to 10 seconds for the "RPA Stock Market" link to become clickable,
# then click it.
stock_market_locator = (By.LINK_TEXT, "RPA Stock Market")
stock_market_link = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(stock_market_locator)
)
stock_market_link.click()


In [34]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# Start a maximized Chrome session; the chromedriver path comes from
# ChromeDriverManager().install().
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
driver.maximize_window()

# Load the RPA Challenge website.
url = 'https://www.rpachallenge.com/'
driver.get(url)

# Follow the "RPA Stock Market" link once it is clickable (10-second limit).
wait = WebDriverWait(driver, 10)
stock_market_link = wait.until(
    EC.element_to_be_clickable((By.LINK_TEXT, "RPA Stock Market"))
)
stock_market_link.click()

# Captured samples; each entry is [timestamp, price] for one polling pass.
data = []

# Locator of the element whose text is read as the price (adjust it if the
# page structure changes).
price_locator = (By.XPATH, "//*[@id='cnt']")

try:
    while True:
        # Wait (up to 10 s) for the element instead of failing immediately
        # when the page has not finished rendering after the link click.
        price_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(price_locator)
        )
        price = price_element.text

        # Record the local wall-clock time together with the raw price text.
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
        data.append([timestamp, price])

        # Print to the console
        print(f"{timestamp} - Price: {price}")

        # Poll roughly once per second.
        time.sleep(1)

except KeyboardInterrupt:
    # Interrupting the cell (Ctrl+C / kernel interrupt) ends the capture.
    print("Scraping stopped by user.")

finally:
    # Export in `finally` so samples already collected are also kept when the
    # loop stops because of a Selenium error, not only on a user interrupt.
    try:
        df = pd.DataFrame(data, columns=["Timestamp", "Price"])
        df.to_csv("stock_prices.csv", index=False)
        print("Data saved in 'stock_prices.csv'.")
    finally:
        # Always release the browser, even if the export itself fails.
        driver.quit()


2025-03-23 00:55:35 - Price: 77.35
2025-03-23 00:55:36 - Price: 77.35
2025-03-23 00:55:37 - Price: 77.35
2025-03-23 00:55:38 - Price: 76.9
2025-03-23 00:55:39 - Price: 76.9
2025-03-23 00:55:40 - Price: 76.9
2025-03-23 00:55:41 - Price: 76.25
2025-03-23 00:55:42 - Price: 76.25
2025-03-23 00:55:43 - Price: 76.25
2025-03-23 00:55:44 - Price: 75.8
2025-03-23 00:55:45 - Price: 75.8
2025-03-23 00:55:46 - Price: 75.15
2025-03-23 00:55:47 - Price: 75.15
2025-03-23 00:55:48 - Price: 75.15
2025-03-23 00:55:49 - Price: 74.7
2025-03-23 00:55:50 - Price: 74.7
2025-03-23 00:55:51 - Price: 74.7
2025-03-23 00:55:52 - Price: 74.05
2025-03-23 00:55:53 - Price: 74.05
2025-03-23 00:55:54 - Price: 74.05
2025-03-23 00:55:55 - Price: 73.6
2025-03-23 00:55:56 - Price: 73.6
Scraping stopped by user.
Data saved in 'stock_prices.csv'.


In [35]:
# Check which container type holds the scraped samples.
type(data)

list

In [36]:
# Build a two-column table from the scraped samples and display it.
column_names = ["Time", "price"]
dataframe = pd.DataFrame(data, columns=column_names)
dataframe

Unnamed: 0,Time,price
0,2025-03-23 00:55:35,77.35
1,2025-03-23 00:55:36,77.35
2,2025-03-23 00:55:37,77.35
3,2025-03-23 00:55:38,76.9
4,2025-03-23 00:55:39,76.9
5,2025-03-23 00:55:40,76.9
6,2025-03-23 00:55:41,76.25
7,2025-03-23 00:55:42,76.25
8,2025-03-23 00:55:43,76.25
9,2025-03-23 00:55:44,75.8


## PROCESS AUTOMATION WITH PYTHON
## BUILDING AN RPA (ROBOTIC PROCESS AUTOMATION) BOT

In [10]:
import pandas as pd  

# Read the challenge workbook through a relative path and display the result.
excel_path = "././challenge.xlsx"
df = pd.read_excel(excel_path)

df


Unnamed: 0,First Name,Last Name,Company Name,Role in Company,Address,Email,Phone Number
0,John,Smith,IT Solutions,Analyst,98 North Road,jsmith@itsolutions.co.uk,40716543298
1,Jane,Dorsey,MediCare,Medical Engineer,11 Crown Street,jdorsey@mc.com,40791345621
2,Albert,Kipling,Waterfront,Accountant,22 Guild Street,kipling@waterfront.com,40735416854
3,Michael,Robertson,MediCare,IT Specialist,17 Farburn Terrace,mrobertson@mc.com,40733652145
4,Doug,Derrick,Timepath Inc.,Analyst,99 Shire Oak Road,dderrick@timepath.co.uk,40799885412
5,Jessie,Marlowe,Aperture Inc.,Scientist,27 Cheshire Street,jmarlowe@aperture.us,40733154268
6,Stan,Hamm,Sugarwell,Advisor,10 Dam Road,shamm@sugarwell.org,40712462257
7,Michelle,Norton,Aperture Inc.,Scientist,13 White Rabbit Street,mnorton@aperture.us,40731254562
8,Stacy,Shelby,TechDev,HR Manager,19 Pineapple Boulevard,sshelby@techdev.com,40741785214
9,Lara,Palmer,Timepath Inc.,Programmer,87 Orange Street,lpalmer@timepath.co.uk,40731653845


In [None]:
# List the column labels of the loaded DataFrame.
df.columns

In [None]:
# Reload the workbook and give some columns identifier-style names.
df = pd.read_excel("././challenge.xlsx")

# NOTE(review): the "Last Name " key carries a trailing space - presumably it
# mirrors the header as stored in the workbook; confirm before changing it.
new_names = {
    "First Name": "First_name",
    "Last Name ": "Last_name",
    "Company Name": "Company",
    "Phone Number": "Phone_number",
    "Role in Company": "Role_Company",
}
df = df.rename(columns=new_names)
df.columns

We need to upload the information from a form response to the company's website.

### Import libraries

In [11]:
# This library is to manipulate the browser
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver import ActionChains

# WebDriver Manager
from webdriver_manager.chrome import ChromeDriverManager

# Standard libraries
import pandas as pd
import numpy as np
import os
import time
import re
from unidecode import unidecode


In [None]:
# First, click the phone input, located through its ng-reflect-name attribute
# (waits up to 10 seconds for it to be clickable).
phone_xpath = "//input[@ng-reflect-name='labelPhone']"
phone = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, phone_xpath))
)
phone.click()



In [None]:
# Click the first-name input once it is clickable (10-second limit).
first_xpath = "//input[@ng-reflect-name='labelFirstName']"
first = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, first_xpath))
)
first.click()

In [None]:
# Click the last-name input once it is clickable (10-second limit).
last_xpath = "//input[@ng-reflect-name='labelLastName']"
last = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, last_xpath))
)
last.click()

In [None]:
# Click the role input once it is clickable (10-second limit).
role_xpath = "//input[@ng-reflect-name='labelRole']"
role = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, role_xpath))
)
role.click()

In [None]:
# Click the email input once it is clickable (10-second limit).
email_xpath = "//input[@ng-reflect-name='labelEmail']"
email = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, email_xpath))
)
email.click()

In [None]:
# Click the address input once it is clickable (10-second limit).
address_xpath = "//input[@ng-reflect-name='labelAddress']"
address = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, address_xpath))
)
address.click()


In [None]:
# Click the company-name input once it is clickable (10-second limit).
company = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, "//input[@ng-reflect-name='labelCompanyName']"))
)
# Method call on the located element; a comma here would build a tuple and
# call an undefined global `click`, raising NameError.
company.click()

In [None]:
# Click the button whose text contains "Start" once it is clickable
# (10-second limit).
start_xpath = "//button[contains(text(), 'Start')]"
start = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, start_xpath))
)
start.click()


In [None]:
# Click the element located by the CSS class "btn" once it is clickable
# (10-second limit).
submit_locator = (By.CLASS_NAME, "btn")
submit_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(submit_locator)
)
submit_button.click()


In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

In [38]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Launch a maximized Chrome session; the chromedriver path comes from
# ChromeDriverManager().install().
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
driver.maximize_window()

# Open the challenge page.
url = 'https://www.rpachallenge.com/'
driver.get(url)

# Fixed 2-second pause to let the page load before looking for the button.
time.sleep(2)

# Wait up to 10 seconds for a button whose text contains "Start", then click it.
start_xpath = "//button[contains(text(), 'Start')]"
start_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, start_xpath))
)
start_button.click()

# DataFrame column -> ng-reflect-name of the matching web form input.
# The pair order is the order in which the fields get filled.
_FIELD_PAIRS = (
    ("First_name", "labelFirstName"),
    ("Last_name", "labelLastName"),
    ("Phone_number", "labelPhone"),
    ("Email", "labelEmail"),
    ("Address", "labelAddress"),
    ("Company", "labelCompanyName"),
    ("Role_Company", "labelRole"),
)
column_to_web = dict(_FIELD_PAIRS)

# Read the workbook, then rename some headers to identifier-style names;
# columns not listed here keep their original labels.
renamed_headers = {
    "First Name": "First_name",
    "Last Name ": "Last_name",
    "Company Name": "Company",
    "Phone Number": "Phone_number",
    "Role in Company": "Role_Company",
}
df = pd.read_excel("./challenge.xlsx")
df = df.rename(columns=renamed_headers)

# Function to fill input fields
def fill_input_field(driver, field_name, value):
    """Type ``value`` into the input whose ``ng-reflect-name`` is ``field_name``.

    Args:
        driver: Selenium WebDriver controlling the page that shows the form.
        field_name: Value of the target input's ``ng-reflect-name`` attribute.
        value: Content to enter. It is converted with ``str()`` before typing,
            so non-string cell values (for example numeric spreadsheet cells)
            are accepted.

    Raises:
        selenium.common.exceptions.TimeoutException: If the input does not
            become clickable within 10 seconds.
    """
    field = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, f"//input[@ng-reflect-name='{field_name}']"))
    )
    # Bring the input into the viewport before interacting with it.
    driver.execute_script("arguments[0].scrollIntoView();", field)
    field.click()
    # Remove any existing content so the new value replaces it.
    field.clear()
    # send_keys types text; coerce explicitly so the call does not depend on
    # how Selenium treats non-string scalar types.
    field.send_keys(str(value))
    # Short fixed pause after each field.
    time.sleep(1)

# Function to click "Submit"
def click_submit(driver):
    """Scroll the form's submit input into view, click it, then pause 2 seconds.

    Waits up to 10 seconds for the input to become clickable.
    """
    submit_xpath = "//input[@class='btn uiColorButton' and @type='submit']"
    wait = WebDriverWait(driver, 10)
    button = wait.until(EC.element_to_be_clickable((By.XPATH, submit_xpath)))
    driver.execute_script("arguments[0].scrollIntoView();", button)
    button.click()
    time.sleep(2)

# Submit one form per DataFrame row: fill every mapped field, then click
# "Submit" before moving on to the next row.
for _, row in df.iterrows():
    print(f"Filling data for: {row['First_name']} {row['Last_name']}")

    for col_df in column_to_web:
        fill_input_field(driver, column_to_web[col_df], row[col_df])

    click_submit(driver)

print("Finished Work")

Filling data for: John Smith
Filling data for: Jane Dorsey
Filling data for: Albert Kipling
Filling data for: Michael Robertson
Filling data for: Doug Derrick
Filling data for: Jessie Marlowe
Filling data for: Stan Hamm
Filling data for: Michelle Norton
Filling data for: Stacy Shelby
Filling data for: Lara Palmer
Finished Work


In [40]:
# Manual-effort estimate: 30 * 7 gives a total that is then divided by 60 and
# reported as minutes.
# NOTE(review): presumably 30 seconds for each of 7 items - confirm.
manual = 30 * 7
minut = manual / 60

print(f"{minut:.2f} minutes")

3.50 minutes


In [39]:
# Convert an elapsed time given in milliseconds to seconds and to minutes.
MS_PER_SECOND = 1000
SECONDS_PER_MINUTE = 60

milliseconds = 101160

seconds = milliseconds / MS_PER_SECOND
minutes = seconds / SECONDS_PER_MINUTE

print(f"{milliseconds} ms = {seconds:.2f} seconds")
print(f"{milliseconds} ms = {minutes:.2f} minutes")



101160 ms = 101.16 seconds
101160 ms = 1.69 minutes
