In [1]:
!pip install selenium beautifulsoup4 pandas seaborn matplotlib


Collecting selenium
  Downloading selenium-4.30.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.29.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.30.0-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio-0.29.0-py3-none-any.whl (492 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m492.9/492.9 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)
Downloading outcome-1.3.0.post0-py2.py3-

In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd

# Selenium configuration: run Chrome in headless mode (without a GUI).
options = Options()
options.add_argument("--headless")  # run without opening a browser window
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Page to scrape
url = "https://www.inmotionhosting.com/"

# Open the browser, load the page and parse its HTML. The try/finally makes
# sure the browser is shut down even when navigation or parsing raises, so a
# failed run does not leave a Chrome process behind.
driver = webdriver.Chrome(options=options)
try:
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    driver.quit()

# Find all hosting plan cards on the page
plans = soup.find_all("div", class_="imh-rostrum-card")


def _price_in(card, container_class):
    """Return the price text found inside one price container of a plan card.

    Looks up the ``div`` with class ``container_class`` inside ``card`` and,
    within it, the ``span`` with class ``rostrum-price``.

    Returns the stripped span text, or "Not found" when either the container
    or the span is missing (a missing span would otherwise raise
    AttributeError on ``None.text``).
    """
    container = card.find("div", class_=container_class)
    if container is None:
        return "Not found"
    price_span = container.find("span", class_="rostrum-price")
    if price_span is None:
        return "Not found"
    return price_span.text.strip()


# Parallel lists: index i of each list describes the i-th card
plan_names = []
discounted_prices = []
renewal_prices = []
features = []

# Extract information from each plan card
for plan in plans:
    # Plan title
    name_tag = plan.find("h3", class_="imh-rostrum-card-title")
    plan_names.append(name_tag.text.strip() if name_tag is not None else "Not found")

    # Reduced (promotional) price and regular (renewal) price
    discounted_prices.append(_price_in(plan, "imh-rostrum-starting-at-price-discounted"))
    renewal_prices.append(_price_in(plan, "imh-rostrum-starting-at-price-normal"))

    # Features: one entry per <li>, stored as a single ", "-joined string
    feature_list = plan.find("ul", class_="imh-rostrum-details-list")
    if feature_list is not None:
        feature_texts = [li.text.strip() for li in feature_list.find_all("li")]
    else:
        feature_texts = ["Not found"]
    features.append(", ".join(feature_texts))

# Merge cards that share the same plan name into a single record.
# Maps plan name -> {"Promotional price", "Renewal price", "Features"}.
# "Features" is a list of unique feature strings in first-seen order; a list
# (instead of a set) keeps the order stable from one run to the next.
merged_plans = {}

for name, promo, renewal, feature in zip(
    plan_names, discounted_prices, renewal_prices, features
):
    # Each entry of `features` is a ", "-joined string (or the "Not found"
    # placeholder). Split it back into items and drop blank ones, which
    # appear when a card's feature list is empty.
    if feature == "Not found":
        items = []
    else:
        items = [item for item in feature.split(", ") if item]

    entry = merged_plans.get(name)
    if entry is None:
        # First card seen for this plan name: create its record
        merged_plans[name] = {
            "Promotional price": promo,
            "Renewal price": renewal,
            "Features": list(dict.fromkeys(items)),  # de-duplicate, keep order
        }
        continue

    # The plan already exists: overwrite a field only when this card has it
    if promo != "Not found":
        entry["Promotional price"] = promo
    if renewal != "Not found":
        entry["Renewal price"] = renewal
    for item in items:
        if item not in entry["Features"]:
            entry["Features"].append(item)

# Reshape the merged records into column-oriented lists, one row per plan.
# Every column walks `merged_plans` in the same (insertion) order, so the
# lists stay aligned.
final_data = {
    "Plan Name": list(merged_plans),
    "Discounted Price": [
        record["Promotional price"] for record in merged_plans.values()
    ],
    "Renewal Price": [
        record["Renewal price"] for record in merged_plans.values()
    ],
    # Features are flattened back into one ", "-separated string per plan
    "Features": [
        ", ".join(record["Features"]) for record in merged_plans.values()
    ],
}

# Build the DataFrame from the column lists
df = pd.DataFrame(final_data)

# Write the table to a CSV file, leaving out the index column
df.to_csv(path_or_buf="inmotion_hosting_plans.csv", index=False)

# Confirm that the file was written
print("The data was saved in 'inmotion_hosting_plans.csv'.")

# Print the first 5 rows as a quick sanity check
preview = df.head(5)
print(preview)

The data was saved in 'inmotion_hosting_plans.csv'.
           Plan Name Discounted Price Renewal Price  \
0     Shared Hosting            $3.19         $9.99   
1   cPanel WordPress            $3.69     Not found   
2        VPS Hosting            $4.49        $13.99   
3  Dedicated Hosting           $35.00        $69.99   
4  WordPress Hosting            $3.69        $10.49   

                                            Features  
0  Free Domain & SSL, Unlimited Email Addresses, ...  
1                                                     
2  cPanel and Control Web Panel Available, Cloud-...  
3  Configurable Server For Ultimate Flexibility, ...  
4  Free Premium Themes & Plugins, Free Domain & S...  
