In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Set the URL
url = "https://www.indexmundi.com/energy/?country=et&product=oil&graph=consumption"


In [2]:
# Fetch the page
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Find the table - typically inside a <table> tag
table = soup.find('table')

# Initialize empty lists to store data
years = []
values = []

# Loop through table rows
for row in table.find_all('tr')[1:]:  # Skip the header row
    cols = row.find_all('td')
    if len(cols) == 2:
        year = cols[0].text.strip()
        value = cols[1].text.strip().replace(',', '')
        years.append(year)
        values.append(value)


In [3]:
# Create a DataFrame
df = pd.DataFrame({
    'Year': years,
    'Consumption (Barrels/Day)': values
})

# Convert year to integer and consumption to float
df['Year'] = df['Year'].astype(int)
df['Consumption (Barrels/Day)'] = pd.to_numeric(df['Consumption (Barrels/Day)'], errors='coerce')

# Save to CSV
df.to_csv('ethiopia_oil_consumption.csv', index=False)

print("Data saved to ethiopia_oil_consumption.csv")
df.head()


Data saved to ethiopia_oil_consumption.csv


Unnamed: 0,Year,Consumption (Barrels/Day)


In [4]:
print(soup.prettify()[:1000])  # Print first 1000 characters of the HTML to inspect structure


<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
 <head>
  <title>
   Ethiopia Crude Oil Consumption by Year (Thousand Barrels per Day)
  </title>
  <meta content="696085087" property="fb:admins"/>
  <meta content="http://www.indexmundi.com/img/compare-200x200.jpg" property="og:image"/>
  <link href="/s/s.css" rel="stylesheet" type="text/css"/>
  <script src="/js/charts.3.7.1/fusioncharts.js" type="text/javascript">
  </script>
  <style type="text/css">
   body {font-family:Arial,Verdana,Helvetica,Sans-Serif;margin-left:8px;margin-right:8px;}
    h1 {font-size:smaller;background-color:#e5ecf9;padding:0.5em;}
    h3 {font-size:small;background-color:#e5ecf9;padding:0.5em;margin-bottom:0.25em;margin-top:0.5em;}
    .news p {font-size:smaller;margin-left:0.5em;margin-right:0.5em;}
    .sectionTitle {background-color:#e5ecf9;margin-top:2em;width:98%;}
    .sec


In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Replace this with your path to chromedriver
chrome_path = "C:/path/to/chromedriver.exe"
service = Service(chrome_path)

# Start browser
driver = webdriver.Chrome(service=service)

# Load page
url = "https://www.indexmundi.com/energy/?country=et&product=oil&graph=consumption"
driver.get(url)

# Wait for page to load fully
time.sleep(5)

# Extract table rows
rows = driver.find_elements(By.CSS_SELECTOR, "table tbody tr")

# Extract data
data = []
for row in rows:
    cols = row.find_elements(By.TAG_NAME, "td")
    if len(cols) == 2:
        year = cols[0].text.strip()
        value = cols[1].text.strip()
        data.append([year, value])

# Close browser
driver.quit()

# Convert to DataFrame
df = pd.DataFrame(data, columns=["Year", "Consumption (Barrels/Day)"])
df["Year"] = df["Year"].astype(int)
df["Consumption (Barrels/Day)"] = pd.to_numeric(df["Consumption (Barrels/Day)"], errors='coerce')

# Save to CSV
df.to_csv("ethiopia_oil_consumption.csv", index=False)
print("✅ Data saved to ethiopia_oil_consumption.csv")
print(df.head())


ModuleNotFoundError: No module named 'selenium'