# **Power BI vs Tableau**
## **Which one should you learn first?**

The focus of this analysis is to determine which BI tool one should learn first, based on job market trends. In this analysis, I will scrape job data from the following job boards:
* **MyJobMag Kenya**: provides local context on market trends
* **Upwork**: provides context on the freelance landscape

After scraping, I will analyse the demand for each BI tool by applying a regex function to find out which one is mentioned most often.

#### Scraping data from MyJobMag

In [5]:
#import all necessary libraries

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium_stealth import stealth
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

import time
import random
import csv
import os

In [6]:
#function to initialize the browser

def init_driver():
    """Start a maximized Chrome session patched with selenium-stealth.

    Returns:
        The configured ``webdriver.Chrome`` instance. The caller is
        responsible for calling ``quit()`` on it.
    """
    # Set up the browser using the options object.
    options = webdriver.ChromeOptions()
    options.add_argument('--start-maximized')
    options.add_argument('--disable-blink-features=AutomationControlled')
    # The Chrome switch is spelled with a hyphen; the underscore spelling
    # does not name an existing switch, so nothing was being excluded.
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Pass a real boolean: the string 'False' is a non-empty (truthy) value
    # and is not what the option expects.
    options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options,
    )
    # Apply the selenium-stealth patches to the freshly started session.
    stealth(
        driver,
        languages=["en-US", "en"],
        vendor="Google Inc.",
        platform="Win32",
        webgl_vendor="Intel Inc.",
        renderer="Intel Iris OpenGL Engine",
        fix_hairline=True,
    )
    return driver



In [7]:
#resume feature: if a job has already been scraped, the scraper will skip it

# Links already stored in the CSV by earlier runs; used to skip re-scraping.
scraped_links = set()

if os.path.exists("myjobmag_data.csv"):
    # newline="" lets the csv module itself handle line breaks embedded in
    # quoted fields (job descriptions span several lines).
    with open("myjobmag_data.csv", "r", newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            # Tolerate rows without a "link" value (e.g. a file with a
            # missing or different header) instead of raising KeyError.
            link = row.get("link")
            if link:
                scraped_links.add(link)

print(f"Resuming... {len(scraped_links)} jobs already scraped.")

# Append mode, so as not to overwrite data already in the CSV. The handle is
# intentionally left open: it is closed once scraping has finished.
csv_file = open("myjobmag_data.csv", "a", newline="", encoding="utf-8")
writer = csv.DictWriter(csv_file, fieldnames=["title", "description", "link"])

# Write the header only if the file is empty.
if os.stat("myjobmag_data.csv").st_size == 0:
    writer.writeheader()

Resuming... 0 jobs already scraped.


In [9]:
# Initialize the driver and collect job-detail links from the search results.
driver = init_driver()
base_url = ('https://www.myjobmag.co.ke/search/jobs?q=business+intelligence+analyst%2C+data+analyst&q=business+intelligence+analyst%2C+data+analyst')
# Randomized pause after the browser starts, before the first request.
time.sleep(random.uniform(4, 6))

job_links = []
seen_links = set()  # fast membership test for order-preserving de-duplication

total_pages = 50

for page in range(1, total_pages + 1):
    # The first results page has no explicit page parameter.
    url = base_url if page == 1 else base_url + f"&currentpage={page}"
    driver.get(url)
    link_elements = driver.find_elements(By.CSS_SELECTOR, "ul.job-list li.job-list-li li.job-info li.mag-b h2 a")

    for element in link_elements:
        href = element.get_attribute('href')
        # Skip anchors without an href and links already collected, so the
        # same job is not visited twice later on.
        if not href or href in seen_links:
            continue
        seen_links.add(href)
        job_links.append(href)

# Guard the sanity-check print: indexing an empty list raises IndexError.
if job_links:
    print(job_links[0])
else:
    print("No job links found - check the search URL and the CSS selector.")


https://www.myjobmag.co.ke/job/manager-data-transformation-intelligence-and-impact-dtii-john-snow-inc-jsi


In [10]:
# Visit every collected link, extract title and description, and persist each
# record to the CSV immediately so an interrupted run can be resumed.
job_data = []

try:
    for link in job_links:
        if link in scraped_links:
            print(f"Already scrapped this link,{link}")
            continue
        try:
            driver.get(link)
            time.sleep(2)

            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "job-details"))
            )
            # A missing element falls back to "N/A". `except Exception`
            # (not a bare except) keeps Ctrl+C / kernel interrupt working.
            try:
                title = driver.find_element(By.CSS_SELECTOR, "ul.read-ul li.read-head h1").text.strip()
            except Exception:
                title = "N/A"

            try:
                desc = driver.find_element(By.CSS_SELECTOR, "li#printable.job-description div.job-details").text.strip()
            except Exception:
                desc = "N/A"

            record = {
                "title": title,
                "description": desc,
                "link": link,
            }
            job_data.append(record)
            # Actually write the row: without this the CSV stays empty and
            # the resume logic never finds anything to skip.
            writer.writerow(record)
            csv_file.flush()
            # Also guards against the same link appearing twice in job_links.
            scraped_links.add(link)
            print(f"scrapped {title}")

        except Exception as e:
            print(f"There is an error loading the link {link}:{e}")
            if "invalid session id" in str(e).lower():
                print("🔁 Restarting Chrome session...")
                try:
                    driver.quit()
                except Exception:
                    # Best effort: the session is already reported invalid.
                    pass
                driver = init_driver()
            continue

        time.sleep(1.5)
finally:
    # Release the browser and the file even if the loop is interrupted.
    try:
        driver.quit()
    finally:
        csv_file.close()
print("Done scraping Data jobs!")

scrapped Manager- Data Transformation Intelligence and Impact (DTII) at John Snow Inc (JSI)
scrapped Manager – Data Transformation Intelligence and Impact at InSupply Health
scrapped Business Intelligence Analyst at Gertrude's Children's Hospital
scrapped Senior Analyst: Business Intelligence at Cellulant Corporation
scrapped Business Intelligence Analyst at SENRI Ltd. (Africa Incubator Ltd.)
scrapped Junior Business Intelligence Analyst at SunCulture Kenya Ltd
scrapped Developer, Business Intelligence at Standard Bank Group
scrapped Supervisor – Business Artificial Intelligence (BAI) at Kenya Revenue Authority (KRA)
scrapped Assistant Manager – Business Intelligence Platform Engineering at Kenya Revenue Authority (KRA)
scrapped Business Intelligence & Analytics Manager at 4G Capital
scrapped Business Intelligence Specialist at Airflo Limited
scrapped Business Intelligence Manager at NCBA Group
scrapped Business Intelligence & Strategy Monitoring at NCBA Group
scrapped Part-Time Lectur

In [11]:
import pandas as pd

# Tabulate the scraped records (one row per appended job dict) and report
# the resulting (rows, columns) tuple.
data = pd.DataFrame(data=job_data)
table_shape = data.shape
print(table_shape)

(898, 3)


In [None]:
# Set up the browser using the options object.
options = webdriver.ChromeOptions()
options.add_argument('--start-maximized')
options.add_argument('--disable-blink-features=AutomationControlled')
# The Chrome switch is spelled with a hyphen; the underscore spelling does
# not name an existing switch, so nothing was being excluded.
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Pass a real boolean: the string 'False' is a non-empty (truthy) value and
# is not what the option expects.
options.add_experimental_option('useAutomationExtension', False)

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Now set up the webdriver in stealth mode.
stealth(
    driver,
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="Win32",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)



In [None]:
# Collect job-detail links from the paginated search results.
job_links = []
seen_links = set()  # fast membership test for order-preserving de-duplication

total_pages = 100

for page in range(1, total_pages + 1):
    # The first results page has no explicit page parameter.
    url = base_url if page == 1 else base_url + f"&currentpage={page}"
    driver.get(url)
    link_elements = driver.find_elements(By.CSS_SELECTOR, "ul.job-list li.job-list-li li.job-info li.mag-b h2 a")

    for element in link_elements:
        href = element.get_attribute('href')
        # Skip anchors without an href and links already collected, so the
        # same job is not visited twice later on.
        if not href or href in seen_links:
            continue
        seen_links.add(href)
        job_links.append(href)

# Guard the sanity-check print: indexing an empty list raises IndexError.
if job_links:
    print(job_links[0])
else:
    print("No job links found - check the search URL and the CSS selector.")



https://www.myjobmag.co.ke/job/manager-data-transformation-intelligence-and-impact-dtii-john-snow-inc-jsi


scrapped Manager- Data Transformation Intelligence and Impact (DTII) at John Snow Inc (JSI)
scrapped Manager – Data Transformation Intelligence and Impact at InSupply Health
scrapped Business Intelligence Analyst at Gertrude's Children's Hospital
scrapped Senior Analyst: Business Intelligence at Cellulant Corporation
scrapped Business Intelligence Analyst at SENRI Ltd. (Africa Incubator Ltd.)
scrapped Junior Business Intelligence Analyst at SunCulture Kenya Ltd
scrapped Developer, Business Intelligence at Standard Bank Group
scrapped Supervisor – Business Artificial Intelligence (BAI) at Kenya Revenue Authority (KRA)
scrapped Assistant Manager – Business Intelligence Platform Engineering at Kenya Revenue Authority (KRA)
scrapped Business Intelligence & Analytics Manager at 4G Capital
scrapped Business Intelligence Specialist at Airflo Limited
scrapped Business Intelligence Manager at NCBA Group
scrapped Business Intelligence & Strategy Monitoring at NCBA Group
scrapped Part-Time Lectur

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import traceback

def init_driver():
    """Launch Chrome with Selenium's default settings and return the driver."""
    # NOTE(review): this reuses the name of the stealth-configured
    # init_driver defined earlier in the notebook, so running this cell
    # replaces that version with a plain, unpatched browser.
    browser = webdriver.Chrome()
    return browser

# Visit every collected link and gather title/description into job_data.
driver = init_driver()
job_data = []

try:
    for i, link in enumerate(job_links):
        try:
            driver.get(link)
            print(f"🔗 Visiting job {i+1}: {link}")
            time.sleep(2)

            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, "read-job-section"))
            )

            # A missing element falls back to "N/A". `except Exception`
            # (not a bare except) keeps Ctrl+C / kernel interrupt working.
            try:
                title = driver.find_element(By.CSS_SELECTOR, "ul.read-ul li.read-head h1").text.strip()
            except Exception:
                title = "N/A"

            try:
                desc = driver.find_element(By.CSS_SELECTOR, "li#printable.job-description div.job-details").text.strip()
            except Exception:
                desc = "N/A"

            job_data.append({
                "title": title,
                "description": desc,
                "link": link,
            })

            print(f"✅ Scraped: {title}")

        except Exception as e:
            print(f"\n❌ Error loading the link {link}: {e}")
            traceback.print_exc()

            # Restart the driver if the session is invalid.
            if "invalid session id" in str(e).lower():
                print("🔁 Restarting Chrome session...")
                try:
                    driver.quit()
                except Exception:
                    # Best effort: the session is already reported invalid.
                    pass
                driver = init_driver()
            continue

        time.sleep(1.5)
finally:
    # Always release the browser, even if the loop is interrupted.
    driver.quit()


In [77]:
import pandas as pd

# Tabulate the scraped records: one row per appended job dict.
data = pd.DataFrame(data=job_data)

In [79]:
# Display the (rows, columns) tuple of the scraped-jobs DataFrame.
data.shape

(56, 3)