In [5]:
import logging
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

# Set up logging
logging.basicConfig(
    filename="scraper.log",
    level=logging.ERROR,
    format="%(asctime)s - %(message)s"
)

def setup_driver():
    """Set up the Selenium WebDriver."""
    chrome_options = Options()
    chrome_options.binary_location = r"chrome.exe"
    service = Service(executable_path=r"chromedriver.exe")
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver

def navigate_to_academic_page(driver):
    """Navigate to the academic page for Abdullah Gül Üniversitesi."""
    # Step 1: Load the university list page
    driver.get("https://akademik.yok.gov.tr/AkademikArama/view/universityListview.jsp")

    # Wait for the table to load
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "table.table-striped tbody"))
    )

    # Find and click the "Abdullah Gül Üniversitesi" link
    rows = driver.find_elements(By.CSS_SELECTOR, "table.table-striped tbody tr")
    for row in rows:
        try:
            university_name = row.find_element(By.TAG_NAME, "a").text.strip()
            if university_name == "ABDULLAH GÜL ÜNİVERSİTESİ":
                row.find_element(By.TAG_NAME, "a").click()
                print(f"Navigated to academics page for {university_name}.")
                return True
        except Exception as e:
            logging.error(f"Error navigating to academic page: {e}", exc_info=True)

    print("Abdullah Gül Üniversitesi not found in the list.")
    return False

def scrape_academics(driver, university_name):
    """Scrape all academic data for Abdullah Gül Üniversitesi."""
    academics = []

    while True:
        try:
            # Wait for the table to load
            rows = WebDriverWait(driver, 30).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "table#authorlistTb tbody tr"))
            )

            # Extract academic details from the table rows
            for row in rows:
                try:
                    name = row.find_element(By.CSS_SELECTOR, "h4 a").text.strip()
                    title = row.find_element(By.CSS_SELECTOR, "h6").text.strip()
                    details = row.find_element(By.CSS_SELECTOR, "td:nth-child(3)").text.strip()
                    academics.append({
                        "University Name": university_name,
                        "Academic Name": name,
                        "Title": title,
                        "Details": details
                    })
                except Exception as e:
                    logging.error(f"Error extracting academic data: {e}", exc_info=True)

            # Handle Pagination
            try:
                next_page = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "ul.pagination li a[rel='next']"))
                )
                next_page.click()
                time.sleep(random.uniform(2, 4))  # Random delay to avoid detection
            except Exception:
                print("No more pages or pagination failed.")
                break

        except Exception as e:
            logging.error(f"Error occurred while scraping academics: {e}", exc_info=True)
            break

    return academics

def main():
    driver = setup_driver()
    all_data = []

    try:
        # Step 1: Navigate to Abdullah Gül Üniversitesi's academic page
        if not navigate_to_academic_page(driver):
            print("Failed to navigate to Abdullah Gül Üniversitesi's academic page.")
            return

        # Step 2: Scrape academic data
        university_name = "ABDULLAH GÜL ÜNİVERSİTESİ"
        print(f"Scraping academics for {university_name}...")
        data = scrape_academics(driver, university_name)
        print(f"Scraped data: {data}")  # Debugging output
        all_data.extend(data)

        # Step 3: Save data to CSV
        if all_data:
            df = pd.DataFrame(all_data)
            df.to_csv("abdullah_gul_universitesi_academics.csv", index=False, encoding="utf-8-sig")
            print("Scraping completed. Data saved to 'abdullah_gul_universitesi_academics.csv'.")
        else:
            print("No data was scraped. Check the website or script for issues.")

    finally:
        driver.quit()

if __name__ == "__main__":
    main()


Navigated to academics page for ABDULLAH GÜL ÜNİVERSİTESİ.
Scraping academics for ABDULLAH GÜL ÜNİVERSİTESİ...
No more pages or pagination failed.
Scraped data: [{'University Name': 'ABDULLAH GÜL ÜNİVERSİTESİ', 'Academic Name': 'BURAK ASILİSKENDER', 'Title': 'PROFESÖR', 'Details': 'PROFESÖR\nBURAK ASILİSKENDER\nABDULLAH GÜL ÜNİVERSİTESİ/MİMARLIK FAKÜLTESİ/MİMARLIK BÖLÜMÜ/MİMARİ TASARIM VE ELEŞTİRİ ANABİLİM DALI/\nMimarlık-Planlama-Tasarım   Mimarlık Mimari Tasarım ; Mimarlıkta Kuram- Eleştiri- Yöntem'}, {'University Name': 'ABDULLAH GÜL ÜNİVERSİTESİ', 'Academic Name': 'BURAK UZAL', 'Title': 'PROFESÖR', 'Details': 'PROFESÖR\nBURAK UZAL\nABDULLAH GÜL ÜNİVERSİTESİ/MÜHENDİSLİK FAKÜLTESİ/İNŞAAT MÜHENDİSLİĞİ BÖLÜMÜ/YAPI ANABİLİM DALI/\nMühendislik Temel Alanı   İnşaat Mühendisliği Yapı Malzemeleri'}, {'University Name': 'ABDULLAH GÜL ÜNİVERSİTESİ', 'Academic Name': 'ENGİN ARIK', 'Title': 'PROFESÖR', 'Details': 'PROFESÖR\nENGİN ARIK\nABDULLAH GÜL ÜNİVERSİTESİ/İNSAN VE TOPLUM BİLİMLERİ FAKÜLTE

In [7]:
import pandas as pd

# Load the CSV file
df = pd.read_csv("abdullah_gul_universitesi_academics.csv", encoding="utf-8-sig")

# Print the total number of academics
print(f"Total number of academics: {len(df)}")


Total number of academics: 20


In [9]:
import logging
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

# Set up logging
logging.basicConfig(
    filename="scraper.log",
    level=logging.ERROR,
    format="%(asctime)s - %(message)s"
)

def setup_driver():
    """Set up the Selenium WebDriver."""
    chrome_options = Options()
    chrome_options.binary_location = r"chrome.exe"
    service = Service(executable_path=r"chromedriver.exe")
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver

def navigate_to_academic_page(driver):
    """Navigate to the academic page for Abdullah Gül Üniversitesi."""
    driver.get("https://akademik.yok.gov.tr/AkademikArama/view/universityListview.jsp")

    # Wait for the table to load
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "table.table-striped tbody"))
    )

    # Find and click the "Abdullah Gül Üniversitesi" link
    rows = driver.find_elements(By.CSS_SELECTOR, "table.table-striped tbody tr")
    for row in rows:
        try:
            university_name = row.find_element(By.TAG_NAME, "a").text.strip()
            if university_name == "ABDULLAH GÜL ÜNİVERSİTESİ":
                row.find_element(By.TAG_NAME, "a").click()
                print(f"Navigated to academics page for {university_name}.")
                return True
        except Exception as e:
            logging.error(f"Error navigating to academic page: {e}", exc_info=True)

    print("Abdullah Gül Üniversitesi not found in the list.")
    return False

def scrape_academics(driver, university_name):
    """Scrape all academic data for the given university."""
    academics = []
    page = 1

    while True:
        try:
            print(f"Scraping page {page}...")

            # Wait for the academic table to load
            rows = WebDriverWait(driver, 30).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "table#authorlistTb tbody tr"))
            )

            # Extract academic details from the table rows
            for row in rows:
                try:
                    name = row.find_element(By.CSS_SELECTOR, "h4 a").text.strip()
                    title = row.find_element(By.CSS_SELECTOR, "h6").text.strip()
                    details = row.find_element(By.CSS_SELECTOR, "td:nth-child(3)").text.strip()
                    academics.append({
                        "University Name": university_name,
                        "Academic Name": name,
                        "Title": title,
                        "Details": details
                    })
                except Exception as e:
                    logging.error(f"Error extracting academic data: {e}", exc_info=True)

            # Look for the "Next" button
            try:
                next_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "ul.pagination li a[rel='next']"))
                )
                next_button.click()  # Click "Next"
                page += 1
                time.sleep(random.uniform(2, 4))  # Random delay to mimic human behavior
            except Exception:
                print("No more pages or 'Next' button not found.")
                break

        except Exception as e:
            logging.error(f"Error occurred while scraping academics: {e}", exc_info=True)
            break

    return academics

def main():
    driver = setup_driver()
    all_data = []

    try:
        # Navigate to Abdullah Gül Üniversitesi's academic page
        if not navigate_to_academic_page(driver):
            print("Failed to navigate to Abdullah Gül Üniversitesi's academic page.")
            return

        # Scrape academic data
        university_name = "ABDULLAH GÜL ÜNİVERSİTESİ"
        print(f"Scraping academics for {university_name}...")
        data = scrape_academics(driver, university_name)
        print(f"Scraped data: {len(data)} academics.")
        all_data.extend(data)

        # Save to CSV
        if all_data:
            df = pd.DataFrame(all_data)
            df.to_csv("abdullah_gul_universitesi_academics.csv", index=False, encoding="utf-8-sig")
            print("Scraping completed. Data saved to 'abdullah_gul_universitesi_academics.csv'.")
        else:
            print("No data was scraped. Check the website or script for issues.")

    finally:
        driver.quit()

if __name__ == "__main__":
    main()


Navigated to academics page for ABDULLAH GÜL ÜNİVERSİTESİ.
Scraping academics for ABDULLAH GÜL ÜNİVERSİTESİ...
Scraping page 1...
No more pages or 'Next' button not found.
Scraped data: 20 academics.
Scraping completed. Data saved to 'abdullah_gul_universitesi_academics.csv'.


In [11]:
import logging
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

# Set up logging
logging.basicConfig(
    filename="scraper.log",
    level=logging.ERROR,
    format="%(asctime)s - %(message)s"
)

def setup_driver():
    """Set up the Selenium WebDriver."""
    chrome_options = Options()
    chrome_options.binary_location = r"chrome.exe"
    service = Service(executable_path=r"chromedriver.exe")
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver

def navigate_to_academic_page(driver):
    """Navigate to the academic page for Abdullah Gül Üniversitesi."""
    driver.get("https://akademik.yok.gov.tr/AkademikArama/view/universityListview.jsp")

    # Wait for the table to load
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "table.table-striped tbody"))
    )

    # Find and click the "Abdullah Gül Üniversitesi" link
    rows = driver.find_elements(By.CSS_SELECTOR, "table.table-striped tbody tr")
    for row in rows:
        try:
            university_name = row.find_element(By.TAG_NAME, "a").text.strip()
            if university_name == "ABDULLAH GÜL ÜNİVERSİTESİ":
                row.find_element(By.TAG_NAME, "a").click()
                print(f"Navigated to academics page for {university_name}.")
                return True
        except Exception as e:
            logging.error(f"Error navigating to academic page: {e}", exc_info=True)

    print("Abdullah Gül Üniversitesi not found in the list.")
    return False

def scrape_academics(driver, university_name):
    """Scrape all academic data for the given university."""
    academics = []
    page = 1

    while True:
        try:
            print(f"Scraping page {page}...")

            # Wait for the academic table to load
            rows = WebDriverWait(driver, 30).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "table#authorlistTb tbody tr"))
            )

            # Extract academic details from the table rows
            for row in rows:
                try:
                    name = row.find_element(By.CSS_SELECTOR, "h4 a").text.strip()
                    title = row.find_element(By.CSS_SELECTOR, "h6").text.strip()
                    details = row.find_element(By.CSS_SELECTOR, "td:nth-child(3)").text.strip()
                    academics.append({
                        "University Name": university_name,
                        "Academic Name": name,
                        "Title": title,
                        "Details": details
                    })
                except Exception as e:
                    logging.error(f"Error extracting academic data: {e}", exc_info=True)

            # Pagination: Find the "Next" page button
            try:
                # Locate the pagination block
                pagination = driver.find_element(By.CSS_SELECTOR, "ul.pagination")

                # Find the active page
                active_page = pagination.find_element(By.CSS_SELECTOR, "li.active")

                # Locate the next sibling of the active page
                next_page = active_page.find_element(By.XPATH, "following-sibling::li/a")

                # Click the "Next" page button
                next_page.click()
                page += 1
                time.sleep(random.uniform(2, 4))  # Random delay to mimic human behavior
            except Exception:
                print("No more pages or 'Next' button not found.")
                break

        except Exception as e:
            logging.error(f"Error occurred while scraping academics: {e}", exc_info=True)
            break

    return academics

def main():
    driver = setup_driver()
    all_data = []

    try:
        # Navigate to Abdullah Gül Üniversitesi's academic page
        if not navigate_to_academic_page(driver):
            print("Failed to navigate to Abdullah Gül Üniversitesi's academic page.")
            return

        # Scrape academic data
        university_name = "ABDULLAH GÜL ÜNİVERSİTESİ"
        print(f"Scraping academics for {university_name}...")
        data = scrape_academics(driver, university_name)
        print(f"Scraped data: {len(data)} academics.")
        all_data.extend(data)

        # Save to CSV
        if all_data:
            df = pd.DataFrame(all_data)
            df.to_csv("abdullah_gul_universitesi_academics.csv", index=False, encoding="utf-8-sig")
            print("Scraping completed. Data saved to 'abdullah_gul_universitesi_academics.csv'.")
        else:
            print("No data was scraped. Check the website or script for issues.")

    finally:
        driver.quit()

if __name__ == "__main__":
    main()


Navigated to academics page for ABDULLAH GÜL ÜNİVERSİTESİ.
Scraping academics for ABDULLAH GÜL ÜNİVERSİTESİ...
Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
No more pages or 'Next' button not found.
Scraped data: 285 academics.
Scraping completed. Data saved to 'abdullah_gul_universitesi_academics.csv'.


In [13]:
import pandas as pd

def print_csv(file_name):
    """Prints the contents of a CSV file."""
    try:
        # Load the CSV file into a DataFrame
        df = pd.read_csv(file_name, encoding="utf-8-sig")

        # Print the contents of the DataFrame
        print(df.to_string(index=False))  # Avoid printing row indices for cleaner output

        # Optionally, print the total number of rows
        print(f"\nTotal academics: {len(df)}")
    except FileNotFoundError:
        print(f"Error: File '{file_name}' not found.")
    except Exception as e:
        print(f"An error occurred while reading the CSV: {e}")

# Call the function
print_csv("abdullah_gul_universitesi_academics.csv")


          University Name                Academic Name                               Title                                                                                                                                                                                                                                                                                                                                                                   Details
ABDULLAH GÜL ÜNİVERSİTESİ           BURAK ASILİSKENDER                            PROFESÖR                                                                                                                                            PROFESÖR\nBURAK ASILİSKENDER\nABDULLAH GÜL ÜNİVERSİTESİ/MİMARLIK FAKÜLTESİ/MİMARLIK BÖLÜMÜ/MİMARİ TASARIM VE ELEŞTİRİ ANABİLİM DALI/\nMimarlık-Planlama-Tasarım   Mimarlık Mimari Tasarım ; Mimarlıkta Kuram- Eleştiri- Yöntem
ABDULLAH GÜL ÜNİVERSİTESİ                   BURAK UZAL                            PROFESÖR    

In [None]:
import logging
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

# Set up logging
logging.basicConfig(
    filename="scraper.log",
    level=logging.ERROR,
    format="%(asctime)s - %(message)s"
)

def setup_driver():
    """Set up the Selenium WebDriver."""
    chrome_options = Options()
    chrome_options.binary_location = r"chrome.exe"
    service = Service(executable_path=r"chromedriver.exe")
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver

def get_university_links(driver):
    """Scrape all university names and links from the main page."""
    driver.get("https://akademik.yok.gov.tr/AkademikArama/view/universityListview.jsp")

    # Wait for the university list to load
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "table.table-striped tbody"))
    )

    # Extract university names and links
    universities = []
    rows = driver.find_elements(By.CSS_SELECTOR, "table.table-striped tbody tr")
    for row in rows:
        try:
            name = row.find_element(By.TAG_NAME, "a").text.strip()
            link = row.find_element(By.TAG_NAME, "a").get_attribute("href")
            universities.append({"name": name, "link": link})
        except Exception as e:
            logging.error(f"Error extracting university data: {e}", exc_info=True)

    return universities

def scrape_academics(driver, university_name):
    """Scrape academic data for a specific university."""
    academics = []
    page = 1

    while True:
        try:
            print(f"Scraping page {page} for {university_name}...")

            # Wait for the academic table to load
            rows = WebDriverWait(driver, 30).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "table#authorlistTb tbody tr"))
            )

            # Extract academic details from the table rows
            for row in rows:
                try:
                    name = row.find_element(By.CSS_SELECTOR, "h4 a").text.strip()
                    title = row.find_element(By.CSS_SELECTOR, "h6").text.strip()
                    details = row.find_element(By.CSS_SELECTOR, "td:nth-child(3)").text.strip()
                    academics.append({
                        "University Name": university_name,
                        "Academic Name": name,
                        "Title": title,
                        "Details": details
                    })
                except Exception as e:
                    logging.error(f"Error extracting academic data: {e}", exc_info=True)

            # Pagination: Find the "Next" page button
            try:
                pagination = driver.find_element(By.CSS_SELECTOR, "ul.pagination")
                active_page = pagination.find_element(By.CSS_SELECTOR, "li.active")
                next_page = active_page.find_element(By.XPATH, "following-sibling::li/a")
                next_page.click()
                page += 1
                time.sleep(random.uniform(2, 4))  # Random delay to mimic human behavior
            except Exception:
                print(f"No more pages for {university_name}.")
                break

        except Exception as e:
            logging.error(f"Error occurred while scraping academics for {university_name}: {e}", exc_info=True)
            break

    return academics

def main():
    driver = setup_driver()
    all_data = []

    try:
        # Step 1: Get all university links
        universities = get_university_links(driver)
        print(f"Found {len(universities)} universities.")

        # Step 2: Scrape each university's academic data
        for university in universities:
            university_name = university["name"]
            university_link = university["link"]

            print(f"Scraping academics for {university_name}...")
            driver.get(university_link)  # Navigate to the university's academic page

            # Scrape academics
            data = scrape_academics(driver, university_name)
            print(f"Scraped {len(data)} academics for {university_name}.")
            all_data.extend(data)

        # Step 3: Save all data to a CSV file
        if all_data:
            df = pd.DataFrame(all_data)
            df.to_csv("all_universities_academics.csv", index=False, encoding="utf-8-sig")
            print("Scraping completed. Data saved to 'all_universities_academics.csv'.")
        else:
            print("No data was scraped. Check the website or script for issues.")

    finally:
        driver.quit()

if __name__ == "__main__":
    main()


Found 208 universities.
Scraping academics for ABDULLAH GÜL ÜNİVERSİTESİ...
Scraping page 1 for ABDULLAH GÜL ÜNİVERSİTESİ...
Scraping page 2 for ABDULLAH GÜL ÜNİVERSİTESİ...
Scraping page 3 for ABDULLAH GÜL ÜNİVERSİTESİ...
Scraping page 4 for ABDULLAH GÜL ÜNİVERSİTESİ...
Scraping page 5 for ABDULLAH GÜL ÜNİVERSİTESİ...
Scraping page 6 for ABDULLAH GÜL ÜNİVERSİTESİ...
Scraping page 7 for ABDULLAH GÜL ÜNİVERSİTESİ...
Scraping page 8 for ABDULLAH GÜL ÜNİVERSİTESİ...


In [18]:
import pandas as pd

def print_csv(file_name):
    """Prints the contents of a CSV file."""
    try:
        # Load the CSV file into a DataFrame
        df = pd.read_csv(file_name, encoding="utf-8-sig")

        # Print the contents of the DataFrame
        print(df.to_string(index=False))  # Avoid printing row indices for cleaner output

        # Optionally, print the total number of rows
        print(f"\nTotal academics: {len(df)}")
    except FileNotFoundError:
        print(f"Error: File '{file_name}' not found.")
    except Exception as e:
        print(f"An error occurred while reading the CSV: {e}")

# Call the function
print_csv("all_data.csv")


Error: File 'all_data.csv' not found.
