# PSX Daily Data Scraper from 2014-02-04 to 2014-02-28

Code

In [4]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import os
from datetime import datetime, timedelta

# Scrape the PSX daily-downloads page for every calendar day in the chosen
# months and save the selected files under
# 'Date wise data/<year>/<Mon>/<YYYY-MM-DD>/'.

# URL the per-date listing request is POSTed to
url = 'https://dps.psx.com.pk/daily-downloads'

# Set the desired year
desired_year = 2024

# Set headers for the listing request
headers = {
    'Accept': 'text/html, */*; q=0.01',
    'Accept-Language': 'en-US,en;q=0.9,ar;q=0.8',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Referer': 'https://dps.psx.com.pk/downloads',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
}

# Seconds to wait on any single HTTP request; without a timeout one stalled
# connection would hang the whole multi-month run.
request_timeout = 30

# Link label (text before ' — ' in each list item) -> extension for the saved file.
# Defined once here because it does not change between dates.
files_to_download = {
    'Market Summary (Closing)': 'z',
    'Closing Rate Summary': 'pdf',
    'Symbol Price (Upper/Lower)': 'zip',
    'Symbols Short Long Name': 'zip',
}

# Labels that are saved under a fixed short base name; any other label falls
# back to a sanitized version of the label itself.
custom_base_names = {
    'Market Summary (Closing)': 'market_summary',
    'Symbol Price (Upper/Lower)': 'symbol_price',
    'Symbols Short Long Name': 'symbols_short',
}

# Iterate over the months Oct to Dec (10, 11, 12) within the desired year
for month in range(10, 13):
    # Create a folder for the current month
    month_name = datetime(desired_year, month, 1).strftime('%b')  # e.g., Dec
    month_folder = os.path.join('Date wise data', str(desired_year), month_name)
    os.makedirs(month_folder, exist_ok=True)

    # Iterate over the days in the month
    current_date = datetime(desired_year, month, 1)
    while current_date.month == month:
        # Calculate the current date parameter
        date_param = current_date.strftime('%Y-%m-%d')

        # Create a folder for the current date
        current_run_folder = os.path.join(month_folder, date_param)
        os.makedirs(current_run_folder, exist_ok=True)

        # Request the listing for this date; on a network/HTTP failure report
        # it and move on to the next day instead of aborting the whole run.
        try:
            response = requests.post(
                url,
                headers=headers,
                data={'date': date_param},
                timeout=request_timeout,
            )
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Could not fetch the download list for {date_param}: {exc}")
            current_date += timedelta(days=1)
            continue

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        downloaded_count = 0
        for div in soup.find_all('div', class_='downloads'):
            for li in div.find_all('li'):
                link_text = li.text.strip().split(' — ')[0]

                # Exclude files not in the download list
                if link_text not in files_to_download:
                    continue

                # Skip list items that carry no usable link
                anchor = li.a
                if anchor is None or not anchor.get('href'):
                    continue

                file_url = urljoin(url, anchor['href'])

                # Customize file name as needed
                extension = files_to_download[link_text]
                if link_text in custom_base_names:
                    file_name = custom_base_names[link_text] + f".{extension}"
                else:
                    sanitized_name = link_text.replace(' ', '_').replace('/', '_').replace('(', '').replace(')', '')
                    file_name = f"{sanitized_name}.{extension}"

                file_path = os.path.join(current_run_folder, file_name)

                # Download the file; check the status first so that an HTTP
                # error page is never written to disk as if it were the data.
                try:
                    file_response = requests.get(file_url, timeout=request_timeout)
                    file_response.raise_for_status()
                except requests.RequestException as exc:
                    print(f"Failed to download {file_url}: {exc}")
                    continue

                with open(file_path, 'wb') as file:
                    file.write(file_response.content)

                print(f"Downloaded: {file_path}")

                # Increment the downloaded count
                downloaded_count += 1

                # Check if the desired number of files have been downloaded
                if downloaded_count == len(files_to_download):
                    break

            # Break outer loop if the desired number of files have been downloaded
            if downloaded_count == len(files_to_download):
                break

        if downloaded_count == 0:
            print(f"No matching files available for download on {date_param}")
        elif downloaded_count == len(files_to_download):
            print(f"All downloads completed on {date_param}")
        else:
            # Do not claim completeness when only some expected files were saved
            print(f"Downloaded {downloaded_count} of {len(files_to_download)} expected files on {date_param}")

        # Move to the next date
        current_date += timedelta(days=1)

Downloaded: Date wise data/2024/Oct/2024-10-01/market_summary.z
Downloaded: Date wise data/2024/Oct/2024-10-01/Closing_Rate_Summary.pdf
Downloaded: Date wise data/2024/Oct/2024-10-01/symbol_price.zip
Downloaded: Date wise data/2024/Oct/2024-10-01/symbols_short.zip
All downloads completed on 2024-10-01
Downloaded: Date wise data/2024/Oct/2024-10-02/market_summary.z
Downloaded: Date wise data/2024/Oct/2024-10-02/Closing_Rate_Summary.pdf
Downloaded: Date wise data/2024/Oct/2024-10-02/symbol_price.zip
Downloaded: Date wise data/2024/Oct/2024-10-02/symbols_short.zip
All downloads completed on 2024-10-02
Downloaded: Date wise data/2024/Oct/2024-10-03/market_summary.z
Downloaded: Date wise data/2024/Oct/2024-10-03/Closing_Rate_Summary.pdf
Downloaded: Date wise data/2024/Oct/2024-10-03/symbol_price.zip
Downloaded: Date wise data/2024/Oct/2024-10-03/symbols_short.zip
All downloads completed on 2024-10-03
Downloaded: Date wise data/2024/Oct/2024-10-04/market_summary.z
Downloaded: Date wise data/