In [3]:
import requests
import pandas as pd
import re
import csv

# URL template for one page of the report listing.
# NOTE(review): the path segment after "_next/data/" looks like a Next.js
# build ID, which would go stale whenever the site is redeployed — confirm
# and refresh it if requests start returning 404.
base_url = "https://www.chainabuse.com/_next/data/J8xBrAMAZD8pw5GgZq_Cf/en/reports.json?page={page_number}"

# URL template for the details of a single report. It never changes between
# pages, so it is defined once here instead of inside the page loop.
report_base_url = "https://www.chainabuse.com/_next/data/J8xBrAMAZD8pw5GgZq_Cf/en/report/{report_id}.json?context=browse-chain&chain=BTC&reportId={report_id}"

# Output file and its column layout.
csv_filename = "report_details.csv"
csv_columns = ["Description", "Reported By", "Scam Category", "Report ID", "Source"]

# Number of listing pages to scrape (pages 0 .. num_pages - 1). This is the
# single source of truth for both the loop bound and the progress message.
num_pages = 2

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the whole run forever.
request_timeout = 30

# Report IDs are pulled from the raw listing text with this pattern.
report_id_pattern = re.compile(r'"Report","id":"(.*?)"')


def extract_report_ids(text_data: str) -> list:
    """Return every report ID found in the raw text of a listing page.

    IDs are returned in order of appearance; an empty list means the text
    contained no match.
    """
    return report_id_pattern.findall(text_data)


def build_report_row(report_data: dict, report_id: str) -> dict:
    """Build one CSV row (keyed by ``csv_columns``) from a report payload.

    Args:
        report_data: Decoded JSON body of a report-details response.
        report_id: ID used to locate the ``Report:<id>`` entry inside
            ``pageProps.initialApolloState``.

    Returns:
        A dict with one entry per CSV column. A key that is missing from the
        payload is replaced by its "No ... found" placeholder.
    """
    # ``or {}`` guards against intermediate values that are present but null,
    # which would otherwise break the chained lookups.
    page_props = report_data.get('pageProps') or {}
    apollo_state = page_props.get('initialApolloState') or {}
    report_entry = apollo_state.get(f'Report:{report_id}') or {}

    # NOTE(review): description / reportedBy / scamCategory are read directly
    # from pageProps, as before — confirm the payload really carries them
    # there rather than inside the Report entry.
    return {
        "Description": page_props.get("description", "No description found"),
        "Reported By": page_props.get("reportedBy", "No reportedBy found"),
        "Scam Category": page_props.get("scamCategory", "No scamCategory found"),
        "Report ID": report_entry.get('id', "No report ID found"),
        "Source": report_entry.get('source', "No source found"),
    }


def fetch_report_row(session, report_id: str):
    """Download one report and convert it to a CSV row.

    Args:
        session: A ``requests.Session`` used to issue the request.
        report_id: ID of the report to fetch.

    Returns:
        The row dict from ``build_report_row``, or ``None`` when the request
        fails, returns a non-200 status, or the body is not valid JSON. A
        message is printed in each failure case so the run can continue.
    """
    report_url = report_base_url.format(report_id=report_id)
    try:
        report_response = session.get(report_url, timeout=request_timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch details for the report with ID: {report_id} ({exc})")
        return None

    if report_response.status_code != 200:
        print(f"Failed to fetch details for the report with ID: {report_id}")
        return None

    try:
        report_data = report_response.json()
    except ValueError:
        # A 200 response with a non-JSON body (e.g. an HTML error page).
        print(f"Invalid JSON in details for the report with ID: {report_id}")
        return None

    return build_report_row(report_data, report_id)


def scrape_reports(page_count: int = num_pages, output_path: str = csv_filename) -> None:
    """Scrape ``page_count`` listing pages and write report details to CSV.

    For each listing page the report IDs are extracted, each report is
    fetched individually, and one row per successfully fetched report is
    written to ``output_path`` (overwriting any existing file).
    """
    # One session for the whole run so connections to the host are reused.
    with open(output_path, mode='w', newline='', encoding='utf-8') as file, \
            requests.Session() as session:
        writer = csv.DictWriter(file, fieldnames=csv_columns)
        writer.writeheader()

        for page_number in range(page_count):
            url = base_url.format(page_number=page_number)

            try:
                response = session.get(url, timeout=request_timeout)
            except requests.RequestException as exc:
                print(f"Failed to fetch page {page_number + 1}: {exc}")
                continue

            # A failed listing request would otherwise look like an empty page.
            if response.status_code != 200:
                print(f"Failed to fetch page {page_number + 1} (status {response.status_code})")
                continue

            for report_id in extract_report_ids(response.text):
                row = fetch_report_row(session, report_id)
                if row is not None:
                    writer.writerow(row)

            print(f"Finished processing page {page_number + 1} of {page_count}")

    print(f"Data has been saved to {output_path}")


# Still true inside a notebook cell, so running the cell scrapes as before;
# importing this code as a module no longer triggers network traffic.
if __name__ == "__main__":
    scrape_reports()


Finished processing page 1 of 100
Finished processing page 2 of 100
Data has been saved to report_details.csv
