<a href="https://colab.research.google.com/github/elephant-xyz/notebook/blob/main/mining_county.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# County Mining Process

In [None]:
# @title Step 1: Upload .env

In [None]:
# @title Step 2: Upload seed-results.csv

In [None]:
# @title Step 3: Prepare
import pandas as pd
import requests
import json
import logging
import csv
import os
import time
from urllib.parse import urlencode
from typing import Optional, Dict, Any
import traceback

# Configure logging once for the whole cell: timestamp, level name, then the message
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class PropertyDataProcessor:
    """Build ``seed.csv`` from IPFS records and download each parcel's page.

    The work is split in two halves that ``run_complete_process`` chains:

    1. ``create_seed_csv`` reads ``input_csv_path`` (one ``dataCid`` per row),
       follows the IPFS links dataCid -> relationships.property_seed -> to, and
       writes one HTTP request description per parcel to ``seed_csv_path``.
    2. ``download_property_data`` replays those requests and stores each
       response body as ``input/<parcel_id>.html``.
    """

    def __init__(self, input_csv_path: str = "seed-results.csv", seed_csv_path: str = "seed.csv"):
        """Store the CSV locations and the list of IPFS gateways to try.

        Args:
            input_csv_path: CSV that must contain a ``dataCid`` column.
            seed_csv_path: CSV written by ``create_seed_csv`` and read back by
                ``download_property_data``.
        """
        self.input_csv_path = input_csv_path
        self.seed_csv_path = seed_csv_path
        # Parcel IDs whose seed row was built successfully (filled by create_seed_csv)
        self.processed_parcels = []
        # Gateways are tried in this order until one returns parseable JSON
        self.ipfs_gateways = [
            "https://ipfs.io/ipfs/",
            "https://gateway.pinata.cloud/ipfs/",
            "https://cloudflare-ipfs.com/ipfs/",
            "https://dweb.link/ipfs/",
            "https://ipfs.infura.io/ipfs/"
        ]

    def fetch_from_ipfs(self, cid: str) -> Optional[Dict[Any, Any]]:
        """Fetch and JSON-decode ``cid``, falling back through every gateway.

        Returns:
            The decoded JSON from the first gateway that answers successfully,
            or None when every gateway fails (each failure is logged).
        """
        for gateway in self.ipfs_gateways:
            try:
                url = f"{gateway}{cid}"
                logger.info(f"Trying to fetch {cid} from {gateway}")
                response = requests.get(url, timeout=10)
                response.raise_for_status()
                return response.json()
            except Exception as e:
                # Deliberate best effort: any network, HTTP or JSON error
                # simply moves on to the next gateway.
                logger.warning(f"Error fetching from {gateway}: {e}")

        logger.error(f"Failed to fetch data from IPFS CID {cid} from all gateways")
        return None

    def trace_ipfs_chain(self, data_cid: str) -> Optional[Dict[Any, Any]]:
        """Follow dataCid -> property_seed -> to and return the final document.

        Returns:
            The JSON document behind the ``to`` link, or None when any fetch
            fails or an expected link is missing or malformed.
        """
        # Step 1: Fetch the initial data using dataCid
        logger.info(f"Step 1: Fetching initial data from dataCid: {data_cid}")
        initial_data = self.fetch_from_ipfs(data_cid)
        if not initial_data:
            return None

        # Step 2: Extract property_seed CID from relationships.
        # TypeError covers null / non-dict nodes, which would otherwise
        # escape and abort the whole run.
        try:
            property_seed_cid = initial_data["relationships"]["property_seed"]["/"]
            logger.info(f"Step 2: Found property_seed CID: {property_seed_cid}")
        except (KeyError, TypeError) as e:
            logger.error(f"Could not find property_seed CID in initial data: {e}")
            return None

        # Step 3: Fetch property_seed data
        logger.info(f"Step 3: Fetching property_seed data from: {property_seed_cid}")
        property_seed_data = self.fetch_from_ipfs(property_seed_cid)
        if not property_seed_data:
            return None

        # Step 4: Extract "to" CID from property_seed data
        try:
            to_cid = property_seed_data["to"]["/"]
            logger.info(f"Step 4: Found 'to' CID: {to_cid}")
        except (KeyError, TypeError) as e:
            logger.error(f"Could not find 'to' CID in property_seed data: {e}")
            return None

        # Step 5: Fetch final property data
        logger.info(f"Step 5: Fetching final property data from: {to_cid}")
        return self.fetch_from_ipfs(to_cid)

    @staticmethod
    def _build_seed_row(final_data):
        """Map one final IPFS document onto the column layout of seed.csv."""
        parcel_id = final_data.get('request_identifier', '')

        # ``or {}`` also covers an explicit null for source_http_request
        http_request = final_data.get('source_http_request') or {}
        multi_value_query_string = http_request.get('multiValueQueryString', {})

        return {
            'parcel_id': parcel_id,
            'Address': final_data.get('full_address', ''),
            'method': http_request.get('method', ''),
            'headers': '',  # Empty as per example
            'url': http_request.get('url', ''),
            # Stored as a JSON string so it survives the CSV round trip
            'multiValueQueryString': json.dumps(multi_value_query_string) if multi_value_query_string else '',
            'body': '',  # Empty as per example
            'json': '',  # Empty as per example
            'source_identifier': parcel_id,  # Same as parcel_id based on example
            'County': final_data.get('county_jurisdiction', '')
        }

    def create_seed_csv(self):
        """Read the input CSV, trace every dataCid through IPFS and write seed.csv.

        Rows that cannot be traced or converted are logged and skipped.

        Returns:
            bool: True when at least one row was written to ``seed_csv_path``;
            False when the input cannot be read, lacks a ``dataCid`` column, or
            no row could be processed.
        """
        try:
            df = pd.read_csv(self.input_csv_path)
            logger.info(f"Loaded {len(df)} records from {self.input_csv_path}")
        except Exception as e:
            logger.error(f"Error reading CSV file: {e}")
            return False

        if 'dataCid' not in df.columns:
            logger.error(f"Input CSV {self.input_csv_path} has no 'dataCid' column")
            return False

        output_rows = []

        for index, row in df.iterrows():
            data_cid = row['dataCid']

            # Blank cells come back from pandas as NaN; fetching "nan" would
            # only burn a timeout on every gateway.
            if pd.isna(data_cid) or not str(data_cid).strip():
                logger.error(f"Row {index + 1} has an empty dataCid, skipping")
                continue
            data_cid = str(data_cid).strip()

            logger.info(f"Processing row {index + 1}: {data_cid}")

            final_data = self.trace_ipfs_chain(data_cid)
            if not final_data:
                logger.error(f"Failed to trace IPFS chain for row {index + 1}")
                continue

            try:
                output_row = self._build_seed_row(final_data)
            except Exception as e:
                logger.error(f"Error processing final data for row {index + 1}: {e}")
                continue

            output_rows.append(output_row)

            # Track the parcel only once its row really exists, and as text so
            # callers can join the IDs regardless of the JSON type.
            parcel_id = output_row['parcel_id']
            if parcel_id:
                self.processed_parcels.append(str(parcel_id))
            logger.info(f"Successfully processed parcel ID: {parcel_id}")

        if not output_rows:
            logger.error("No data was successfully processed")
            print("No data was successfully processed")
            return False

        pd.DataFrame(output_rows).to_csv(self.seed_csv_path, index=False)
        return True

    def create_output_directory(self):
        """Create the input directory if it doesn't exist"""
        if not os.path.isdir('input'):
            os.makedirs('input', exist_ok=True)
            logger.info("Created 'input' directory")

    def parse_multi_value_query_string(self, query_string_json):
        """Convert a multiValueQueryString JSON string into request parameters.

        A single-element list collapses to its only value. Lists with several
        values are kept as lists so that repeated query parameters are not
        lost (requests encodes a list value as a repeated key).

        Returns:
            dict: the parameters, or {} for empty input, invalid JSON, or JSON
            that is not an object.
        """
        try:
            if not query_string_json or query_string_json.strip() == '':
                return {}

            query_data = json.loads(query_string_json)
            if not isinstance(query_data, dict):
                logger.error(f"Query string JSON is not an object: {query_string_json}")
                return {}

            params = {}
            for key, values in query_data.items():
                if isinstance(values, list) and len(values) == 1:
                    params[key] = values[0]
                else:
                    params[key] = values
            return params
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing query string JSON: {e}")
            return {}

    def make_request(self, row):
        """Replay the HTTP request described by one seed.csv row and save the body.

        Args:
            row: mapping with the keys parcel_id, Address, method, url and
                multiValueQueryString; headers, body and json are optional
                JSON strings.

        Returns:
            bool: True when the response was saved to ``input/<parcel_id>.html``;
            False for unsupported methods, HTTP/network errors or any other
            failure (all of which are logged).
        """
        # Bound before the try so the except handlers can always name the parcel
        parcel_id = 'Unknown'
        try:
            parcel_id = row['parcel_id']
            address = row['Address']
            method = row['method'].upper()
            url = row['url']
            query_params = self.parse_multi_value_query_string(row['multiValueQueryString'])

            logger.info(f"Processing parcel {parcel_id} at {address}")

            # Use headers from CSV if provided, otherwise use minimal headers
            request_headers = {}
            if row.get('headers') and row['headers'].strip():
                try:
                    request_headers = json.loads(row['headers'])
                except json.JSONDecodeError:
                    logger.warning(f"Invalid headers JSON for parcel {parcel_id}, using minimal headers")

            if not request_headers:
                request_headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                }

            if method == 'GET':
                response = requests.get(url, params=query_params, headers=request_headers, timeout=30)
            elif method == 'POST':
                # Form body is the default payload; a valid 'json' column replaces it
                post_data = {}
                if row.get('body') and row['body'].strip():
                    try:
                        post_data = json.loads(row['body'])
                    except json.JSONDecodeError:
                        logger.warning(f"Invalid body JSON for parcel {parcel_id}, using empty body")

                payload = {'data': post_data}
                if row.get('json') and row['json'].strip():
                    try:
                        payload = {'json': json.loads(row['json'])}
                    except json.JSONDecodeError:
                        logger.warning(f"Invalid JSON data for parcel {parcel_id}, using form data")

                response = requests.post(url, params=query_params, headers=request_headers, timeout=30, **payload)
            else:
                logger.warning(f"Unsupported method {method} for parcel {parcel_id}")
                return False

            response.raise_for_status()

            # Save the HTML content; make sure the target directory is there
            # even when this method is called on its own.
            os.makedirs('input', exist_ok=True)
            filename = f"input/{parcel_id}.html"
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(response.text)

            logger.info(f"Successfully saved {filename}")
            return True

        except requests.exceptions.RequestException as e:
            logger.error(f"Request failed for parcel {parcel_id}: {e}")
            return False
        except Exception as e:
            logger.error(f"Unexpected error processing parcel {parcel_id}: {e}")
            return False

    def download_property_data(self):
        """Read seed CSV and download property data for each parcel.

        Returns:
            bool: True once every row has been attempted (individual failures
            are counted and logged, not fatal); False when the seed CSV is
            missing, lacks required columns, or cannot be processed.
        """
        successful_downloads = 0
        failed_downloads = 0
        required_columns = ['parcel_id', 'Address', 'method', 'url', 'multiValueQueryString']

        try:
            with open(self.seed_csv_path, 'r', newline='', encoding='utf-8') as csvfile:
                reader = csv.DictReader(csvfile)

                # fieldnames is None for a completely empty file
                fieldnames = reader.fieldnames or []
                logger.info(f"Available columns: {fieldnames}")

                missing_columns = [col for col in required_columns if col not in fieldnames]
                if missing_columns:
                    logger.error(f"Missing required columns: {missing_columns}")
                    return False

                logger.info(f"Starting to process seed CSV file: {self.seed_csv_path}")

                # Load everything up front: gives the total count and lets the
                # file be closed before the slow network phase starts.
                rows = list(reader)

            total_rows = len(rows)
            logger.info(f"Found {total_rows} rows to process")

            for row_num, row in enumerate(rows, start=1):
                logger.info(f"Processing row {row_num}/{total_rows} - Parcel: {row.get('parcel_id', 'Unknown')}")

                try:
                    if self.make_request(row):
                        successful_downloads += 1
                        logger.info(f"✓ Successfully processed parcel {row.get('parcel_id')}")
                    else:
                        failed_downloads += 1
                        logger.error(f"✗ Failed to process parcel {row.get('parcel_id')}")
                except Exception as e:
                    failed_downloads += 1
                    logger.error(f"✗ Exception processing parcel {row.get('parcel_id')}: {e}")

                # Small delay between requests to be respectful to the server;
                # pointless after the final row.
                if row_num < total_rows:
                    time.sleep(1)

            logger.info(f"Download complete. Successful: {successful_downloads}, Failed: {failed_downloads}")
            return True

        except FileNotFoundError:
            logger.error(f"Seed CSV file '{self.seed_csv_path}' not found")
            return False
        except Exception as e:
            logger.error(f"Error processing seed CSV file: {e}")
            logger.error(f"Full traceback: {traceback.format_exc()}")
            return False

    def run_complete_process(self):
        """Run the complete process: IPFS data fetching + property download.

        Returns:
            bool: True when the seed CSV was created and the download phase
            ran; False as soon as either phase reports failure.
        """
        logger.info("=" * 60)
        logger.info("STARTING COMPLETE PROPERTY DATA PROCESSING")
        logger.info("=" * 60)

        # Step 1: Create seed CSV from IPFS data
        logger.info("STEP 1: Processing IPFS data to create seed CSV...")
        if not self.create_seed_csv():
            logger.error("Failed to create seed CSV. Aborting.")
            return False

        logger.info("STEP 1 COMPLETED: Seed CSV created successfully")
        logger.info("-" * 40)

        # Step 2: Create output directory for HTML files
        logger.info("STEP 2: Creating output directory...")
        self.create_output_directory()
        logger.info("STEP 2 COMPLETED: Output directory ready")
        logger.info("-" * 40)

        # Step 3: Download property data
        logger.info("STEP 3: Downloading property data from county websites...")
        if not self.download_property_data():
            logger.error("Failed to download property data.")
            return False

        logger.info("STEP 3 COMPLETED: Property data download finished")
        logger.info("=" * 60)
        logger.info("COMPLETE PROCESS FINISHED SUCCESSFULLY")
        logger.info("=" * 60)
        return True


def main():
    """Run the Prepare step end to end and print a one-line outcome."""
    # Default file locations; edit here when the CSVs live elsewhere
    processor = PropertyDataProcessor(
        input_csv_path="seed-results.csv",  # Input CSV with dataCid column
        seed_csv_path="seed.csv",           # Output seed CSV and input for downloads
    )

    if not processor.run_complete_process():
        print("❌ Prepare Failed")
        print("Check the logs above for detailed error information")
        return

    # Success: list every parcel ID that made it into the seed CSV
    parcel_ids = processor.processed_parcels
    if not parcel_ids:
        print("✅ Prepare done (no parcel IDs found)")
        return

    joined_ids = ", ".join(parcel_ids)
    print(f"✅ Prepare done for parcel IDs: {joined_ids}")


if __name__ == "__main__":
    main()

✅ Prepare done for parcel IDs: 52434205310037080


In [None]:
# @title Step 4: Transform
#!/usr/bin/env python3
import subprocess
import sys
import shutil
import os
import csv

def install_dependencies():
    """Install python-dotenv into the running interpreter with pip.

    pip's own output is discarded.

    Returns:
        bool: True when pip exits with status 0; False (after printing an
        error line) when pip fails or cannot be started.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "python-dotenv", "-q"]
    try:
        subprocess.run(
            pip_command,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: pip returned non-zero. OSError: the interpreter
        # could not be launched. A bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        print("❌ Failed to install dependencies")
        return False
    return True

def load_environment():
    """Call python-dotenv's ``load_dotenv()``.

    Returns:
        bool: True after ``load_dotenv()`` has run; False (with an error line
        printed) when the import raises ImportError.
    """
    loaded = False
    try:
        # Imported lazily: the package may only just have been installed
        from dotenv import load_dotenv
        load_dotenv()
        loaded = True
    except ImportError:
        print("❌ Failed to import dotenv")
    return loaded

def check_env_file():
    """Report whether a ``.env`` file is present in the working directory.

    Returns:
        bool: True when ``.env`` exists; otherwise prints upload instructions
        and returns False.
    """
    if os.path.exists(".env"):
        return True

    print("❌ Transform failed: .env file not found")
    print("Please upload the .env file to continue")
    return False

def copy_seed_results():
    """Duplicate seed-results.csv as upload-results.csv in the working directory.

    Returns:
        bool: True after a successful copy; False (with a message printed)
        when the source file is absent or the copy raises.
    """
    source_name = "seed-results.csv"
    target_name = "upload-results.csv"
    try:
        if os.path.exists(source_name):
            shutil.copy2(source_name, target_name)
            return True

        print("❌ Transform failed: seed-results.csv not found")
        return False
    except Exception:
        print("❌ Transform failed: Could not copy seed-results.csv")
        return False

def run_transform():
    """Run the ``test-evaluator-agent`` tool through ``uvx`` with output hidden.

    Returns:
        bool: True when the command exits with status 0; False (after
        printing an error line) when it exits non-zero or cannot be started.
    """
    command = [
        "uvx",
        "--from",
        "git+https://github.com/elephant-xyz/AI-Agent",
        "test-evaluator-agent"
    ]

    print("🔄 Transforming running...")

    try:
        subprocess.run(
            command,
            stdout=subprocess.DEVNULL,  # Suppress stdout
            stderr=subprocess.DEVNULL,  # Suppress stderr
            check=True  # Raise exception if command fails
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: non-zero exit. OSError: ``uvx`` missing or not
        # executable. Interrupting the cell (KeyboardInterrupt) is no longer
        # misreported as a transform failure.
        print("❌ Transform failed")
        return False

    return True

def get_seed_cid_and_html_link(path="county-results.csv"):
    """Return ``(dataGroupCid, htmlLink)`` from the first data row of a CSV.

    Raises:
        ValueError: when the file has no data row.
        KeyError: when the first row lacks either column.
    """
    with open(path, newline='', encoding='utf-8') as handle:
        # Only the first record matters; returning inside the loop stops the scan
        for record in csv.DictReader(handle):
            return record["dataGroupCid"], record["htmlLink"]
        raise ValueError("CSV file is empty")

def has_submit_errors(path="submit_errors.csv"):
    """Tell whether the error CSV holds at least one row below its header.

    A missing file counts as "no errors" and yields False.
    """
    try:
        with open(path, newline='', encoding='utf-8') as handle:
            for _ in csv.DictReader(handle):
                # One data row is enough to answer the question
                return True
            return False
    except FileNotFoundError:
        return False

def run_validate_and_upload():
    """Run the CLI ``validate-and-upload submit`` command and print the outcome.

    Returns:
        bool: True when the command succeeds, no submit errors were recorded
        and the results CSV could be read; False otherwise (the reason is
        printed).
    """
    cli_command = [
        "npx", "-y", "@elephant-xyz/cli",
        "validate-and-upload", "submit",
        "--output-csv", "county-results.csv",
    ]

    try:
        subprocess.run(
            cli_command,
            stdout=subprocess.DEVNULL,    # hide stdout
            stderr=subprocess.PIPE,       # capture stderr
            check=True,
            text=True,                    # stderr as string
        )

        if not has_submit_errors():
            # Clean run: report what the CLI wrote to the results CSV
            seed_group_cid, html_link = get_seed_cid_and_html_link()
            print("✅ Transform done\n")
            print(f"county group CID: {seed_group_cid}\n")
            print(f"HTML link: {html_link}")
            return True

        # Recorded errors stop the step here
        print("❌ Transform failed, please check submit_errors.csv for details", file=sys.stderr)
        return False

    except subprocess.CalledProcessError as failure:
        # Non-zero exit: surface the captured stderr
        print(f"Command failed (exit code {failure.returncode}):", file=sys.stderr)
        if failure.stderr:
            print(failure.stderr.strip(), file=sys.stderr)
        return False
    except Exception as error:
        print(f"❌ Validation and upload failed: {str(error)}")
        return False

def main():
    """Run the Transform step; stop at the first stage that reports failure.

    Returns:
        bool: False as soon as a stage returns a falsy value, otherwise the
        result of the final validate-and-upload stage.
    """
    # Stages run strictly in this order:
    #   1. install dependencies      2. load environment
    #   3. check for the .env file   4. copy seed-results.csv to upload-results.csv
    #   5. run the transform command
    preparation_stages = (
        install_dependencies,
        load_environment,
        check_env_file,
        copy_seed_results,
        run_transform,
    )
    for stage in preparation_stages:
        if not stage():
            return False

    # 6. validation and upload decides the overall outcome
    return run_validate_and_upload()

if __name__ == "__main__":
    success = main()
    # Don't use sys.exit() in Jupyter/IPython environments

    if not success:
        print("Process completed with errors")


✅ Transform done

county group CID: bafkreie5pbx4k3wt3fnd4qewthsde2jxewm3krcgn72ecbyvnzqhaeylce

HTML link: http://dweb.link/ipfs/bafybeibrz2cupddfmxuzmk4b5ghun32hno4oyw2ehtooyoipottm7qeivu


In [None]:
# @title Step 5: Validate
# IPython shell escape (valid only inside a notebook cell): quietly install python-dotenv
! pip3 install python-dotenv -q

# Run load_dotenv() up front, before the CLI command further down is launched
from dotenv import load_dotenv
load_dotenv()

import subprocess
import sys
import csv


def get_seed_cid_and_html_link(path="county-results.csv"):
    """Return ``(dataGroupCid, htmlLink)`` taken from the first data row of ``path``.

    Raises:
        ValueError: when the CSV contains no data row.
        KeyError: when the first row lacks either column.
    """
    with open(path, newline='', encoding='utf-8') as handle:
        # The loop body runs at most once: only the first record is needed
        for record in csv.DictReader(handle):
            return record["dataGroupCid"], record["htmlLink"]
        raise ValueError("CSV file is empty")


def has_submit_errors(path="submit_errors.csv"):
    """Return True if submit_errors.csv has at least one row after the header.

    A missing file means nothing was recorded and yields False, matching the
    Transform step's version of this helper instead of raising
    FileNotFoundError.
    """
    try:
        with open(path, newline='', encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile)
            return next(reader, None) is not None
    except FileNotFoundError:
        return False


def run_validate_and_upload():
    """Run the CLI ``validate-and-upload submit`` command and print the result.

    Prints the county group CID and HTML link on success, or points at
    submit_errors.csv when errors were recorded. When the command itself
    fails, its stderr is printed and the process exits with the command's
    return code.
    """
    submit_command = [
        "npx", "-y", "@elephant-xyz/cli",
        "validate-and-upload", "submit",
        "--output-csv", "county-results.csv",
    ]
    try:
        subprocess.run(
            submit_command,
            stdout=subprocess.DEVNULL,    # hide stdout
            stderr=subprocess.PIPE,       # capture stderr in a buffer
            check=True,
            text=True,                    # stderr as a string
        )

        # Recorded errors end the step here
        if has_submit_errors():
            print("❌ Validate failed, please check submit_errors.csv for details", file=sys.stderr)
            return

        # Otherwise read the results back
        seed_group_cid, html_link = get_seed_cid_and_html_link()
        print("✅ Validate done\n")
        print(f"County group CID: {seed_group_cid}\n")
        print(f"HTML link: {html_link}")

    except subprocess.CalledProcessError as failure:
        # The command exited non-zero: show its stderr and propagate the code
        print(f"Command failed (exit code {failure.returncode}):", file=sys.stderr)
        print(failure.stderr.strip(), file=sys.stderr)
        sys.exit(failure.returncode)


if __name__ == "__main__":
    run_validate_and_upload()


✅ Validate done

County group CID: bafkreiequh74xoafkabgtqnsyvynus5x6wmmh3k67v6cqrusplkhxuyvke

HTML link: http://dweb.link/ipfs/bafybeia4j33blycalqtf3pxtzq5k5iams7ax7lbaxblyk5p2cwm5xfedhi


In [None]:
# @title Step 6: Upload
# IPython shell escape (valid only inside a notebook cell): quietly install python-dotenv and requests
! pip3 install python-dotenv requests -q

# Run load_dotenv() up front, before any of the functions below are called
from dotenv import load_dotenv
load_dotenv()

import subprocess
import sys
import csv

import requests


def get_seed_info(path="county-results.csv"):
    """Return the first data row of the CSV at ``path`` as a column->value mapping.

    Raises:
        ValueError: when the file has no data row.
    """
    with open(path, newline='', encoding='utf-8') as handle:
        for record in csv.DictReader(handle):
            # First record found: nothing further needs to be read
            return record
        raise ValueError("CSV file is empty")


def has_submit_errors(path="submit_errors.csv"):
    """Return True if submit_errors.csv has at least one row after the header.

    A missing file means nothing was recorded and yields False, matching the
    Transform step's version of this helper instead of raising
    FileNotFoundError.
    """
    try:
        with open(path, newline='', encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile)
            return next(reader, None) is not None
    except FileNotFoundError:
        return False


def count_upload_records(path="county-results.csv"):
    """Count the data rows (header excluded) of the CSV at ``path``."""
    with open(path, newline='', encoding='utf-8') as handle:
        records = list(csv.DictReader(handle))
    return len(records)


def fetch_with_fallback(cid, gateways=None):
    """Fetch ``cid`` from the first IPFS gateway that returns a usable reply.

    Args:
        cid: content identifier appended to each gateway prefix.
        gateways: optional list of URL prefixes; a built-in list of public
            gateways is used when omitted.

    Returns:
        The response of the first gateway answering 200 with a non-blank
        body, or None when no gateway does.
    """
    candidates = gateways
    if candidates is None:
        candidates = [
            "https://ipfs.io/ipfs/",
            "https://gateway.pinata.cloud/ipfs/",
            "https://dweb.link/ipfs/",
            "https://cloudflare-ipfs.com/ipfs/"
        ]

    for prefix in candidates:
        try:
            reply = requests.get(f"{prefix}{cid}", timeout=10)
            usable = reply.status_code == 200 and reply.text.strip()
            if usable:
                return reply
        except Exception:
            # Any failure on this gateway just moves on to the next one
            pass

    # Every gateway failed or answered with nothing usable
    return None


def _link_cid(ref):
    """Return the CID held by a ``{"/": cid}`` link, or ``ref`` itself otherwise."""
    return ref.get("/") if isinstance(ref, dict) else ref


def _unique_entity_key(base, first_suffix, taken):
    """Return ``base`` when unused in ``taken``, else the first free ``base_<n>`` with n >= first_suffix."""
    if base not in taken:
        return base
    suffix = first_suffix
    while f"{base}_{suffix}" in taken:
        suffix += 1
    return f"{base}_{suffix}"


def _record_relationship(rel_name, rel_cid, label, rel_key, first_suffix,
                         entity_links, relationship_links, url_to_name):
    """Fetch one relationship object; record its from/to entities and its own link."""
    try:
        response = fetch_with_fallback(rel_cid)
        if response is None:
            print(f"Warning: Could not fetch {label} from any gateway: {rel_cid}", file=sys.stderr)
            return

        rel_data = response.json()

        if "to" in rel_data and "/" in rel_data["to"]:
            to_cid = rel_data["to"]["/"]
            # Extract x and y from x_has_y pattern
            parts = rel_name.split("_has_")
            x = parts[0] if len(parts) > 0 else "unknown"
            y = parts[1] if len(parts) > 1 else "unknown"

            # 'from' (when present) is recorded before 'to'
            endpoints = []
            if "from" in rel_data and "/" in rel_data["from"]:
                endpoints.append((x, rel_data["from"]["/"]))
            endpoints.append((y, to_cid))

            for base, cid in endpoints:
                url = f"https://ipfs.io/ipfs/{cid}"
                if url in url_to_name:
                    continue  # this entity is already listed under some name
                # Never reuse a key that already points at a different URL;
                # that would silently drop the earlier entity.
                key = _unique_entity_key(base, first_suffix, entity_links)
                url_to_name[url] = key
                entity_links[key] = url

        # Add the relationship link itself AFTER processing entities
        relationship_links[rel_key] = f"https://ipfs.io/ipfs/{rel_cid}"

    except ValueError as e:
        print(f"Warning: JSON decode error for {label}: {e}", file=sys.stderr)
    except Exception as e:
        print(f"Warning: Could not fetch relationship data for {label}: {e}", file=sys.stderr)


def collect_data_ipfs_links(data_cid):
    """Collect IPFS links for the County data structure rooted at ``data_cid``.

    The document behind ``data_cid`` is fetched, then every relationship
    listed in its ``relationships`` object is fetched in turn. For each one
    the ``from`` and ``to`` entities are recorded once per distinct URL, and
    the relationship object itself is recorded as well.

    Returns:
        dict: name -> ``https://ipfs.io/ipfs/<cid>`` URL, entities first and
        relationships after them. Empty when the root document cannot be
        fetched or decoded. Problems with individual relationships are
        printed to stderr and skipped.
    """
    try:
        response = fetch_with_fallback(data_cid)
        if response is None:
            print(f"Error: Could not fetch seed data from any gateway for CID: {data_cid}", file=sys.stderr)
            return {}
        seed_data = response.json()
    except Exception as e:
        print(f"Error fetching seed data: {e}", file=sys.stderr)
        return {}

    entity_links = {}  # For actual entities (person, property, etc.)
    relationship_links = {}  # For relationship objects
    url_to_name = {}  # Track which URLs we've already seen and their names

    # Tolerate a root document without a usable "relationships" object
    relationships = seed_data.get("relationships") if isinstance(seed_data, dict) else None
    if not isinstance(relationships, dict):
        relationships = {}

    # Process required single-value relationships
    single_rels = [
        "property_has_address",
        "property_has_lot",
        "property_has_structure",
        "property_has_utility",
        "property_has_flood_storm_information",
    ]
    for rel_name in single_rels:
        rel_ref = relationships.get(rel_name)
        if not rel_ref:
            continue
        rel_cid = _link_cid(rel_ref)
        if rel_cid:
            _record_relationship(
                rel_name, rel_cid,
                label=rel_name,
                rel_key=rel_name,
                first_suffix=2,
                entity_links=entity_links,
                relationship_links=relationship_links,
                url_to_name=url_to_name,
            )

    # Process array relationships
    array_rels = [
        "company_has_property",
        "person_has_property",
        "property_has_file",
        "property_has_layout",
        "property_has_tax",
        "property_has_sales_history",
        "sales_history_has_company",
        "sales_history_has_person"
    ]

    for rel_name in array_rels:
        rel_array = relationships.get(rel_name)
        if not rel_array or not isinstance(rel_array, list):
            continue

        for i, rel_item in enumerate(rel_array):
            rel_cid = _link_cid(rel_item)
            if not rel_cid:
                continue

            # Special naming for person_has_property and company_has_property
            if rel_name == "person_has_property":
                rel_key = f"person_{i+1}_has_property"
            elif rel_name == "company_has_property":
                rel_key = f"company_{i+1}_has_property"
            else:
                rel_key = f"{rel_name}_{i}"

            _record_relationship(
                rel_name, rel_cid,
                label=f"{rel_name}[{i}]",
                rel_key=rel_key,
                first_suffix=i + 1,  # indexed names start from the item's 1-based position
                entity_links=entity_links,
                relationship_links=relationship_links,
                url_to_name=url_to_name,
            )

    # Combine entity links first, then relationship links
    all_links = {}
    all_links.update(entity_links)
    all_links.update(relationship_links)

    return all_links


def run_validate_and_upload():
    """Print an upload summary and every IPFS link collected for a fixed data CID.

    The CLI ``validate-and-upload`` call, the ``submit_errors.csv`` check and
    the ``get_seed_info()`` lookup are disabled (kept below as commented-out
    code); the seed group CID, data CID and HTML link are hard-coded instead.

    If a ``subprocess.CalledProcessError`` is raised inside the ``try`` block,
    its stderr is printed and the process exits with the command's return code.
    """
    try:
        # NOTE: the CLI step below is disabled and retained for reference only.
        # subprocess.run(
        #     ["npx", "-y", "@elephant-xyz/cli", "validate-and-upload", "submit", "--output-csv", "county-results.csv"],
        #     stdout=subprocess.DEVNULL,    # hide stdout
        #     stderr=subprocess.PIPE,       # capture stderr in a buffer
        #     check=True,
        #     text=True,
        # )

        # if has_submit_errors():
        #     print("❌ Validate failed, please check submit_errors.csv for details", file=sys.stderr)
        #     return

        # seed_info = get_seed_info()
        # Hard-coded stand-ins for the values the disabled lookup would supply.
        seed_group_cid = "bafkreiequh74xoafkabgtqnsyvynus5x6wmmh3k67v6cqrusplkhxuyvke"
        data_cid = "bafkreibbxj5gbcqho3iranhsrwt7j2ajgwtjb7kz62mb55moju6i6tlad4"
        html_link = "http://dweb.link/ipfs/bafybeidvm5nfq3f6akwiljfe3m5a3bhhwotpqc5i46hih56fjldyqznp3u"

        links = collect_data_ipfs_links(data_cid)

        # The reported file count is simply the number of collected links.
        link_count = len(links)

        print("✅ Upload done\n")
        print(f"{link_count} files uploaded\n")

        print(f"County group CID: {seed_group_cid}\n")
        print(f"HTML link: {html_link}\n")

        # Dump every collected name -> URL pair, one per line.
        print("=== IPFS Links ===")
        for name, url in links.items():
            print(f"{name}: {url}")

    except subprocess.CalledProcessError as exc:
        print(f"Command failed (exit code {exc.returncode}):", file=sys.stderr)
        print(exc.stderr.strip(), file=sys.stderr)
        sys.exit(exc.returncode)


# Entry point: run the step only when this code executes as the main module.
if __name__ == "__main__":
    run_validate_and_upload()

✅ Upload done

86 files uploaded

County group CID: bafkreiequh74xoafkabgtqnsyvynus5x6wmmh3k67v6cqrusplkhxuyvke

HTML link: http://dweb.link/ipfs/bafybeidvm5nfq3f6akwiljfe3m5a3bhhwotpqc5i46hih56fjldyqznp3u

=== IPFS Links ===
property: https://ipfs.io/ipfs/bafkreihupabbeye3v2hpvxaplwwjdxwsavzolneab2igdw472maoczq6qa
address: https://ipfs.io/ipfs/bafkreihytmfhn26gyuyhdc5d3ovpbi3c2kygmhnzypmojoug4e7fxgjps4
lot: https://ipfs.io/ipfs/bafkreihgofffe3h6quj6e3l3yvyct5kdbzmybfx4wu2hfuykzhaipu3yvy
structure: https://ipfs.io/ipfs/bafkreidbfsf45uy2rmpmlbruoi4ybez2mjmrewpy4keasoj5gn2m7xjuqy
utility: https://ipfs.io/ipfs/bafkreibf7ollclzkkq7erazrhfbq7gvw35cju3yp2ihdzvyo5qgejoy5ji
company: https://ipfs.io/ipfs/bafkreihgjt72wqenjirz7ie35lvvpkll42zvg3sl2tqdybqleswca5pzki
person: https://ipfs.io/ipfs/bafkreihhfk76ycskrv5pqtwjo2lbhsh27kxmqagsk2dqxik6wgqgcaeicy
person_2: https://ipfs.io/ipfs/bafkreigmndfysu6jtp2e6w4bpr4fjowhoba76lof7irro73ovjd2e3kncu
person_3: https://ipfs.io/ipfs/bafkreif3mlbboolj3kuv5ld

In [None]:
# @title Step 7: Submit

! pip3 install python-dotenv -q

from dotenv import load_dotenv
load_dotenv()

import subprocess
import sys
import csv


def get_transaction_hash(path="transaction-status.csv"):
    """Return the ``transactionHash`` value from the first data row of a CSV file.

    Args:
        path: CSV file to read; its first line is used as the header row.

    Returns:
        The ``transactionHash`` field of the first row after the header.

    Raises:
        ValueError: If the file contains no data rows.
        KeyError: If the first row has no ``transactionHash`` column.
    """
    with open(path, newline='', encoding='utf-8') as status_file:
        # Only the first record matters, so return as soon as one is read.
        for record in csv.DictReader(status_file):
            return record["transactionHash"]
    raise ValueError("CSV file is empty")


def has_submit_errors(path="submit_errors.csv"):
    """Report whether the CSV file at *path* holds at least one data row.

    Args:
        path: CSV file to inspect; its first line is used as the header row.

    Returns:
        True if a row follows the header, False otherwise.
    """
    with open(path, newline='', encoding='utf-8') as errors_file:
        # A single parsed row is enough to know that errors were recorded.
        for _ in csv.DictReader(errors_file):
            return True
    return False


def run_submit_to_contract():
    """Submit ``county-results.csv`` via the Elephant CLI and report the outcome.

    Runs ``npx -y @elephant-xyz/cli submit-to-contract`` with fixed arguments,
    discarding stdout and capturing stderr. On success it checks
    ``submit_errors.csv`` through ``has_submit_errors()``; if no errors are
    recorded, it prints a polygonscan link built from the hash returned by
    ``get_transaction_hash()``.

    If the command exits non-zero, its stderr is printed and the process exits
    with the same return code.
    """
    # NOTE(review): the API key and oracle key ID below appear to be real
    # credentials committed to source -- confirm, and consider loading them
    # from the environment instead of hard-coding them.
    command = [
        "npx", "-y", "@elephant-xyz/cli", "submit-to-contract", "county-results.csv",
        "--from-address", "0xefAd08946612A15d5De8D4Db7fc03556b6424075",
        "--api-key", "f7e18cf6-5d07-4e4a-ae23-f27b812614e6",
        "--domain", "oracles-69c46050.staircaseapi.com",
        "--oracle-key-id", "7ad26e0b-67c9-4c2f-95a2-2792c7db5ac7",
    ]

    try:
        subprocess.run(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as exc:
        print(f"Command failed (exit code {exc.returncode}):", file=sys.stderr)
        print(exc.stderr.strip(), file=sys.stderr)
        sys.exit(exc.returncode)

    # The command can exit cleanly yet still record errors in the CSV.
    if has_submit_errors():
        print("❌ Submit failed, please check submit_errors.csv for details", file=sys.stderr)
        return

    tx_hash = get_transaction_hash()
    tx_url = f"https://polygonscan.com/tx/{tx_hash}"

    print("✅ Submit done\n")
    print(f"Transaction link: {tx_url}")


# Entry point: run the submit step only when this code executes as the main module.
if __name__ == "__main__":
    run_submit_to_contract()


✅ Submit done

Transaction link: https://polygonscan.com/tx/0x1a56c6a3f30c931e489d94902026e71d1981a438f91851c91061850982500d45


In [None]:
# @title Step 8: Download county-results.csv
import os

from google.colab import files

# Offer county-results.csv as a browser download if it exists in the working
# directory; otherwise report that it is missing.
if os.path.exists('county-results.csv'):
    files.download('county-results.csv')
    print("✅ File was downloaded successfully")
else:
    print("❌ File not found")
