# A Proof of Concept for IPFS & Filecoin Retrieval


## Introduction

WIP


## Imports


In [1]:
import subprocess
import requests
import random

## Existing Functions


In [2]:
def check_ipfs_installed() -> bool:
    try:
        ipfs_present = subprocess.check_output(["ipfs", "version"]).decode("utf-8")
        print(f"Local IPFS Node Detected! {ipfs_present}")

    except subprocess.CalledProcessError:
        return False  # Return False to indicate that IPFS is not installed

    return (
        True  # Return True to indicate that IPFS is installed and the daemon is running
    )


def check_ipfs_daemon_running() -> bool:
    try:
        subprocess.check_output(["ipfs", "swarm", "peers"]).decode(
            "utf-8"
        )  # Check if daemon is running by checking if we can connect to peers
    except subprocess.CalledProcessError:
        return False  # Return False to indicate that the daemon is not running

    return True  # Return True to indicate that the daemon is running


def get_data_from_local_ipfs_node(cid: str) -> bytes:
    """
    Fetches data from local IPFS node. If CID is not found on local node, it will attempt to fetch from IPFS network via local node.
    """
    try:  # Check if CID is on local IPFS node
        subprocess.check_output(["ipfs", "pin", "ls", cid])
        print(f"CID: {cid} found in local IPFS node")
        data = subprocess.check_output(["ipfs", "cat", cid])
        return data
    except subprocess.CalledProcessError:
        print(
            f"CID: {cid} not found in local IPFS node. Attempting to fetch from IPFS network via local node."
        )
        try:
            if (
                not check_ipfs_daemon_running()
            ):  # Check if IPFS daemon is running since we need it to fetch from IPFS network.
                print(
                    "IPFS daemon not running. Please start IPFS daemon by running 'ipfs daemon' or installing the IPFS desktop app and starting it."
                )
                return None
            data = subprocess.check_output(["ipfs", "cat", cid])
            print(f"CID: {cid} found on IPFS network via local IPFS node.")
            print("Pinning CID to local IPFS node")
            print(subprocess.check_output(["ipfs", "pin", "add", cid]))
        except subprocess.CalledProcessError:
            return None
    return data


def get_data_from_ipfs_network_using_gateway(cid: str) -> bytes:
    try:
        data = requests.get(f"https://ipfs.io/ipfs/{cid}").content
        print(f"CID: {cid} found on IPFS network via Gateway")
        return data
    except requests.exceptions.RequestException as e:
        return None

# -------------------------- NEW CODE STARTS HERE --------------------------

def get_data_from_filecoin(cid: str) -> bytes:
    # We use a network indexer (cid.contact) to find the storage providers that have the CID we are looking for in a storage deal
    api_endpoint = f"https://cid.contact/cid/{cid}" 
    response = requests.get(api_endpoint)

    if response.status_code == 200:
        data = response.json()
        storage_providers = data["MultihashResults"][0]["ProviderResults"]

        # Randomly shuffle the storage providers to select one at random
        random.shuffle(storage_providers)

        # Iterate over the shuffled list of storage providers
        for storage_provider in storage_providers:
            storage_provider_id = storage_provider["Provider"]["ID"]
            print(f"Trying Storage Provider ID: {storage_provider_id} found via cid.contact API")

            # Try to get the data from the storage provider
            provider_data = request_data_from_storage_provider(storage_provider_id, cid)

            # If we successfully got data, return it
            if provider_data:
                return provider_data
            else:
                print(f"Unable to get data from Storage Provider ID: {storage_provider_id}")

        # If we've gone through all storage providers and haven't found the data, return None
        print("Unable to find data with any storage provider")
        return None
    else:
        print(f"Error: Received status code {response.status_code} from CID contact API")
        return None


# Theoretical code to request data from a storage provider
def request_data_from_storage_provider(provider_id: str, cid: str) -> bytes:
    # Import necessary libraries
    import graphsync
    import libp2p

    # Assuming we have an established libp2p host
    host = libp2p.Host()

    # Graphsync instance
    gs = graphsync.Graphsync(host)

    # Create the selector for the data
    selector = {"cid": cid, "depth": 1}

    # Request the data from the provider
    responses = gs.request(provider_id, selector)

    # Wait for the data
    for response in responses:
        if response.status == graphsync.Status.SUCCESS:
            # This assumes that the data is small enough to fit in memory
            # and that the data is in the format you expect
            data = response.data
            return data

        else:
            print(f"Error: Received status code {response.status} from storage provider")
            return None


## Main Function


In [4]:
cid = (
    "bafybeigoe4ss23hrahns7sbqus6tas4ovvnhupmrnrym5zluu2ssg5yj5u"  # CID of of a Landsat 9 .tif file
)


def get_data(cid: str) -> bytes:
    if check_ipfs_installed():
        data = get_data_from_local_ipfs_node(cid)

    else:
        print("Local IPFS Node not detected. Please install IPFS and start the daemon.")
        print("Now Attempting to fetch CID from IPFS network via Gateway")
        data = get_data_from_ipfs_network_using_gateway(cid)

    if data:
        return data

    print("CID not found on IPFS network via Gateway.")
    print("Now Attempting to fetch CID from IPFS network via cid.contact API")
    data = get_data_from_filecoin(cid)
    if data is not None:
        return data

    print("Data was not found on IPFS or Filecoin via any method.")
    return None


data = get_data(cid)

Local IPFS Node Detected! ipfs version 0.18.1

CID: bafybeigoe4ss23hrahns7sbqus6tas4ovvnhupmrnrym5zluu2ssg5yj5u not found in local IPFS node. Attempting to fetch from IPFS network via local node.
IPFS daemon not running. Please start IPFS daemon by running 'ipfs daemon' or installing the IPFS desktop app and starting it.


Error: path 'bafybeigoe4ss23hrahns7sbqus6tas4ovvnhupmrnrym5zluu2ssg5yj5u' is not pinned
Error: this action must be run in online mode, try running 'ipfs daemon' first


CID not found on IPFS network via Gateway.
Now Attempting to fetch CID from IPFS network via cid.contact API
{'MultihashResults': [{'Multihash': 'EiDOJyUtbPEB2y/IMKS9MEuOrVp6PZFscM7ldKalI3cJ7Q==', 'ProviderResults': [{'ContextID': 'AXESIDcq3d4jIXyKPakgB9cF4U/X63GCyK7GH2InoVTVA8rx', 'Metadata': 'kBKjaFBpZWNlQ0lE2CpYKAABgeIDkiAgVqGOPf5CFwUwts6ep8cOwdFYqs9uVsAVgjXV5auHyytsVmVyaWZpZWREZWFs9W1GYXN0UmV0cmlldmFs9Q==', 'Provider': {'ID': '12D3KooWQY8k3XoH76BPPPXsrP5BWzTHpfC78u9aHS5FdTx2EXKZ', 'Addrs': ['/ip4/38.70.220.40/tcp/10201']}}]}]}
Trying Storage Provider ID: 12D3KooWQY8k3XoH76BPPPXsrP5BWzTHpfC78u9aHS5FdTx2EXKZ found via cid.contact API


ModuleNotFoundError: No module named 'graphsync'