In [1]:
import frontmatter

# Minimal front-matter document: a header fenced by '---' lines, followed by the body text.
sample = (
    "---\n"
    "title: Blockchain Test\n"
    "tags: [web3, smart-contract]\n"
    "---\n"
    "This is the content\n"
)

# Parse the text, then show the header fields and the remaining body on separate lines.
post = frontmatter.loads(sample)
print(post.metadata, post.content, sep="\n")


{'title': 'Blockchain Test', 'tags': ['web3', 'smart-contract']}
This is the content


In [4]:
import io
import zipfile
import requests
import frontmatter

In [5]:
# Zip archive of the repository's main branch, served by GitHub's codeload host.
url = 'https://codeload.github.com/Bloceducare/Web3bridge-Web3-Cohort-XIII/zip/refs/heads/main'
# requests applies no timeout by default, so a stalled connection could hang
# the kernel indefinitely; give up after 30 seconds instead.
resp = requests.get(url, timeout=30)

In [6]:
def read_repo_data(repo_owner, repo_name, branch='main', timeout=30):
    """
    Download a GitHub repository archive and parse every markdown file in it.

    The repository is fetched as a single zip from codeload.github.com and
    read entirely in memory; nothing is written to disk.

    Args:
        repo_owner: GitHub username or organization
        repo_name: Repository name
        branch: Branch whose archive is downloaded. Defaults to 'main'.
        timeout: Seconds to wait on the server before giving up; passed
            straight to ``requests.get``. Defaults to 30.

    Returns:
        List of dictionaries, one per ``.md`` / ``.mdx`` file (suffix matched
        case-insensitively). Each holds the result of the parsed post's
        ``to_dict()`` plus a ``'filename'`` key with the file's path inside
        the archive.

    Raises:
        Exception: If the download does not answer with HTTP 200.
        Errors raised by ``requests.get`` (e.g. on timeout) or by
        ``zipfile.ZipFile`` (e.g. a corrupt archive) are not caught here
        and propagate to the caller.
    """
    prefix = 'https://codeload.github.com'
    url = f'{prefix}/{repo_owner}/{repo_name}/zip/refs/heads/{branch}'
    # Without a timeout a stalled connection can hang the call indefinitely.
    resp = requests.get(url, timeout=timeout)

    if resp.status_code != 200:
        raise Exception(f"Failed to download repository: {resp.status_code}")

    markdown_suffixes = ('.md', '.mdx')
    repository_data = []

    # The context manager closes the archive even if iteration raises.
    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        for file_info in zf.infolist():
            filename = file_info.filename
            if not filename.lower().endswith(markdown_suffixes):
                continue

            try:
                with zf.open(file_info) as f_in:
                    # Undecodable bytes are dropped rather than failing the file.
                    content = f_in.read().decode('utf-8', errors='ignore')
                post = frontmatter.loads(content)
                data = post.to_dict()
                data['filename'] = filename
                repository_data.append(data)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole repository.
                print(f"Error processing {filename}: {e}")
                continue

    return repository_data

In [7]:
# Fetch and parse every markdown file in the cohort repository (network call).
web3_docs = read_repo_data(
    repo_owner='Bloceducare',
    repo_name='Web3bridge-Web3-Cohort-XIII',
)

# Sanity check: report how many markdown documents were parsed.
print(f"Documents found: {len(web3_docs)}")

Documents found: 254


In [11]:
# Spot-check one parsed record (index 2 is an arbitrary pick) to see which keys it carries.
print(web3_docs[2])

{'content': '# Sample Hardhat Project\n\nThis project demonstrates a basic Hardhat use case. It comes with a sample contract, a test for that contract, and a Hardhat Ignition module that deploys that contract.\n\nTry running some of the following tasks:\n\n```shell\nnpx hardhat help\nnpx hardhat test\nREPORT_GAS=true npx hardhat test\nnpx hardhat node\nnpx hardhat ignition deploy ./ignition/modules/Lock.js\n```', 'filename': 'Web3bridge-Web3-Cohort-XIII-main/online-program/submissions/Allan-robinson-sepolia-storage/README.md'}
