In [1]:
import requests
import pandas as pd
import re

# Root of the l2beat config sources, addressed through the GitHub contents API
base_url = "https://api.github.com/repos/l2beat/l2beat/contents/packages/config/src"

# Sub-directories of base_url that get scanned, in this order
folders = ["chains", "layer2s", "layer3s"]

# One regex per field pulled out of the TypeScript sources; each pattern has a
# single capture group holding the value (quoted strings, or digits for chainId)
patterns = dict(
    name=r"name: '([^']+)'",
    chainId=r"chainId: (\d+)",
    explorerUrl=r"explorerUrl: '([^']+)'",
    category=r"category: '([^']+)'",
    slug=r"slug: '([^']+)'",
)

# Empty result frame; its columns define the schema of the final table
df = pd.DataFrame(columns=['layer', 'name', 'chainId', 'explorerUrl', 'category', 'slug'])

def extract_data(text, pattern):
    """Return the first capture group of the first *pattern* match in *text*.

    The search is unanchored (``re.search``), so the earliest occurrence
    anywhere in *text* wins. Returns ``None`` when nothing matches.
    """
    found = re.search(pattern, text)
    if found is None:
        return None
    return found.group(1)

def safe_get_content(url, timeout=30):
    """Fetch *url* and return the response body as text, or ``None`` on failure.

    Args:
        url: Address to download. A falsy value (``None``, ``""``) is
            treated as "nothing to fetch" and no request is made.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run.

    Returns:
        The decoded response text, or ``None`` when the URL is missing or
        malformed, the request fails, or the server answers with an HTTP
        error status.
    """
    if not url:
        return None

    try:
        response = requests.get(url, timeout=timeout)
        # An error page is not file content; treat 4xx/5xx as a failed fetch
        # instead of handing the error body to the regex parser.
        response.raise_for_status()
    except requests.exceptions.MissingSchema:
        print(f"Invalid URL: {url}")
        return None
    except requests.exceptions.RequestException as exc:
        # Covers connection errors, timeouts, HTTP error statuses and the
        # remaining invalid-URL cases, so one bad file cannot abort the scan.
        print(f"Failed to fetch {url}: {exc}")
        return None

    return response.text

# Navigate through the folders, gathering one row per project file.
# Rows from every folder are collected first and the DataFrame is built once
# at the end: concatenating onto an empty frame (and onto frames whose columns
# are entirely None) makes recent pandas versions emit a FutureWarning about
# dtype inference.
data_list = []

for folder in folders:
    # Request the content of the folder. The timeout keeps a stalled
    # connection from hanging the run, and the status check surfaces API
    # errors (e.g. rate limiting) before the payload is iterated.
    try:
        listing = requests.get(f"{base_url}/{folder}", timeout=30)
        listing.raise_for_status()
        response = listing.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        print(f"Could not list folder {folder}: {exc}")
        continue

    # A directory listing is a JSON array. An error payload is a JSON object;
    # iterating it would yield its string keys and break file['name'] below.
    if not isinstance(response, list):
        print(f"Unexpected listing for folder {folder}: {response}")
        continue

    # Iterate through each file in the folder
    for file in response:
        download_url = file.get('download_url')
        # Skip index/test modules and entries without a downloadable file
        if file['name'] in ('index.ts', 'index.test.ts') or download_url is None:
            continue

        # Get the content of the file safely
        file_content = safe_get_content(download_url)
        if file_content is None:  # Skip if content couldn't be retrieved
            continue

        # Category is only extracted for the layer2s/layer3s folders
        if folder in ('layer2s', 'layer3s'):
            category = extract_data(file_content, patterns['category'])
        else:
            category = None

        # Prepare data with extracted values or defaults where necessary
        data = {
            'layer': folder[:-1],  # Folder name without its trailing 's'
            'name': extract_data(file_content, patterns['name']),
            'chainId': extract_data(file_content, patterns['chainId']),
            'explorerUrl': extract_data(file_content, patterns['explorerUrl']),
            'category': category,
            # Filename as fallback slug
            'slug': extract_data(file_content, patterns['slug']) or file['name'].replace('.ts', ''),
        }

        # Add the data dictionary to our list
        data_list.append(data)

# Build the final table in one step, keeping the column order declared on df
df = pd.DataFrame(data_list, columns=df.columns)

# Display the DataFrame
print(df)


      layer                   name chainId            explorerUrl  \
0     chain              avalanche   43114   https://snowtrace.io   
1     chain                    bsc      56    https://bscscan.com   
2     chain                   celo   42220    https://celoscan.io   
3     chain               ethereum       1   https://etherscan.io   
4     chain                 gnosis     100  https://gnosisscan.io   
..      ...                    ...     ...                    ...   
109  layer3             RARI Chain    None                   None   
110  layer3                  Stack    None                   None   
111  layer3  Syndicate Frame Chain    None                   None   
112  layer3                    Xai    None                   None   
113  layer3            zkLink Nova    None                   None   

                   category            slug  
0                      None       avalanche  
1                      None             bsc  
2                      None      