In [13]:
import os
import requests
import tiktoken

def count_tokens(text):
    """Return how many tokens `text` encodes to for the "gpt-3.5-turbo" model."""
    # Look up the tiktoken encoding that matches the target model, e.g., "gpt-3.5-turbo"
    tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")
    return len(tokenizer.encode(text))

# File names whose contents are inlined into the context when encountered.
# Built once as a frozenset (O(1) membership) instead of on every directory visit.
_IMPORTANT_FILES = frozenset({
    "requirements.txt", "Dockerfile", ".gitignore", "package.json", "Gemfile", "README.md",
    ".env.example", "Pipfile", "setup.py", "Pipfile.lock", "pyproject.toml",
    "CMakeLists.txt", "Makefile", "go.mod", "go.sum", "pom.xml", "build.gradle",
    "Cargo.toml", "Cargo.lock", "composer.json", "phpunit.xml", "mix.exs",
    "pubspec.yaml", "stack.yaml", "DESCRIPTION", "NAMESPACE", "Rakefile",
})

# Seconds to wait for any single HTTP response; without an explicit timeout
# a stalled connection would block forever.
_REQUEST_TIMEOUT = 30


def _parse_owner_repo(repo_url):
    """Return the ``(owner, repo)`` pair named by the last two path segments of `repo_url`."""
    cleaned = repo_url.strip().rstrip("/")
    if cleaned.endswith(".git"):
        cleaned = cleaned[:-len(".git")]

    parts = cleaned.split("/")
    if len(parts) < 2:
        raise ValueError(f"Cannot extract owner/repository from URL: {repo_url!r}")

    owner, repo = parts[-2:]
    # scp-style remotes ("git@github.com:owner/repo") keep the host glued to the owner.
    owner = owner.rsplit(":", 1)[-1]
    if not owner or not repo:
        raise ValueError(f"Cannot extract owner/repository from URL: {repo_url!r}")
    return owner, repo


def fetch_repo_context(repo_url):
    """Build a text summary of a GitHub repository for use as LLM context.

    The summary is made of ``<<SECTION: ... >>`` blocks joined by blank lines:
    the indented repository tree, the contents of every file whose name is in
    ``_IMPORTANT_FILES`` (in traversal order), and the per-language statistics
    reported by the GitHub API. The total token count of all sections is
    printed as a side effect.

    Args:
        repo_url: URL whose last two path segments are the owner and the
            repository name, e.g. ``https://github.com/owner/repo``. A
            trailing slash or ``.git`` suffix is tolerated.

    Returns:
        The combined context as a single string.

    Raises:
        ValueError: If owner and repository cannot be extracted from `repo_url`.
        requests.HTTPError: If any GitHub request returns an error status.
    """
    # Validate the URL first so bad input fails before any network traffic.
    owner, repo = _parse_owner_repo(repo_url)

    # Headers for the API requests. The token is optional: sending the literal
    # "token None" makes GitHub reject requests that would work anonymously.
    headers = {'Accept': 'application/vnd.github.v3+json'}
    token = os.getenv("GITHUB_TOKEN")
    if token:
        headers['Authorization'] = f'token {token}'

    # Base URLs for GitHub API requests
    contents_api_url = f'https://api.github.com/repos/{owner}/{repo}/contents'
    languages_api_url = f'https://api.github.com/repos/{owner}/{repo}/languages'

    # Sections holding the contents of important files, in traversal order.
    file_sections = []

    def traverse_dir(api_url, prefix=""):
        """Return the indented listing below `api_url`, collecting important files on the way."""
        response = requests.get(api_url, headers=headers, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()

        structure = []
        for item in response.json():
            if item['type'] == 'dir':
                structure.append(f"{prefix}{item['name']}/")
                structure.extend(traverse_dir(item['url'], prefix=prefix + "    "))
                # A directory has nothing to download, even if its name matches.
                continue

            structure.append(f"{prefix}{item['name']}")
            if item['name'] not in _IMPORTANT_FILES:
                continue

            # Entries without a download URL (null in the API response) cannot be fetched.
            download_url = item.get('download_url')
            if not download_url:
                continue

            file_response = requests.get(download_url, timeout=_REQUEST_TIMEOUT)
            # Fail loudly rather than inlining an error page as file content.
            file_response.raise_for_status()
            file_content = file_response.text
            file_sections.append(
                f"<<SECTION: Content of {item['name']} >>\n{file_content}"
                + f"\n<<END_SECTION: Content of {item['name']} >>"
            )

        return structure

    # Build the repository structure by traversing the root directory
    repo_structure = traverse_dir(contents_api_url)
    repo_structure_text = (
        "<<SECTION: Repository Structure >>\n"
        + "\n".join(repo_structure)
        + "\n<<END_SECTION: Repository Structure >>"
    )

    # Make request to fetch repository languages
    languages_response = requests.get(languages_api_url, headers=headers, timeout=_REQUEST_TIMEOUT)
    languages_response.raise_for_status()

    # The languages endpoint reports bytes of code per language, not lines.
    languages_data = languages_response.json()
    languages_context = (
        "<<SECTION: Repository Languages >>\n"
        + "\n".join(f"{lang}: {count} bytes" for lang, count in languages_data.items())
        + "\n<<END_SECTION: Repository Languages >>"
    )

    # Structure first, then file contents, then languages.
    context = [repo_structure_text, *file_sections, languages_context]

    # Tokens are counted per section, so the blank-line separators are not included.
    total_tokens = sum(count_tokens(section) for section in context)

    # Debug print for total tokens (optional)
    print(f"Total tokens: {total_tokens}")

    # Return the combined context
    return "\n\n".join(context)

# Example usage (ensure GITHUB_TOKEN environment variable is set).
# Guarded so that merely importing this module does not hit the network;
# it still runs when executed as a script or inside a notebook cell.
if __name__ == "__main__":
    print(fetch_repo_context('https://github.com/nkkko/pulent'))

Total tokens: 6005
<<SECTION: Repository Structure >>
.editorconfig
.eslintignore
.eslintrc.cjs
.example.env
.github/
    config/
        exclude.txt
    dependabot.yml
    pull_request_template.md
.gitignore
.npmrc
.prettierignore
.prettierrc.js
.vscode/
    extensions.json
    launch.json
    post.code-snippets
    settings.json
LICENSE
README.md
astro.config.ts
convert.ipynb
netlify.toml
package-lock.json
package.json
postcss.config.js
public/
    192x192.png
    512x512.png
    apple-touch-icon.png
    favicon.ico
    icon.svg
    logo_pulent_bw_1200.png
    logo_pulent_bw_150.png
    manifest.webmanifest
    robots.txt
    social-card.png
src/
    assets/
        about-astro.png
        roboto-mono-700.ttf
        roboto-mono-regular.ttf
    components/
        BaseHead.astro
        FormattedDate.astro
        Paginator.astro
        Search.astro
        SkipLink.astro
        SocialList.astro
        ThemeProvider.astro
        ThemeToggle.astro
        blog/
            Hero.as