# Generate Data

In [3]:
import requests
import os
import re
import pandas as pd
from urllib.parse import urlparse, quote
from urllib.parse import urlparse

def parse_markdown_to_csv(md_content, csv_file_path):
    heading_pattern = re.compile(r'^(#+)\s*(.*)', re.MULTILINE)
    headings_contents = []
    current_heading = None
    current_content = []
    
    for line in md_content.split('\n'):
        match = heading_pattern.match(line)
        if match:
            if current_heading is not None:
                headings_contents.append([current_heading, ' '.join(current_content).strip()])
            current_heading = match.group(2).strip()
            current_content = []
        else:
            if line.strip():
                current_content.append(line.strip())
    
    if current_heading is not None:
        headings_contents.append([current_heading, ' '.join(current_content).strip()])
    
    df = pd.DataFrame(headings_contents, columns=['Title', 'Content'])
    df.to_csv(csv_file_path, index=False, encoding='utf-8')

def fetch_and_convert_readme_to_csv(repo_urls, output_dir):
    # Ensure the output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # GitHub API endpoint for fetching the contents of the README file
    for url in repo_urls:
        parsed_url = urlparse(url)
        parts = parsed_url.path.strip('/').split('/')
        repo_user, repo_name = parts[0], parts[1]
        api_url = f"https://api.github.com/repos/{repo_user}/{repo_name}/readme"
        
        # Set up appropriate headers for GitHub API including the token for authorization
        headers = {
            'Accept': 'application/vnd.github.v3.raw',
            'Authorization': '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'  # Replace 'YOUR_GITHUB_TOKEN' with your actual GitHub token
        }
        
        response = requests.get(api_url, headers=headers)
        if response.status_code == 200:
            readme_content = response.text
            csv_file_path = os.path.join(output_dir, f"{repo_name}.csv")
            parse_markdown_to_csv(readme_content, csv_file_path)
            print(f"Processed {repo_name}.csv")
        else:
            print(f"Failed to fetch README for {repo_name}: {response.status_code}")

# Example usage:
repo_urls = [
    'https://github.com/context-labs/autodoc'
]

fetch_and_convert_readme_to_csv(repo_urls, 'output_csv_files')


Processed autodoc.csv


In [1]:
import requests
import os
import pandas as pd
import base64
from urllib.parse import urlparse

def fetch_and_concatenate_source_code(repo_urls, output_dir, token):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    headers = {
        'Authorization': f'token {token}',
        'Accept': 'application/vnd.github.v3.raw'  # Requests raw content directly
    }

    for url in repo_urls:
        parsed_url = urlparse(url)
        parts = parsed_url.path.strip('/').split('/')
        repo_user, repo_name = parts[0], parts[1]

        # Fetch the default branch
        repo_info_url = f'https://api.github.com/repos/{repo_user}/{repo_name}'
        repo_info_response = requests.get(repo_info_url, headers=headers)
        if repo_info_response.status_code == 200:
            default_branch = repo_info_response.json()['default_branch']
        else:
            print(f'Failed to fetch repo info for {repo_name}: {repo_info_response.status_code}')
            continue

        api_url = f'https://api.github.com/repos/{repo_user}/{repo_name}/git/trees/{default_branch}?recursive=true'
        response = requests.get(api_url, headers={'Authorization': f'token {token}', 'Accept': 'application/vnd.github.v3+json'})
        
        if response.status_code == 200:
            data = response.json()
            all_files_content = []

            for file in data['tree']:
                if file['type'] == 'blob' and file['path'].endswith(('.py', '.c', '.cpp', '.java', '.js', '.ts', '.go')):
                    file_url = f"https://api.github.com/repos/{repo_user}/{repo_name}/contents/{file['path']}?ref={default_branch}"
                    file_response = requests.get(file_url, headers=headers)
                    if file_response.status_code == 200:
                        file_content = file_response.text
                        all_files_content.append(file_content)

            concatenated_content = "\n".join(all_files_content)
            df = pd.DataFrame([concatenated_content], columns=['SourceCode'])
            df.to_csv(os.path.join(output_dir, f'{repo_name}_context.csv'), index=False)
            print(f'Saved {repo_name}_context.csv')
        else:
            print(f'Failed to fetch repository data for {repo_name}: {response.status_code}')

# Example usage:
repo_urls = [
    "https://github.com/context-labs/autodoc"
]
output_directory = 'output_csv_files'
github_token = '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'  # Replace with your GitHub access token

fetch_and_concatenate_source_code(repo_urls, output_directory, github_token)


Saved autodoc_context.csv


In [14]:
import os
import subprocess
import csv
from pathlib import Path
import shutil
import requests
import os
import re
import pandas as pd
from urllib.parse import urlparse, quote
from urllib.parse import urlparse


# Clone repository to a local path
def git_clone(repo_url, clone_path):
    if os.path.exists(clone_path):
        subprocess.run(['rm', '-rf', clone_path], check=True)
    subprocess.run(['git', 'clone', repo_url, clone_path], check=True)

# Parse the README.md content into a CSV
def parse_markdown_to_csv(md_file_path, csv_file_path):
    with open(md_file_path, 'r', encoding='utf-8') as file:
        md_content = file.read()

    heading_pattern = re.compile(r'^(#+)\s*(.*)', re.MULTILINE)
    headings_contents = []
    current_heading = None
    current_content = []

    for line in md_content.split('\n'):
        match = heading_pattern.match(line)
        if match:
            if current_heading is not None:
                headings_contents.append([current_heading, ' '.join(current_content).strip()])
            current_heading = match.group(2).strip()
            current_content = []
        else:
            if line.strip():
                current_content.append(line.strip())

    if current_heading is not None:
        headings_contents.append([current_heading, ' '.join(current_content).strip()])

    df = pd.DataFrame(headings_contents, columns=['Title', 'Content'])
    df.to_csv(csv_file_path, index=False, encoding='utf-8')

# Process a list of GitHub repository URLs
def process_repos(repo_urls, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for url in repo_urls:
        parsed_url = urlparse(url)
        parts = parsed_url.path.strip('/').split('/')
        repo_user, repo_name = parts[0], parts[1]
        clone_path = f"/tmp/{repo_name}"  # Temporary path for cloning
        git_clone(url, clone_path)

        readme_path = os.path.join(clone_path, 'README.md')
        csv_file_path = os.path.join(output_dir, f"{repo_name}.csv")
        if os.path.exists(readme_path):
            parse_markdown_to_csv(readme_path, csv_file_path)
            print(f"Processed {repo_name}.csv")
        else:
            print(f"README.md not found for {repo_name}")

        # Remove the repository directory to clean up
        subprocess.run(['rm', '-rf', clone_path], check=True)


In [None]:
# Replace this list with your own list of 300 URLs
#repo_urls = ['https://github.com/public-apis/public-apis', 'https://github.com/donnemartin/system-design-primer', 'https://github.com/vinta/awesome-python', 'https://github.com/TheAlgorithms/Python', 'https://github.com/jackfrued/Python-100-Days', 'https://github.com/AUTOMATIC1111/stable-diffusion-webui', 'https://github.com/ytdl-org/youtube-dl', 'https://github.com/huggingface/transformers', 'https://github.com/521xueweihan/HelloGitHub', 'https://github.com/langchain-ai/langchain', 'https://github.com/nvbn/thefuck', 'https://github.com/pytorch/pytorch', 'https://github.com/django/django', 'https://github.com/tensorflow/models', 'https://github.com/yt-dlp/yt-dlp', 'https://github.com/tiangolo/fastapi', 'https://github.com/home-assistant/core', 'https://github.com/pallets/flask', 'https://github.com/fighting41love/funNLP', 'https://github.com/bregman-arie/devops-exercises', 'https://github.com/josephmisiti/awesome-machine-learning', 'https://github.com/ansible/ansible', 'https://github.com/keras-team/keras', 'https://github.com/openai/whisper', 'https://github.com/python/cpython', 'https://github.com/3b1b/manim', 'https://github.com/scikit-learn/scikit-learn', 'https://github.com/xtekky/gpt4free', 'https://github.com/binary-husky/gpt_academic', 'https://github.com/d2l-ai/d2l-zh', 'https://github.com/swisskyrepo/PayloadsAllTheThings', 'https://github.com/meta-llama/llama', 'https://github.com/localstack/localstack', 'https://github.com/zylon-ai/private-gpt', 'https://github.com/ageitgey/face_recognition', 'https://github.com/sherlock-project/sherlock', 'https://github.com/psf/requests', 'https://github.com/scrapy/scrapy', 'https://github.com/CorentinJ/Real-Time-Voice-Cloning', 'https://github.com/gpt-engineer-org/gpt-engineer', 'https://github.com/abi/screenshot-to-code', 'https://github.com/deepfakes/faceswap', 'https://github.com/soimort/you-get', 'https://github.com/OpenInterpreter/open-interpreter', 'https://github.com/xai-org/grok-1', 'https://github.com/commaai/openpilot', 'https://github.com/Textualize/rich', 'https://github.com/ultralytics/yolov5', 'https://github.com/minimaxir/big-list-of-naughty-strings', 'https://github.com/iperov/DeepFaceLab', 'https://github.com/charlax/professional-programming', 'https://github.com/Z4nzu/hackingtool', 'https://github.com/pandas-dev/pandas', 'https://github.com/isocpp/CppCoreGuidelines', 'https://github.com/geekan/MetaGPT', 'https://github.com/faif/python-patterns', 'https://github.com/THUDM/ChatGLM-6B', 'https://github.com/PaddlePaddle/PaddleOCR', 'https://github.com/apachecn/ailearning', 'https://github.com/hpcaitech/ColossalAI', 'https://github.com/chubin/cheat.sh', 'https://github.com/psf/black', 'https://github.com/floodsung/Deep-Learning-Papers-Reading-Roadmap', 'https://github.com/google-research/bert', 'https://github.com/getsentry/sentry', 'https://github.com/oobabooga/text-generation-webui', 'https://github.com/LAION-AI/Open-Assistant', 'https://github.com/Stability-AI/stablediffusion', 'https://github.com/0voice/interview_internal_reference', 'https://github.com/gto76/python-cheatsheet', 'https://github.com/lllyasviel/Fooocus', 'https://github.com/XingangPan/DragGAN', 'https://github.com/satwikkansal/wtfpython', 'https://github.com/mingrammer/diagrams', 'https://github.com/odoo/odoo', 'https://github.com/TencentARC/GFPGAN', 'https://github.com/apache/airflow', 'https://github.com/chenfei-wu/TaskMatrix', 'https://github.com/mitmproxy/mitmproxy', 'https://github.com/lm-sys/FastChat', 'https://github.com/comfyanonymous/ComfyUI', 'https://github.com/babysor/MockingBird', 'https://github.com/openai/gym', 'https://github.com/testerSunshine/12306', 'https://github.com/shadowsocks/shadowsocks', 'https://github.com/microsoft/DeepSpeed', 'https://github.com/XX-net/XX-Net', 'https://github.com/fxsjy/jieba', 'https://github.com/hankcs/HanLP', 'https://github.com/Asabeneh/30-Days-Of-Python', 'https://github.com/karpathy/nanoGPT', 'https://github.com/httpie/cli', 'https://github.com/streamlit/streamlit', 'https://github.com/ccxt/ccxt', 'https://github.com/run-llama/llama_index', 'https://github.com/ray-project/ray', 'https://github.com/certbot/certbot', 'https://github.com/sqlmapproject/sqlmap', 'https://github.com/geekcomputers/Python', 'https://github.com/huggingface/pytorch-image-models', 'https://github.com/coqui-ai/TTS', 'https://github.com/python-poetry/poetry', 'https://github.com/0xAX/linux-insides', 'https://github.com/facebookresearch/fairseq', 'https://github.com/gradio-app/gradio', 'https://github.com/yunjey/pytorch-tutorial', 'https://github.com/tatsu-lab/stanford_alpaca', 'https://github.com/explosion/spaCy', 'https://github.com/donnemartin/interactive-coding-challenges', 'https://github.com/facebookresearch/detectron2', 'https://github.com/Pythagora-io/gpt-pilot', 'https://github.com/google/jax', 'https://github.com/lllyasviel/ControlNet', 'https://github.com/acheong08/ChatGPT', 'https://github.com/open-mmlab/mmdetection', 'https://github.com/chatchat-space/Langchain-Chatchat', 'https://github.com/encode/django-rest-framework', 'https://github.com/tqdm/tqdm', 'https://github.com/Lightning-AI/pytorch-lightning', 'https://github.com/LC044/WeChatMsg', 'https://github.com/OWASP/CheatSheetSeries', 'https://github.com/donnemartin/data-science-ipython-notebooks', 'https://github.com/numpy/numpy', 'https://github.com/google/python-fire', 'https://github.com/xinntao/Real-ESRGAN', 'https://github.com/OpenBB-finance/OpenBBTerminal', 'https://github.com/facebookresearch/Detectron', 'https://github.com/freqtrade/freqtrade', 'https://github.com/StevenBlack/hosts', 'https://github.com/ycm-core/YouCompleteMe', 'https://github.com/spipm/Depix', 'https://github.com/zhayujie/chatgpt-on-wechat', 'https://github.com/littlecodersh/ItChat', 'https://github.com/nicolargo/glances', 'https://github.com/s0md3v/roop', 'https://github.com/getredash/redash', 'https://github.com/deezer/spleeter', 'https://github.com/Vision-CAIR/MiniGPT-4', 'https://github.com/python-telegram-bot/python-telegram-bot', 'https://github.com/pypa/pipenv', 'https://github.com/myshell-ai/OpenVoice', 'https://github.com/OpenDevin/OpenDevin', 'https://github.com/microsoft/cascadia-code', 'https://github.com/matterport/Mask_RCNN', 'https://github.com/tinygrad/tinygrad', 'https://github.com/svc-develop-team/so-vits-svc', 'https://github.com/RVC-Boss/GPT-SoVITS', 'https://github.com/jumpserver/jumpserver', 'https://github.com/locustio/locust', 'https://github.com/chubin/wttr.in', 'https://github.com/Textualize/textual', 'https://github.com/celery/celery', 'https://github.com/keon/algorithms', 'https://github.com/vnpy/vnpy', 'https://github.com/iperov/DeepFaceLive', 'https://github.com/ultralytics/ultralytics', 'https://github.com/eriklindernoren/ML-From-Scratch', 'https://github.com/microsoft/JARVIS', 'https://github.com/huggingface/diffusers', 'https://github.com/wangzheng0822/algo', 'https://github.com/mouredev/Hello-Python', 'https://github.com/Stability-AI/generative-models', 'https://github.com/sebastianruder/NLP-progress', 'https://github.com/JaidedAI/EasyOCR', 'https://github.com/kovidgoyal/kitty', 'https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix', 'https://github.com/HumanSignal/labelImg', 'https://github.com/d2l-ai/d2l-en', 'https://github.com/AtsushiSakai/PythonRobotics', 'https://github.com/pytorch/examples', 'https://github.com/cookiecutter/cookiecutter', 'https://github.com/tornadoweb/tornado', 'https://github.com/hiyouga/LLaMA-Factory', 'https://github.com/mindsdb/mindsdb', 'https://github.com/deepinsight/insightface', 'https://github.com/openai/gpt-2', 'https://github.com/luong-komorebi/Awesome-Linux-Software', 'https://github.com/WZMIAOMIAO/deep-learning-for-image-processing', 'https://github.com/drduh/macOS-Security-and-Privacy-Guide', 'https://github.com/openai/chatgpt-retrieval-plugin', 'https://github.com/plotly/dash', 'https://github.com/chriskiehl/Gooey', 'https://github.com/jhao104/proxy_pool', 'https://github.com/pyg-team/pytorch_geometric', 'https://github.com/saleor/saleor', 'https://github.com/zulip/zulip', 'https://github.com/jina-ai/jina', 'https://github.com/openai/openai-python', 'https://github.com/KurtBestor/Hitomi-Downloader', 'https://github.com/521xueweihan/GitHub520', 'https://github.com/ArchiveBox/ArchiveBox', 'https://github.com/facebookresearch/audiocraft', 'https://github.com/meta-llama/llama3', 'https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI', 'https://github.com/matplotlib/matplotlib', 'https://github.com/yoheinakajima/babyagi', 'https://github.com/Vonng/ddia', 'https://github.com/PromtEngineer/localGPT', 'https://github.com/vllm-project/vllm', 'https://github.com/ManimCommunity/manim', 'https://github.com/ungoogled-software/ungoogled-chromium', 'https://github.com/karpathy/minGPT', 'https://github.com/magenta/magenta', 'https://github.com/bokeh/bokeh', 'https://github.com/pydantic/pydantic', 'https://github.com/huggingface/datasets', 'https://github.com/microsoft/unilm', 'https://github.com/kholia/OSX-KVM', 'https://github.com/kovidgoyal/calibre', 'https://github.com/mkdocs/mkdocs', 'https://github.com/magic-wormhole/magic-wormhole', 'https://github.com/Delgan/loguru', 'https://github.com/lucidrains/vit-pytorch', 'https://github.com/nginx-proxy/nginx-proxy', 'https://github.com/recommenders-team/recommenders', 'https://github.com/RasaHQ/rasa', 'https://github.com/facebook/prophet', 'https://github.com/sanic-org/sanic', 'https://github.com/kaixindelele/ChatPaper', 'https://github.com/Jack-Cherish/python-spider', 'https://github.com/jantic/DeOldify', 'https://github.com/python/mypy', 'https://github.com/ymcui/Chinese-LLaMA-Alpaca', 'https://github.com/pyscript/pyscript', 'https://github.com/PostHog/posthog', 'https://github.com/mlflow/mlflow', 'https://github.com/spotify/luigi', 'https://github.com/wagtail/wagtail', 'https://github.com/Sanster/IOPaint', 'https://github.com/miloyip/game-programmer', 'https://github.com/joke2k/faker', 'https://github.com/mlc-ai/mlc-llm', 'https://github.com/Ciphey/Ciphey', 'https://github.com/quantopian/zipline', 'https://github.com/paperless-ngx/paperless-ngx', 'https://github.com/frappe/erpnext', 'https://github.com/stitionai/devika', 'https://github.com/rsms/inter', 'https://github.com/kivy/kivy', 'https://github.com/reflex-dev/reflex', 'https://github.com/onnx/onnx', 'https://github.com/reddit-archive/reddit', 'https://github.com/hpcaitech/Open-Sora', 'https://github.com/haotian-liu/LLaVA', 'https://github.com/chatanywhere/GPT_API_free', 'https://github.com/InstaPy/InstaPy', 'https://github.com/binux/pyspider', 'https://github.com/LiLittleCat/awesome-free-chatgpt', 'https://github.com/cool-RR/PySnooper', 'https://github.com/apple/ml-stable-diffusion', 'https://github.com/ipython/ipython', 'https://github.com/wilsonfreitas/awesome-quant', 'https://github.com/alievk/avatarify-python', 'https://github.com/Mikubill/sd-webui-controlnet', 'https://github.com/wting/autojump', 'https://github.com/trekhleb/learn-python', 'https://github.com/eriklindernoren/PyTorch-GAN', 'https://github.com/Kr1s77/awesome-python-login-model', 'https://github.com/twintproject/twint', 'https://github.com/THUDM/ChatGLM2-6B', 'https://github.com/wistbean/learn_python3_spider', 'https://github.com/mnielsen/neural-networks-and-deep-learning', 'https://github.com/pytorch/vision', 'https://github.com/h2y/Shadowrocket-ADBlock-Rules', 'https://github.com/OpenEthan/SMSBoom', 'https://github.com/openai/baselines', 'https://github.com/plotly/plotly.py', 'https://github.com/piskvorky/gensim', 'https://github.com/RunaCapital/awesome-oss-alternatives', 'https://github.com/meta-llama/codellama', 'https://github.com/pallets/click', 'https://github.com/spotDL/spotify-downloader', 'https://github.com/dgtlmoon/changedetection.io', 'https://github.com/Anjok07/ultimatevocalremovergui', 'https://github.com/netbox-community/netbox', 'https://github.com/mxrch/GHunt', 'https://github.com/ranger/ranger', 'https://github.com/tensorflow/tensor2tensor', 'https://github.com/aws/aws-cli', 'https://github.com/blakeblackshear/frigate', 'https://github.com/w-okada/voice-changer', 'https://github.com/GaiZhenbiao/ChuanhuChatGPT', 'https://github.com/PrefectHQ/prefect', 'https://github.com/jupyter/jupyter', 'https://github.com/facefusion/facefusion', 'https://github.com/danielgatis/rembg', 'https://github.com/borisdayma/dalle-mini', 'https://github.com/fabric/fabric', 'https://github.com/aio-libs/aiohttp', 'https://github.com/ddbourgin/numpy-ml', 'https://github.com/TransformerOptimus/SuperAGI', 'https://github.com/microsoft/Bringing-Old-Photos-Back-to-Life', 'https://github.com/pyecharts/pyecharts', 'https://github.com/tiangolo/typer', 'https://github.com/Rapptz/discord.py', 'https://github.com/fauxpilot/fauxpilot', 'https://github.com/lra/mackup', 'https://github.com/apprenticeharper/DeDRM_tools', 'https://github.com/microsoft/qlib', 'https://github.com/networkx/networkx', 'https://github.com/powerline/powerline', 'https://github.com/arc53/DocsGPT', 'https://github.com/Python-World/python-mini-projects', 'https://github.com/airbytehq/airbyte', 'https://github.com/aleju/imgaug', 'https://github.com/roboflow/supervision', 'https://github.com/pjialin/py12306', 'https://github.com/hindupuravinash/the-gan-zoo', 'https://github.com/unifyai/ivy', 'https://github.com/openai/evals', 'https://github.com/horovod/horovod', 'https://github.com/huggingface/peft', 'https://github.com/NVlabs/stylegan', 'https://github.com/tgbot-collection/YYeTsBot', 'https://github.com/gunthercox/ChatterBot', 'https://github.com/UKPLab/sentence-transformers', 'https://github.com/saltstack/salt', 'https://github.com/wangshub/wechat_jump_game', 'https://github.com/youfou/wxpy', 'https://github.com/microsoft/nni', 'https://github.com/deepset-ai/haystack', 'https://github.com/codelucas/newspaper', 'https://github.com/joaomdmoura/crewAI', 'https://github.com/google/yapf', 'https://github.com/psf/requests-html', 'https://github.com/flairNLP/flair', 'https://github.com/sczhou/CodeFormer', 'https://github.com/shengqiangzhang/examples-of-web-crawlers', 'https://github.com/davidsandberg/facenet', 'https://github.com/NanmiCoder/MediaCrawler', 'https://github.com/ansible/awx', 'https://github.com/albumentations-team/albumentations', 'https://github.com/programthink/zhao', 'https://github.com/mail-in-a-box/mailinabox', 'https://github.com/sivel/speedtest-cli', 'https://github.com/searx/searx', 'https://github.com/ShangtongZhang/reinforcement-learning-an-introduction', 'https://github.com/iterative/dvc', 'https://github.com/PySimpleGUI/PySimpleGUI', 'https://github.com/mementum/backtrader', 'https://github.com/tiangolo/sqlmodel', 'https://github.com/nltk/nltk', 'https://github.com/dmlc/dgl', 'https://github.com/microsoft/Swin-Transformer', 'https://github.com/jindongwang/transferlearning', 'https://github.com/facebookresearch/detr', 'https://github.com/idank/explainshell', 'https://github.com/s0md3v/XSStrike', 'https://github.com/fortra/impacket', 'https://github.com/MetaCubeX/mihomo', 'https://github.com/wifiphisher/wifiphisher', 'https://github.com/jaakkopasanen/AutoEq', 'https://github.com/waditu/tushare', 'https://github.com/edgedb/edgedb', 'https://github.com/bloomberg/memray', 'https://github.com/ethereum/EIPs', 'https://github.com/PaddlePaddle/PaddleHub', 'https://github.com/scipy/scipy', 'https://github.com/chroma-core/chroma', 'https://github.com/sympy/sympy', 'https://github.com/beetbox/beets', 'https://github.com/postmanlabs/httpbin', 'https://github.com/labelmeai/labelme', 'https://github.com/SFTtech/openage', 'https://github.com/encode/httpx', 'https://github.com/redis/redis-py', 'https://github.com/getpelican/pelican', 'https://github.com/THUDM/ChatGLM3', 'https://github.com/jina-ai/clip-as-service', 'https://github.com/donnemartin/awesome-aws', 'https://github.com/microsoft/pyright', 'https://github.com/pre-commit/pre-commit', 'https://github.com/PaddlePaddle/PaddleDetection', 'https://github.com/ocrmypdf/OCRmyPDF', 'https://github.com/lss233/chatgpt-mirai-qq-bot', 'https://github.com/ydataai/ydata-profiling', 'https://github.com/dask/dask', 'https://github.com/mwaskom/seaborn', 'https://github.com/ranaroussi/yfinance', 'https://github.com/tonybeltramelli/pix2code', 'https://github.com/threat9/routersploit', 'https://github.com/Miserlou/Zappa', 'https://github.com/alexjc/neural-enhance', 'https://github.com/Zulko/moviepy', 'https://github.com/meolu/walle-web', 'https://github.com/OpenMOSS/MOSS', 'https://github.com/smicallef/spiderfoot', 'https://github.com/matrix-org/synapse', 'https://github.com/google-deepmind/alphafold', 'https://github.com/dbcli/pgcli', 'https://github.com/python-pillow/Pillow', 'https://github.com/BlinkDL/RWKV-LM', 'https://github.com/allenai/allennlp', 'https://github.com/LlamaFamily/Llama-Chinese', 'https://github.com/smol-ai/developer', 'https://github.com/janeczku/calibre-web', 'https://github.com/Embedding/Chinese-Word-Vectors', 'https://github.com/cookiecutter/cookiecutter-django', 'https://github.com/rougier/numpy-100', 'https://github.com/zalandoresearch/fashion-mnist']
repo_urls = ['https://github.com/pyscript/pyscript', 'https://github.com/PostHog/posthog', 'https://github.com/mlflow/mlflow', 'https://github.com/spotify/luigi', 'https://github.com/wagtail/wagtail', 'https://github.com/Sanster/IOPaint', 'https://github.com/miloyip/game-programmer', 'https://github.com/joke2k/faker', 'https://github.com/mlc-ai/mlc-llm', 'https://github.com/Ciphey/Ciphey', 'https://github.com/quantopian/zipline', 'https://github.com/paperless-ngx/paperless-ngx', 'https://github.com/frappe/erpnext', 'https://github.com/stitionai/devika', 'https://github.com/rsms/inter', 'https://github.com/kivy/kivy', 'https://github.com/reflex-dev/reflex', 'https://github.com/onnx/onnx', 'https://github.com/reddit-archive/reddit', 'https://github.com/hpcaitech/Open-Sora', 'https://github.com/haotian-liu/LLaVA', 'https://github.com/chatanywhere/GPT_API_free', 'https://github.com/InstaPy/InstaPy', 'https://github.com/binux/pyspider', 'https://github.com/LiLittleCat/awesome-free-chatgpt', 'https://github.com/cool-RR/PySnooper', 'https://github.com/apple/ml-stable-diffusion', 'https://github.com/ipython/ipython', 'https://github.com/wilsonfreitas/awesome-quant', 'https://github.com/alievk/avatarify-python', 'https://github.com/Mikubill/sd-webui-controlnet', 'https://github.com/wting/autojump', 'https://github.com/trekhleb/learn-python', 'https://github.com/eriklindernoren/PyTorch-GAN', 'https://github.com/Kr1s77/awesome-python-login-model', 'https://github.com/twintproject/twint', 'https://github.com/THUDM/ChatGLM2-6B', 'https://github.com/wistbean/learn_python3_spider', 'https://github.com/mnielsen/neural-networks-and-deep-learning', 'https://github.com/pytorch/vision', 'https://github.com/h2y/Shadowrocket-ADBlock-Rules', 'https://github.com/OpenEthan/SMSBoom', 'https://github.com/openai/baselines', 'https://github.com/plotly/plotly.py', 'https://github.com/piskvorky/gensim', 'https://github.com/RunaCapital/awesome-oss-alternatives', 'https://github.com/meta-llama/codellama', 'https://github.com/pallets/click', 'https://github.com/spotDL/spotify-downloader', 'https://github.com/dgtlmoon/changedetection.io', 'https://github.com/Anjok07/ultimatevocalremovergui', 'https://github.com/netbox-community/netbox', 'https://github.com/mxrch/GHunt', 'https://github.com/ranger/ranger', 'https://github.com/tensorflow/tensor2tensor', 'https://github.com/aws/aws-cli', 'https://github.com/blakeblackshear/frigate', 'https://github.com/w-okada/voice-changer', 'https://github.com/GaiZhenbiao/ChuanhuChatGPT', 'https://github.com/PrefectHQ/prefect', 'https://github.com/jupyter/jupyter', 'https://github.com/facefusion/facefusion', 'https://github.com/danielgatis/rembg', 'https://github.com/borisdayma/dalle-mini', 'https://github.com/fabric/fabric', 'https://github.com/aio-libs/aiohttp', 'https://github.com/ddbourgin/numpy-ml', 'https://github.com/TransformerOptimus/SuperAGI', 'https://github.com/microsoft/Bringing-Old-Photos-Back-to-Life', 'https://github.com/pyecharts/pyecharts', 'https://github.com/tiangolo/typer', 'https://github.com/Rapptz/discord.py', 'https://github.com/fauxpilot/fauxpilot', 'https://github.com/lra/mackup', 'https://github.com/apprenticeharper/DeDRM_tools', 'https://github.com/microsoft/qlib', 'https://github.com/networkx/networkx', 'https://github.com/powerline/powerline', 'https://github.com/arc53/DocsGPT', 'https://github.com/Python-World/python-mini-projects', 'https://github.com/airbytehq/airbyte', 'https://github.com/aleju/imgaug', 'https://github.com/roboflow/supervision', 'https://github.com/pjialin/py12306', 'https://github.com/hindupuravinash/the-gan-zoo', 'https://github.com/unifyai/ivy', 'https://github.com/openai/evals', 'https://github.com/horovod/horovod', 'https://github.com/huggingface/peft', 'https://github.com/NVlabs/stylegan', 'https://github.com/tgbot-collection/YYeTsBot', 'https://github.com/gunthercox/ChatterBot', 'https://github.com/UKPLab/sentence-transformers', 'https://github.com/saltstack/salt', 'https://github.com/wangshub/wechat_jump_game', 'https://github.com/youfou/wxpy', 'https://github.com/microsoft/nni', 'https://github.com/deepset-ai/haystack', 'https://github.com/codelucas/newspaper', 'https://github.com/joaomdmoura/crewAI', 'https://github.com/google/yapf', 'https://github.com/psf/requests-html', 'https://github.com/flairNLP/flair', 'https://github.com/sczhou/CodeFormer', 'https://github.com/shengqiangzhang/examples-of-web-crawlers', 'https://github.com/davidsandberg/facenet', 'https://github.com/NanmiCoder/MediaCrawler', 'https://github.com/ansible/awx', 'https://github.com/albumentations-team/albumentations', 'https://github.com/programthink/zhao', 'https://github.com/mail-in-a-box/mailinabox', 'https://github.com/sivel/speedtest-cli', 'https://github.com/searx/searx', 'https://github.com/ShangtongZhang/reinforcement-learning-an-introduction', 'https://github.com/iterative/dvc', 'https://github.com/PySimpleGUI/PySimpleGUI', 'https://github.com/mementum/backtrader', 'https://github.com/tiangolo/sqlmodel', 'https://github.com/nltk/nltk', 'https://github.com/dmlc/dgl', 'https://github.com/microsoft/Swin-Transformer', 'https://github.com/jindongwang/transferlearning', 'https://github.com/facebookresearch/detr', 'https://github.com/idank/explainshell', 'https://github.com/s0md3v/XSStrike', 'https://github.com/fortra/impacket', 'https://github.com/MetaCubeX/mihomo', 'https://github.com/wifiphisher/wifiphisher', 'https://github.com/jaakkopasanen/AutoEq', 'https://github.com/waditu/tushare', 'https://github.com/edgedb/edgedb', 'https://github.com/bloomberg/memray', 'https://github.com/ethereum/EIPs', 'https://github.com/PaddlePaddle/PaddleHub', 'https://github.com/scipy/scipy', 'https://github.com/chroma-core/chroma', 'https://github.com/sympy/sympy', 'https://github.com/beetbox/beets', 'https://github.com/postmanlabs/httpbin', 'https://github.com/labelmeai/labelme', 'https://github.com/SFTtech/openage', 'https://github.com/encode/httpx', 'https://github.com/redis/redis-py', 'https://github.com/getpelican/pelican', 'https://github.com/THUDM/ChatGLM3', 'https://github.com/jina-ai/clip-as-service', 'https://github.com/donnemartin/awesome-aws', 'https://github.com/microsoft/pyright', 'https://github.com/pre-commit/pre-commit', 'https://github.com/PaddlePaddle/PaddleDetection', 'https://github.com/ocrmypdf/OCRmyPDF', 'https://github.com/lss233/chatgpt-mirai-qq-bot', 'https://github.com/ydataai/ydata-profiling', 'https://github.com/dask/dask', 'https://github.com/mwaskom/seaborn', 'https://github.com/ranaroussi/yfinance', 'https://github.com/tonybeltramelli/pix2code', 'https://github.com/threat9/routersploit', 'https://github.com/Miserlou/Zappa', 'https://github.com/alexjc/neural-enhance', 'https://github.com/Zulko/moviepy', 'https://github.com/meolu/walle-web', 'https://github.com/OpenMOSS/MOSS', 'https://github.com/smicallef/spiderfoot', 'https://github.com/matrix-org/synapse', 'https://github.com/google-deepmind/alphafold', 'https://github.com/dbcli/pgcli', 'https://github.com/python-pillow/Pillow', 'https://github.com/BlinkDL/RWKV-LM', 'https://github.com/allenai/allennlp', 'https://github.com/LlamaFamily/Llama-Chinese', 'https://github.com/smol-ai/developer', 'https://github.com/janeczku/calibre-web', 'https://github.com/Embedding/Chinese-Word-Vectors', 'https://github.com/cookiecutter/cookiecutter-django', 'https://github.com/rougier/numpy-100', 'https://github.com/zalandoresearch/fashion-mnist']
output_directory = 'output_csv_files'
process_repos(repo_urls, output_directory)

In [1]:
import os
import subprocess
import csv
from pathlib import Path
import shutil

In [2]:
# Function to clone a GitHub repository and collect all source code into a single string
def collect_source_code(repo_url):
    # Extract the repo name from the URL
    repo_name = repo_url.rstrip('/').split('/')[-1]
    subprocess.run(['git', 'clone', repo_url], check=True)
    
    # Collect all source code files into a single string
    source_code = []
    for root, dirs, files in os.walk(repo_name):
        for file in files:
            # Filter for source code files only (adjust filters as needed)
            if file.endswith(('.py', '.js', '.java', '.cpp', '.c', '.h', '.html', '.css', '.ts', '.go', '.rb', '.php')):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', errors='ignore') as f:
                    source_code.append(f.read())
                    
    # Join all source code files as one big string
    concatenated_code = "\n".join(source_code)
    
    # Delete the repo after extraction
    shutil.rmtree(repo_name)
    
    return repo_name, concatenated_code


In [5]:
# Replace this list with your own list of 300 URLs
#github_urls = ['https://github.com/public-apis/public-apis', 'https://github.com/donnemartin/system-design-primer', 'https://github.com/vinta/awesome-python', 'https://github.com/TheAlgorithms/Python', 'https://github.com/jackfrued/Python-100-Days', 'https://github.com/AUTOMATIC1111/stable-diffusion-webui', 'https://github.com/ytdl-org/youtube-dl', 'https://github.com/huggingface/transformers', 'https://github.com/521xueweihan/HelloGitHub', 'https://github.com/langchain-ai/langchain', 'https://github.com/nvbn/thefuck', 'https://github.com/pytorch/pytorch', 'https://github.com/django/django', 'https://github.com/tensorflow/models', 'https://github.com/yt-dlp/yt-dlp', 'https://github.com/tiangolo/fastapi', 'https://github.com/home-assistant/core', 'https://github.com/pallets/flask', 'https://github.com/fighting41love/funNLP', 'https://github.com/bregman-arie/devops-exercises', 'https://github.com/josephmisiti/awesome-machine-learning', 'https://github.com/ansible/ansible', 'https://github.com/keras-team/keras', 'https://github.com/openai/whisper', 'https://github.com/python/cpython', 'https://github.com/3b1b/manim', 'https://github.com/scikit-learn/scikit-learn', 'https://github.com/xtekky/gpt4free', 'https://github.com/binary-husky/gpt_academic', 'https://github.com/d2l-ai/d2l-zh', 'https://github.com/swisskyrepo/PayloadsAllTheThings', 'https://github.com/meta-llama/llama', 'https://github.com/localstack/localstack', 'https://github.com/zylon-ai/private-gpt', 'https://github.com/ageitgey/face_recognition', 'https://github.com/sherlock-project/sherlock', 'https://github.com/psf/requests', 'https://github.com/scrapy/scrapy', 'https://github.com/CorentinJ/Real-Time-Voice-Cloning', 'https://github.com/gpt-engineer-org/gpt-engineer', 'https://github.com/abi/screenshot-to-code', 'https://github.com/deepfakes/faceswap', 'https://github.com/soimort/you-get', 'https://github.com/OpenInterpreter/open-interpreter', 'https://github.com/xai-org/grok-1', 'https://github.com/commaai/openpilot', 'https://github.com/Textualize/rich', 'https://github.com/ultralytics/yolov5', 'https://github.com/minimaxir/big-list-of-naughty-strings', 'https://github.com/iperov/DeepFaceLab', 'https://github.com/charlax/professional-programming', 'https://github.com/Z4nzu/hackingtool', 'https://github.com/pandas-dev/pandas', 'https://github.com/isocpp/CppCoreGuidelines', 'https://github.com/geekan/MetaGPT', 'https://github.com/faif/python-patterns', 'https://github.com/THUDM/ChatGLM-6B', 'https://github.com/PaddlePaddle/PaddleOCR', 'https://github.com/apachecn/ailearning', 'https://github.com/hpcaitech/ColossalAI', 'https://github.com/chubin/cheat.sh', 'https://github.com/psf/black', 'https://github.com/floodsung/Deep-Learning-Papers-Reading-Roadmap', 'https://github.com/google-research/bert', 'https://github.com/getsentry/sentry', 'https://github.com/oobabooga/text-generation-webui', 'https://github.com/LAION-AI/Open-Assistant', 'https://github.com/Stability-AI/stablediffusion', 'https://github.com/0voice/interview_internal_reference', 'https://github.com/gto76/python-cheatsheet', 'https://github.com/lllyasviel/Fooocus', 'https://github.com/XingangPan/DragGAN', 'https://github.com/satwikkansal/wtfpython', 'https://github.com/mingrammer/diagrams', 'https://github.com/odoo/odoo', 'https://github.com/TencentARC/GFPGAN', 'https://github.com/apache/airflow', 'https://github.com/chenfei-wu/TaskMatrix', 'https://github.com/mitmproxy/mitmproxy', 'https://github.com/lm-sys/FastChat', 'https://github.com/comfyanonymous/ComfyUI', 'https://github.com/babysor/MockingBird', 'https://github.com/openai/gym', 'https://github.com/testerSunshine/12306', 'https://github.com/shadowsocks/shadowsocks', 'https://github.com/microsoft/DeepSpeed', 'https://github.com/XX-net/XX-Net', 'https://github.com/fxsjy/jieba', 'https://github.com/hankcs/HanLP', 'https://github.com/Asabeneh/30-Days-Of-Python', 'https://github.com/karpathy/nanoGPT', 'https://github.com/httpie/cli', 'https://github.com/streamlit/streamlit', 'https://github.com/ccxt/ccxt', 'https://github.com/run-llama/llama_index', 'https://github.com/ray-project/ray', 'https://github.com/certbot/certbot', 'https://github.com/sqlmapproject/sqlmap', 'https://github.com/geekcomputers/Python', 'https://github.com/huggingface/pytorch-image-models', 'https://github.com/coqui-ai/TTS', 'https://github.com/python-poetry/poetry', 'https://github.com/0xAX/linux-insides', 'https://github.com/facebookresearch/fairseq', 'https://github.com/gradio-app/gradio', 'https://github.com/yunjey/pytorch-tutorial', 'https://github.com/tatsu-lab/stanford_alpaca', 'https://github.com/explosion/spaCy', 'https://github.com/donnemartin/interactive-coding-challenges', 'https://github.com/facebookresearch/detectron2', 'https://github.com/Pythagora-io/gpt-pilot', 'https://github.com/google/jax', 'https://github.com/lllyasviel/ControlNet', 'https://github.com/acheong08/ChatGPT', 'https://github.com/open-mmlab/mmdetection', 'https://github.com/chatchat-space/Langchain-Chatchat', 'https://github.com/encode/django-rest-framework', 'https://github.com/tqdm/tqdm', 'https://github.com/Lightning-AI/pytorch-lightning', 'https://github.com/LC044/WeChatMsg', 'https://github.com/OWASP/CheatSheetSeries', 'https://github.com/donnemartin/data-science-ipython-notebooks', 'https://github.com/numpy/numpy', 'https://github.com/google/python-fire', 'https://github.com/xinntao/Real-ESRGAN', 'https://github.com/OpenBB-finance/OpenBBTerminal', 'https://github.com/facebookresearch/Detectron', 'https://github.com/freqtrade/freqtrade', 'https://github.com/StevenBlack/hosts', 'https://github.com/ycm-core/YouCompleteMe', 'https://github.com/spipm/Depix', 'https://github.com/zhayujie/chatgpt-on-wechat', 'https://github.com/littlecodersh/ItChat', 'https://github.com/nicolargo/glances', 'https://github.com/s0md3v/roop', 'https://github.com/getredash/redash', 'https://github.com/deezer/spleeter', 'https://github.com/Vision-CAIR/MiniGPT-4', 'https://github.com/python-telegram-bot/python-telegram-bot', 'https://github.com/pypa/pipenv', 'https://github.com/myshell-ai/OpenVoice', 'https://github.com/OpenDevin/OpenDevin', 'https://github.com/microsoft/cascadia-code', 'https://github.com/matterport/Mask_RCNN', 'https://github.com/tinygrad/tinygrad', 'https://github.com/svc-develop-team/so-vits-svc', 'https://github.com/RVC-Boss/GPT-SoVITS', 'https://github.com/jumpserver/jumpserver', 'https://github.com/locustio/locust', 'https://github.com/chubin/wttr.in', 'https://github.com/Textualize/textual', 'https://github.com/celery/celery', 'https://github.com/keon/algorithms', 'https://github.com/vnpy/vnpy', 'https://github.com/iperov/DeepFaceLive', 'https://github.com/ultralytics/ultralytics', 'https://github.com/eriklindernoren/ML-From-Scratch', 'https://github.com/microsoft/JARVIS', 'https://github.com/huggingface/diffusers', 'https://github.com/wangzheng0822/algo', 'https://github.com/mouredev/Hello-Python', 'https://github.com/Stability-AI/generative-models', 'https://github.com/sebastianruder/NLP-progress', 'https://github.com/JaidedAI/EasyOCR', 'https://github.com/kovidgoyal/kitty', 'https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix', 'https://github.com/HumanSignal/labelImg', 'https://github.com/d2l-ai/d2l-en', 'https://github.com/AtsushiSakai/PythonRobotics', 'https://github.com/pytorch/examples', 'https://github.com/cookiecutter/cookiecutter', 'https://github.com/tornadoweb/tornado', 'https://github.com/hiyouga/LLaMA-Factory', 'https://github.com/mindsdb/mindsdb', 'https://github.com/deepinsight/insightface', 'https://github.com/openai/gpt-2', 'https://github.com/luong-komorebi/Awesome-Linux-Software', 'https://github.com/WZMIAOMIAO/deep-learning-for-image-processing', 'https://github.com/drduh/macOS-Security-and-Privacy-Guide', 'https://github.com/openai/chatgpt-retrieval-plugin', 'https://github.com/plotly/dash', 'https://github.com/chriskiehl/Gooey', 'https://github.com/jhao104/proxy_pool', 'https://github.com/pyg-team/pytorch_geometric', 'https://github.com/saleor/saleor', 'https://github.com/zulip/zulip', 'https://github.com/jina-ai/jina', 'https://github.com/openai/openai-python', 'https://github.com/KurtBestor/Hitomi-Downloader', 'https://github.com/521xueweihan/GitHub520', 'https://github.com/ArchiveBox/ArchiveBox', 'https://github.com/facebookresearch/audiocraft', 'https://github.com/meta-llama/llama3', 'https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI', 'https://github.com/matplotlib/matplotlib', 'https://github.com/yoheinakajima/babyagi', 'https://github.com/Vonng/ddia', 'https://github.com/PromtEngineer/localGPT', 'https://github.com/vllm-project/vllm', 'https://github.com/ManimCommunity/manim', 'https://github.com/ungoogled-software/ungoogled-chromium', 'https://github.com/karpathy/minGPT', 'https://github.com/magenta/magenta', 'https://github.com/bokeh/bokeh', 'https://github.com/pydantic/pydantic', 'https://github.com/huggingface/datasets', 'https://github.com/microsoft/unilm', 'https://github.com/kholia/OSX-KVM', 'https://github.com/kovidgoyal/calibre', 'https://github.com/mkdocs/mkdocs', 'https://github.com/magic-wormhole/magic-wormhole', 'https://github.com/Delgan/loguru', 'https://github.com/lucidrains/vit-pytorch', 'https://github.com/nginx-proxy/nginx-proxy', 'https://github.com/recommenders-team/recommenders', 'https://github.com/RasaHQ/rasa', 'https://github.com/facebook/prophet', 'https://github.com/sanic-org/sanic', 'https://github.com/kaixindelele/ChatPaper', 'https://github.com/Jack-Cherish/python-spider', 'https://github.com/jantic/DeOldify', 'https://github.com/python/mypy', 'https://github.com/ymcui/Chinese-LLaMA-Alpaca', 'https://github.com/pyscript/pyscript', 'https://github.com/PostHog/posthog', 'https://github.com/mlflow/mlflow', 'https://github.com/spotify/luigi', 'https://github.com/wagtail/wagtail', 'https://github.com/Sanster/IOPaint', 'https://github.com/miloyip/game-programmer', 'https://github.com/joke2k/faker', 'https://github.com/mlc-ai/mlc-llm', 'https://github.com/Ciphey/Ciphey', 'https://github.com/quantopian/zipline', 'https://github.com/paperless-ngx/paperless-ngx', 'https://github.com/frappe/erpnext', 'https://github.com/stitionai/devika', 'https://github.com/rsms/inter', 'https://github.com/kivy/kivy', 'https://github.com/reflex-dev/reflex', 'https://github.com/onnx/onnx', 'https://github.com/reddit-archive/reddit', 'https://github.com/hpcaitech/Open-Sora', 'https://github.com/haotian-liu/LLaVA', 'https://github.com/chatanywhere/GPT_API_free', 'https://github.com/InstaPy/InstaPy', 'https://github.com/binux/pyspider', 'https://github.com/LiLittleCat/awesome-free-chatgpt', 'https://github.com/cool-RR/PySnooper', 'https://github.com/apple/ml-stable-diffusion', 'https://github.com/ipython/ipython', 'https://github.com/wilsonfreitas/awesome-quant', 'https://github.com/alievk/avatarify-python', 'https://github.com/Mikubill/sd-webui-controlnet', 'https://github.com/wting/autojump', 'https://github.com/trekhleb/learn-python', 'https://github.com/eriklindernoren/PyTorch-GAN', 'https://github.com/Kr1s77/awesome-python-login-model', 'https://github.com/twintproject/twint', 'https://github.com/THUDM/ChatGLM2-6B', 'https://github.com/wistbean/learn_python3_spider', 'https://github.com/mnielsen/neural-networks-and-deep-learning', 'https://github.com/pytorch/vision', 'https://github.com/h2y/Shadowrocket-ADBlock-Rules', 'https://github.com/OpenEthan/SMSBoom', 'https://github.com/openai/baselines', 'https://github.com/plotly/plotly.py', 'https://github.com/piskvorky/gensim', 'https://github.com/RunaCapital/awesome-oss-alternatives', 'https://github.com/meta-llama/codellama', 'https://github.com/pallets/click', 'https://github.com/spotDL/spotify-downloader', 'https://github.com/dgtlmoon/changedetection.io', 'https://github.com/Anjok07/ultimatevocalremovergui', 'https://github.com/netbox-community/netbox', 'https://github.com/mxrch/GHunt', 'https://github.com/ranger/ranger', 'https://github.com/tensorflow/tensor2tensor', 'https://github.com/aws/aws-cli', 'https://github.com/blakeblackshear/frigate', 'https://github.com/w-okada/voice-changer', 'https://github.com/GaiZhenbiao/ChuanhuChatGPT', 'https://github.com/PrefectHQ/prefect', 'https://github.com/jupyter/jupyter', 'https://github.com/facefusion/facefusion', 'https://github.com/danielgatis/rembg', 'https://github.com/borisdayma/dalle-mini', 'https://github.com/fabric/fabric', 'https://github.com/aio-libs/aiohttp', 'https://github.com/ddbourgin/numpy-ml', 'https://github.com/TransformerOptimus/SuperAGI', 'https://github.com/microsoft/Bringing-Old-Photos-Back-to-Life', 'https://github.com/pyecharts/pyecharts', 'https://github.com/tiangolo/typer', 'https://github.com/Rapptz/discord.py', 'https://github.com/fauxpilot/fauxpilot', 'https://github.com/lra/mackup', 'https://github.com/apprenticeharper/DeDRM_tools', 'https://github.com/microsoft/qlib', 'https://github.com/networkx/networkx', 'https://github.com/powerline/powerline', 'https://github.com/arc53/DocsGPT', 'https://github.com/Python-World/python-mini-projects', 'https://github.com/airbytehq/airbyte', 'https://github.com/aleju/imgaug', 'https://github.com/roboflow/supervision', 'https://github.com/pjialin/py12306', 'https://github.com/hindupuravinash/the-gan-zoo', 'https://github.com/unifyai/ivy', 'https://github.com/openai/evals', 'https://github.com/horovod/horovod', 'https://github.com/huggingface/peft', 'https://github.com/NVlabs/stylegan', 'https://github.com/tgbot-collection/YYeTsBot', 'https://github.com/gunthercox/ChatterBot', 'https://github.com/UKPLab/sentence-transformers', 'https://github.com/saltstack/salt', 'https://github.com/wangshub/wechat_jump_game', 'https://github.com/youfou/wxpy', 'https://github.com/microsoft/nni', 'https://github.com/deepset-ai/haystack', 'https://github.com/codelucas/newspaper', 'https://github.com/joaomdmoura/crewAI', 'https://github.com/google/yapf', 'https://github.com/psf/requests-html', 'https://github.com/flairNLP/flair', 'https://github.com/sczhou/CodeFormer', 'https://github.com/shengqiangzhang/examples-of-web-crawlers', 'https://github.com/davidsandberg/facenet', 'https://github.com/NanmiCoder/MediaCrawler', 'https://github.com/ansible/awx', 'https://github.com/albumentations-team/albumentations', 'https://github.com/programthink/zhao', 'https://github.com/mail-in-a-box/mailinabox', 'https://github.com/sivel/speedtest-cli', 'https://github.com/searx/searx', 'https://github.com/ShangtongZhang/reinforcement-learning-an-introduction', 'https://github.com/iterative/dvc', 'https://github.com/PySimpleGUI/PySimpleGUI', 'https://github.com/mementum/backtrader', 'https://github.com/tiangolo/sqlmodel', 'https://github.com/nltk/nltk', 'https://github.com/dmlc/dgl', 'https://github.com/microsoft/Swin-Transformer', 'https://github.com/jindongwang/transferlearning', 'https://github.com/facebookresearch/detr', 'https://github.com/idank/explainshell', 'https://github.com/s0md3v/XSStrike', 'https://github.com/fortra/impacket', 'https://github.com/MetaCubeX/mihomo', 'https://github.com/wifiphisher/wifiphisher', 'https://github.com/jaakkopasanen/AutoEq', 'https://github.com/waditu/tushare', 'https://github.com/edgedb/edgedb', 'https://github.com/bloomberg/memray', 'https://github.com/ethereum/EIPs', 'https://github.com/PaddlePaddle/PaddleHub', 'https://github.com/scipy/scipy', 'https://github.com/chroma-core/chroma', 'https://github.com/sympy/sympy', 'https://github.com/beetbox/beets', 'https://github.com/postmanlabs/httpbin', 'https://github.com/labelmeai/labelme', 'https://github.com/SFTtech/openage', 'https://github.com/encode/httpx', 'https://github.com/redis/redis-py', 'https://github.com/getpelican/pelican', 'https://github.com/THUDM/ChatGLM3', 'https://github.com/jina-ai/clip-as-service', 'https://github.com/donnemartin/awesome-aws', 'https://github.com/microsoft/pyright', 'https://github.com/pre-commit/pre-commit', 'https://github.com/PaddlePaddle/PaddleDetection', 'https://github.com/ocrmypdf/OCRmyPDF', 'https://github.com/lss233/chatgpt-mirai-qq-bot', 'https://github.com/ydataai/ydata-profiling', 'https://github.com/dask/dask', 'https://github.com/mwaskom/seaborn', 'https://github.com/ranaroussi/yfinance', 'https://github.com/tonybeltramelli/pix2code', 'https://github.com/threat9/routersploit', 'https://github.com/Miserlou/Zappa', 'https://github.com/alexjc/neural-enhance', 'https://github.com/Zulko/moviepy', 'https://github.com/meolu/walle-web', 'https://github.com/OpenMOSS/MOSS', 'https://github.com/smicallef/spiderfoot', 'https://github.com/matrix-org/synapse', 'https://github.com/google-deepmind/alphafold', 'https://github.com/dbcli/pgcli', 'https://github.com/python-pillow/Pillow', 'https://github.com/BlinkDL/RWKV-LM', 'https://github.com/allenai/allennlp', 'https://github.com/LlamaFamily/Llama-Chinese', 'https://github.com/smol-ai/developer', 'https://github.com/janeczku/calibre-web', 'https://github.com/Embedding/Chinese-Word-Vectors', 'https://github.com/cookiecutter/cookiecutter-django', 'https://github.com/rougier/numpy-100', 'https://github.com/zalandoresearch/fashion-mnist']
#github_urls = ['https://github.com/TencentARC/GFPGAN', 'https://github.com/apache/airflow', 'https://github.com/chenfei-wu/TaskMatrix', 'https://github.com/mitmproxy/mitmproxy', 'https://github.com/lm-sys/FastChat', 'https://github.com/comfyanonymous/ComfyUI', 'https://github.com/babysor/MockingBird', 'https://github.com/openai/gym', 'https://github.com/testerSunshine/12306', 'https://github.com/shadowsocks/shadowsocks', 'https://github.com/microsoft/DeepSpeed', 'https://github.com/XX-net/XX-Net', 'https://github.com/fxsjy/jieba', 'https://github.com/hankcs/HanLP', 'https://github.com/Asabeneh/30-Days-Of-Python', 'https://github.com/karpathy/nanoGPT', 'https://github.com/httpie/cli', 'https://github.com/streamlit/streamlit', 'https://github.com/ccxt/ccxt', 'https://github.com/run-llama/llama_index', 'https://github.com/ray-project/ray', 'https://github.com/certbot/certbot', 'https://github.com/sqlmapproject/sqlmap', 'https://github.com/geekcomputers/Python', 'https://github.com/huggingface/pytorch-image-models', 'https://github.com/coqui-ai/TTS', 'https://github.com/python-poetry/poetry', 'https://github.com/0xAX/linux-insides', 'https://github.com/facebookresearch/fairseq', 'https://github.com/gradio-app/gradio', 'https://github.com/yunjey/pytorch-tutorial', 'https://github.com/tatsu-lab/stanford_alpaca', 'https://github.com/explosion/spaCy', 'https://github.com/donnemartin/interactive-coding-challenges', 'https://github.com/facebookresearch/detectron2', 'https://github.com/Pythagora-io/gpt-pilot', 'https://github.com/google/jax', 'https://github.com/lllyasviel/ControlNet', 'https://github.com/acheong08/ChatGPT', 'https://github.com/open-mmlab/mmdetection', 'https://github.com/chatchat-space/Langchain-Chatchat', 'https://github.com/encode/django-rest-framework', 'https://github.com/tqdm/tqdm', 'https://github.com/Lightning-AI/pytorch-lightning', 'https://github.com/LC044/WeChatMsg', 'https://github.com/OWASP/CheatSheetSeries', 'https://github.com/donnemartin/data-science-ipython-notebooks', 'https://github.com/numpy/numpy', 'https://github.com/google/python-fire', 'https://github.com/xinntao/Real-ESRGAN', 'https://github.com/OpenBB-finance/OpenBBTerminal', 'https://github.com/facebookresearch/Detectron', 'https://github.com/freqtrade/freqtrade', 'https://github.com/StevenBlack/hosts', 'https://github.com/ycm-core/YouCompleteMe', 'https://github.com/spipm/Depix', 'https://github.com/zhayujie/chatgpt-on-wechat', 'https://github.com/littlecodersh/ItChat', 'https://github.com/nicolargo/glances', 'https://github.com/s0md3v/roop', 'https://github.com/getredash/redash', 'https://github.com/deezer/spleeter', 'https://github.com/Vision-CAIR/MiniGPT-4', 'https://github.com/python-telegram-bot/python-telegram-bot', 'https://github.com/pypa/pipenv', 'https://github.com/myshell-ai/OpenVoice', 'https://github.com/OpenDevin/OpenDevin', 'https://github.com/microsoft/cascadia-code', 'https://github.com/matterport/Mask_RCNN', 'https://github.com/tinygrad/tinygrad', 'https://github.com/svc-develop-team/so-vits-svc', 'https://github.com/RVC-Boss/GPT-SoVITS', 'https://github.com/jumpserver/jumpserver', 'https://github.com/locustio/locust', 'https://github.com/chubin/wttr.in', 'https://github.com/Textualize/textual', 'https://github.com/celery/celery', 'https://github.com/keon/algorithms', 'https://github.com/vnpy/vnpy', 'https://github.com/iperov/DeepFaceLive', 'https://github.com/ultralytics/ultralytics', 'https://github.com/eriklindernoren/ML-From-Scratch', 'https://github.com/microsoft/JARVIS', 'https://github.com/huggingface/diffusers', 'https://github.com/wangzheng0822/algo', 'https://github.com/mouredev/Hello-Python', 'https://github.com/Stability-AI/generative-models', 'https://github.com/sebastianruder/NLP-progress', 'https://github.com/JaidedAI/EasyOCR', 'https://github.com/kovidgoyal/kitty', 'https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix', 'https://github.com/HumanSignal/labelImg', 'https://github.com/d2l-ai/d2l-en', 'https://github.com/AtsushiSakai/PythonRobotics', 'https://github.com/pytorch/examples', 'https://github.com/cookiecutter/cookiecutter', 'https://github.com/tornadoweb/tornado', 'https://github.com/hiyouga/LLaMA-Factory', 'https://github.com/mindsdb/mindsdb', 'https://github.com/deepinsight/insightface', 'https://github.com/openai/gpt-2', 'https://github.com/luong-komorebi/Awesome-Linux-Software', 'https://github.com/WZMIAOMIAO/deep-learning-for-image-processing', 'https://github.com/drduh/macOS-Security-and-Privacy-Guide', 'https://github.com/openai/chatgpt-retrieval-plugin', 'https://github.com/plotly/dash', 'https://github.com/chriskiehl/Gooey', 'https://github.com/jhao104/proxy_pool', 'https://github.com/pyg-team/pytorch_geometric', 'https://github.com/saleor/saleor', 'https://github.com/zulip/zulip', 'https://github.com/jina-ai/jina', 'https://github.com/openai/openai-python', 'https://github.com/KurtBestor/Hitomi-Downloader', 'https://github.com/521xueweihan/GitHub520', 'https://github.com/ArchiveBox/ArchiveBox', 'https://github.com/facebookresearch/audiocraft', 'https://github.com/meta-llama/llama3', 'https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI', 'https://github.com/matplotlib/matplotlib', 'https://github.com/yoheinakajima/babyagi', 'https://github.com/Vonng/ddia', 'https://github.com/PromtEngineer/localGPT', 'https://github.com/vllm-project/vllm', 'https://github.com/ManimCommunity/manim', 'https://github.com/ungoogled-software/ungoogled-chromium', 'https://github.com/karpathy/minGPT', 'https://github.com/magenta/magenta', 'https://github.com/bokeh/bokeh', 'https://github.com/pydantic/pydantic', 'https://github.com/huggingface/datasets', 'https://github.com/microsoft/unilm', 'https://github.com/kholia/OSX-KVM', 'https://github.com/kovidgoyal/calibre', 'https://github.com/mkdocs/mkdocs', 'https://github.com/magic-wormhole/magic-wormhole', 'https://github.com/Delgan/loguru', 'https://github.com/lucidrains/vit-pytorch', 'https://github.com/nginx-proxy/nginx-proxy', 'https://github.com/recommenders-team/recommenders', 'https://github.com/RasaHQ/rasa', 'https://github.com/facebook/prophet', 'https://github.com/sanic-org/sanic', 'https://github.com/kaixindelele/ChatPaper', 'https://github.com/Jack-Cherish/python-spider', 'https://github.com/jantic/DeOldify', 'https://github.com/python/mypy', 'https://github.com/ymcui/Chinese-LLaMA-Alpaca', 'https://github.com/pyscript/pyscript', 'https://github.com/PostHog/posthog', 'https://github.com/mlflow/mlflow', 'https://github.com/spotify/luigi', 'https://github.com/wagtail/wagtail', 'https://github.com/Sanster/IOPaint', 'https://github.com/miloyip/game-programmer', 'https://github.com/joke2k/faker', 'https://github.com/mlc-ai/mlc-llm', 'https://github.com/Ciphey/Ciphey', 'https://github.com/quantopian/zipline', 'https://github.com/paperless-ngx/paperless-ngx', 'https://github.com/frappe/erpnext', 'https://github.com/stitionai/devika', 'https://github.com/rsms/inter', 'https://github.com/kivy/kivy', 'https://github.com/reflex-dev/reflex', 'https://github.com/onnx/onnx', 'https://github.com/reddit-archive/reddit', 'https://github.com/hpcaitech/Open-Sora', 'https://github.com/haotian-liu/LLaVA', 'https://github.com/chatanywhere/GPT_API_free', 'https://github.com/InstaPy/InstaPy', 'https://github.com/binux/pyspider', 'https://github.com/LiLittleCat/awesome-free-chatgpt', 'https://github.com/cool-RR/PySnooper', 'https://github.com/apple/ml-stable-diffusion', 'https://github.com/ipython/ipython', 'https://github.com/wilsonfreitas/awesome-quant', 'https://github.com/alievk/avatarify-python', 'https://github.com/Mikubill/sd-webui-controlnet', 'https://github.com/wting/autojump', 'https://github.com/trekhleb/learn-python', 'https://github.com/eriklindernoren/PyTorch-GAN', 'https://github.com/Kr1s77/awesome-python-login-model', 'https://github.com/twintproject/twint', 'https://github.com/THUDM/ChatGLM2-6B', 'https://github.com/wistbean/learn_python3_spider', 'https://github.com/mnielsen/neural-networks-and-deep-learning', 'https://github.com/pytorch/vision', 'https://github.com/h2y/Shadowrocket-ADBlock-Rules', 'https://github.com/OpenEthan/SMSBoom', 'https://github.com/openai/baselines', 'https://github.com/plotly/plotly.py', 'https://github.com/piskvorky/gensim', 'https://github.com/RunaCapital/awesome-oss-alternatives', 'https://github.com/meta-llama/codellama', 'https://github.com/pallets/click', 'https://github.com/spotDL/spotify-downloader', 'https://github.com/dgtlmoon/changedetection.io', 'https://github.com/Anjok07/ultimatevocalremovergui', 'https://github.com/netbox-community/netbox', 'https://github.com/mxrch/GHunt', 'https://github.com/ranger/ranger', 'https://github.com/tensorflow/tensor2tensor', 'https://github.com/aws/aws-cli', 'https://github.com/blakeblackshear/frigate', 'https://github.com/w-okada/voice-changer', 'https://github.com/GaiZhenbiao/ChuanhuChatGPT', 'https://github.com/PrefectHQ/prefect', 'https://github.com/jupyter/jupyter', 'https://github.com/facefusion/facefusion', 'https://github.com/danielgatis/rembg', 'https://github.com/borisdayma/dalle-mini', 'https://github.com/fabric/fabric', 'https://github.com/aio-libs/aiohttp', 'https://github.com/ddbourgin/numpy-ml', 'https://github.com/TransformerOptimus/SuperAGI', 'https://github.com/microsoft/Bringing-Old-Photos-Back-to-Life', 'https://github.com/pyecharts/pyecharts', 'https://github.com/tiangolo/typer', 'https://github.com/Rapptz/discord.py', 'https://github.com/fauxpilot/fauxpilot', 'https://github.com/lra/mackup', 'https://github.com/apprenticeharper/DeDRM_tools', 'https://github.com/microsoft/qlib', 'https://github.com/networkx/networkx', 'https://github.com/powerline/powerline', 'https://github.com/arc53/DocsGPT', 'https://github.com/Python-World/python-mini-projects', 'https://github.com/airbytehq/airbyte', 'https://github.com/aleju/imgaug', 'https://github.com/roboflow/supervision', 'https://github.com/pjialin/py12306', 'https://github.com/hindupuravinash/the-gan-zoo', 'https://github.com/unifyai/ivy', 'https://github.com/openai/evals', 'https://github.com/horovod/horovod', 'https://github.com/huggingface/peft', 'https://github.com/NVlabs/stylegan', 'https://github.com/tgbot-collection/YYeTsBot', 'https://github.com/gunthercox/ChatterBot', 'https://github.com/UKPLab/sentence-transformers', 'https://github.com/saltstack/salt', 'https://github.com/wangshub/wechat_jump_game', 'https://github.com/youfou/wxpy', 'https://github.com/microsoft/nni', 'https://github.com/deepset-ai/haystack', 'https://github.com/codelucas/newspaper', 'https://github.com/joaomdmoura/crewAI', 'https://github.com/google/yapf', 'https://github.com/psf/requests-html', 'https://github.com/flairNLP/flair', 'https://github.com/sczhou/CodeFormer', 'https://github.com/shengqiangzhang/examples-of-web-crawlers', 'https://github.com/davidsandberg/facenet', 'https://github.com/NanmiCoder/MediaCrawler', 'https://github.com/ansible/awx', 'https://github.com/albumentations-team/albumentations', 'https://github.com/programthink/zhao', 'https://github.com/mail-in-a-box/mailinabox', 'https://github.com/sivel/speedtest-cli', 'https://github.com/searx/searx', 'https://github.com/ShangtongZhang/reinforcement-learning-an-introduction', 'https://github.com/iterative/dvc', 'https://github.com/PySimpleGUI/PySimpleGUI', 'https://github.com/mementum/backtrader', 'https://github.com/tiangolo/sqlmodel', 'https://github.com/nltk/nltk', 'https://github.com/dmlc/dgl', 'https://github.com/microsoft/Swin-Transformer', 'https://github.com/jindongwang/transferlearning', 'https://github.com/facebookresearch/detr', 'https://github.com/idank/explainshell', 'https://github.com/s0md3v/XSStrike', 'https://github.com/fortra/impacket', 'https://github.com/MetaCubeX/mihomo', 'https://github.com/wifiphisher/wifiphisher', 'https://github.com/jaakkopasanen/AutoEq', 'https://github.com/waditu/tushare', 'https://github.com/edgedb/edgedb', 'https://github.com/bloomberg/memray', 'https://github.com/ethereum/EIPs', 'https://github.com/PaddlePaddle/PaddleHub', 'https://github.com/scipy/scipy', 'https://github.com/chroma-core/chroma', 'https://github.com/sympy/sympy', 'https://github.com/beetbox/beets', 'https://github.com/postmanlabs/httpbin', 'https://github.com/labelmeai/labelme', 'https://github.com/SFTtech/openage', 'https://github.com/encode/httpx', 'https://github.com/redis/redis-py', 'https://github.com/getpelican/pelican', 'https://github.com/THUDM/ChatGLM3', 'https://github.com/jina-ai/clip-as-service', 'https://github.com/donnemartin/awesome-aws', 'https://github.com/microsoft/pyright', 'https://github.com/pre-commit/pre-commit', 'https://github.com/PaddlePaddle/PaddleDetection', 'https://github.com/ocrmypdf/OCRmyPDF', 'https://github.com/lss233/chatgpt-mirai-qq-bot', 'https://github.com/ydataai/ydata-profiling', 'https://github.com/dask/dask', 'https://github.com/mwaskom/seaborn', 'https://github.com/ranaroussi/yfinance', 'https://github.com/tonybeltramelli/pix2code', 'https://github.com/threat9/routersploit', 'https://github.com/Miserlou/Zappa', 'https://github.com/alexjc/neural-enhance', 'https://github.com/Zulko/moviepy', 'https://github.com/meolu/walle-web', 'https://github.com/OpenMOSS/MOSS', 'https://github.com/smicallef/spiderfoot', 'https://github.com/matrix-org/synapse', 'https://github.com/google-deepmind/alphafold', 'https://github.com/dbcli/pgcli', 'https://github.com/python-pillow/Pillow', 'https://github.com/BlinkDL/RWKV-LM', 'https://github.com/allenai/allennlp', 'https://github.com/LlamaFamily/Llama-Chinese', 'https://github.com/smol-ai/developer', 'https://github.com/janeczku/calibre-web', 'https://github.com/Embedding/Chinese-Word-Vectors', 'https://github.com/cookiecutter/cookiecutter-django', 'https://github.com/rougier/numpy-100', 'https://github.com/zalandoresearch/fashion-mnist']

In [6]:
# Directory to store CSV files
output_dir = "github_repo_source_code"
os.makedirs(output_dir, exist_ok=True)

# Create a CSV file per GitHub repo
for url in github_urls:
    try:
        repo_name, concatenated_code = collect_source_code(url)
        csv_file_name = f"{repo_name}.csv"
        csv_file_path = os.path.join(output_dir, csv_file_name)
        with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow([concatenated_code])
        print(f"Successfully processed and saved {url} to {csv_file_name}")
    except Exception as e:
        print(f"Error processing {url}: {e}")

print("All repositories processed successfully.")


Cloning into 'GFPGAN'...


Successfully processed and saved https://github.com/TencentARC/GFPGAN to GFPGAN.csv


Cloning into 'airflow'...


Successfully processed and saved https://github.com/apache/airflow to airflow.csv


Cloning into 'TaskMatrix'...


Successfully processed and saved https://github.com/chenfei-wu/TaskMatrix to TaskMatrix.csv


Cloning into 'mitmproxy'...


Successfully processed and saved https://github.com/mitmproxy/mitmproxy to mitmproxy.csv


Cloning into 'FastChat'...


Successfully processed and saved https://github.com/lm-sys/FastChat to FastChat.csv


Cloning into 'ComfyUI'...


Successfully processed and saved https://github.com/comfyanonymous/ComfyUI to ComfyUI.csv


Cloning into 'MockingBird'...
Updating files: 100% (205/205), done.
Cloning into 'gym'...


Successfully processed and saved https://github.com/babysor/MockingBird to MockingBird.csv
Successfully processed and saved https://github.com/openai/gym to gym.csv


Cloning into '12306'...


Successfully processed and saved https://github.com/testerSunshine/12306 to 12306.csv


Cloning into 'shadowsocks'...


Successfully processed and saved https://github.com/shadowsocks/shadowsocks to shadowsocks.csv


Cloning into 'DeepSpeed'...
Updating files: 100% (1554/1554), done.


Successfully processed and saved https://github.com/microsoft/DeepSpeed to DeepSpeed.csv


Cloning into 'XX-Net'...


Successfully processed and saved https://github.com/XX-net/XX-Net to XX-Net.csv


Cloning into 'jieba'...


Successfully processed and saved https://github.com/fxsjy/jieba to jieba.csv


Cloning into 'HanLP'...


Successfully processed and saved https://github.com/hankcs/HanLP to HanLP.csv


Cloning into '30-Days-Of-Python'...


Successfully processed and saved https://github.com/Asabeneh/30-Days-Of-Python to 30-Days-Of-Python.csv


Cloning into 'nanoGPT'...


Successfully processed and saved https://github.com/karpathy/nanoGPT to nanoGPT.csv


Cloning into 'cli'...


Successfully processed and saved https://github.com/httpie/cli to cli.csv


Cloning into 'streamlit'...


Successfully processed and saved https://github.com/streamlit/streamlit to streamlit.csv


Cloning into 'ccxt'...
Updating files: 100% (5987/5987), done.


Successfully processed and saved https://github.com/ccxt/ccxt to ccxt.csv


Cloning into 'llama_index'...
Updating files: 100% (9994/9994), done.


Successfully processed and saved https://github.com/run-llama/llama_index to llama_index.csv


Cloning into 'ray'...
Updating files: 100% (7575/7575), done.


Successfully processed and saved https://github.com/ray-project/ray to ray.csv


Cloning into 'certbot'...


Successfully processed and saved https://github.com/certbot/certbot to certbot.csv


Cloning into 'sqlmap'...


Successfully processed and saved https://github.com/sqlmapproject/sqlmap to sqlmap.csv


Cloning into 'Python'...


Successfully processed and saved https://github.com/geekcomputers/Python to Python.csv


Cloning into 'pytorch-image-models'...


Successfully processed and saved https://github.com/huggingface/pytorch-image-models to pytorch-image-models.csv


Cloning into 'TTS'...


Successfully processed and saved https://github.com/coqui-ai/TTS to TTS.csv


Cloning into 'poetry'...


Successfully processed and saved https://github.com/python-poetry/poetry to poetry.csv


Cloning into 'linux-insides'...


Successfully processed and saved https://github.com/0xAX/linux-insides to linux-insides.csv


Cloning into 'fairseq'...


Successfully processed and saved https://github.com/facebookresearch/fairseq to fairseq.csv


Cloning into 'gradio'...


Successfully processed and saved https://github.com/gradio-app/gradio to gradio.csv


Cloning into 'pytorch-tutorial'...


Successfully processed and saved https://github.com/yunjey/pytorch-tutorial to pytorch-tutorial.csv


Cloning into 'stanford_alpaca'...


Successfully processed and saved https://github.com/tatsu-lab/stanford_alpaca to stanford_alpaca.csv


Cloning into 'spaCy'...


Successfully processed and saved https://github.com/explosion/spaCy to spaCy.csv


Cloning into 'interactive-coding-challenges'...


Successfully processed and saved https://github.com/donnemartin/interactive-coding-challenges to interactive-coding-challenges.csv


Cloning into 'detectron2'...


Successfully processed and saved https://github.com/facebookresearch/detectron2 to detectron2.csv


Cloning into 'gpt-pilot'...


Successfully processed and saved https://github.com/Pythagora-io/gpt-pilot to gpt-pilot.csv


Cloning into 'jax'...


Successfully processed and saved https://github.com/google/jax to jax.csv


Cloning into 'ControlNet'...


Successfully processed and saved https://github.com/lllyasviel/ControlNet to ControlNet.csv


Cloning into 'ChatGPT'...


Successfully processed and saved https://github.com/acheong08/ChatGPT to ChatGPT.csv


Cloning into 'mmdetection'...


Successfully processed and saved https://github.com/open-mmlab/mmdetection to mmdetection.csv


Cloning into 'Langchain-Chatchat'...


Successfully processed and saved https://github.com/chatchat-space/Langchain-Chatchat to Langchain-Chatchat.csv


Cloning into 'django-rest-framework'...


Successfully processed and saved https://github.com/encode/django-rest-framework to django-rest-framework.csv


Cloning into 'tqdm'...


Successfully processed and saved https://github.com/tqdm/tqdm to tqdm.csv


Cloning into 'pytorch-lightning'...


Successfully processed and saved https://github.com/Lightning-AI/pytorch-lightning to pytorch-lightning.csv


Cloning into 'WeChatMsg'...


Successfully processed and saved https://github.com/LC044/WeChatMsg to WeChatMsg.csv


Cloning into 'CheatSheetSeries'...


Successfully processed and saved https://github.com/OWASP/CheatSheetSeries to CheatSheetSeries.csv


Cloning into 'data-science-ipython-notebooks'...


Successfully processed and saved https://github.com/donnemartin/data-science-ipython-notebooks to data-science-ipython-notebooks.csv


Cloning into 'numpy'...


Successfully processed and saved https://github.com/numpy/numpy to numpy.csv


Cloning into 'python-fire'...


Successfully processed and saved https://github.com/google/python-fire to python-fire.csv


Cloning into 'Real-ESRGAN'...


Successfully processed and saved https://github.com/xinntao/Real-ESRGAN to Real-ESRGAN.csv


Cloning into 'OpenBBTerminal'...


Successfully processed and saved https://github.com/OpenBB-finance/OpenBBTerminal to OpenBBTerminal.csv


Cloning into 'Detectron'...


Successfully processed and saved https://github.com/facebookresearch/Detectron to Detectron.csv


Cloning into 'freqtrade'...


Successfully processed and saved https://github.com/freqtrade/freqtrade to freqtrade.csv


Cloning into 'hosts'...


Successfully processed and saved https://github.com/StevenBlack/hosts to hosts.csv


Cloning into 'YouCompleteMe'...


Successfully processed and saved https://github.com/ycm-core/YouCompleteMe to YouCompleteMe.csv


Cloning into 'Depix'...


Successfully processed and saved https://github.com/spipm/Depix to Depix.csv


Cloning into 'chatgpt-on-wechat'...


Successfully processed and saved https://github.com/zhayujie/chatgpt-on-wechat to chatgpt-on-wechat.csv


Cloning into 'ItChat'...


Successfully processed and saved https://github.com/littlecodersh/ItChat to ItChat.csv


Cloning into 'glances'...


Successfully processed and saved https://github.com/nicolargo/glances to glances.csv


Cloning into 'roop'...


Successfully processed and saved https://github.com/s0md3v/roop to roop.csv


Cloning into 'redash'...


Successfully processed and saved https://github.com/getredash/redash to redash.csv


Cloning into 'spleeter'...


Successfully processed and saved https://github.com/deezer/spleeter to spleeter.csv


Cloning into 'MiniGPT-4'...


Successfully processed and saved https://github.com/Vision-CAIR/MiniGPT-4 to MiniGPT-4.csv


Cloning into 'python-telegram-bot'...


Successfully processed and saved https://github.com/python-telegram-bot/python-telegram-bot to python-telegram-bot.csv


Cloning into 'pipenv'...


Successfully processed and saved https://github.com/pypa/pipenv to pipenv.csv


Cloning into 'OpenVoice'...


Successfully processed and saved https://github.com/myshell-ai/OpenVoice to OpenVoice.csv


Cloning into 'OpenDevin'...


Successfully processed and saved https://github.com/OpenDevin/OpenDevin to OpenDevin.csv


Cloning into 'cascadia-code'...
Updating files: 100% (41191/41191), done.


Successfully processed and saved https://github.com/microsoft/cascadia-code to cascadia-code.csv


Cloning into 'Mask_RCNN'...


Successfully processed and saved https://github.com/matterport/Mask_RCNN to Mask_RCNN.csv


Cloning into 'tinygrad'...


Successfully processed and saved https://github.com/tinygrad/tinygrad to tinygrad.csv


Cloning into 'so-vits-svc'...


Successfully processed and saved https://github.com/svc-develop-team/so-vits-svc to so-vits-svc.csv


Cloning into 'GPT-SoVITS'...


Successfully processed and saved https://github.com/RVC-Boss/GPT-SoVITS to GPT-SoVITS.csv


Cloning into 'jumpserver'...


Successfully processed and saved https://github.com/jumpserver/jumpserver to jumpserver.csv


Cloning into 'locust'...


Successfully processed and saved https://github.com/locustio/locust to locust.csv


Cloning into 'wttr.in'...


Successfully processed and saved https://github.com/chubin/wttr.in to wttr.in.csv


Cloning into 'textual'...


Successfully processed and saved https://github.com/Textualize/textual to textual.csv


Cloning into 'celery'...


Successfully processed and saved https://github.com/celery/celery to celery.csv


Cloning into 'algorithms'...


Successfully processed and saved https://github.com/keon/algorithms to algorithms.csv


Cloning into 'vnpy'...


Successfully processed and saved https://github.com/vnpy/vnpy to vnpy.csv


Cloning into 'DeepFaceLive'...
Updating files: 100% (696/696), done.


Successfully processed and saved https://github.com/iperov/DeepFaceLive to DeepFaceLive.csv


Cloning into 'ultralytics'...


Successfully processed and saved https://github.com/ultralytics/ultralytics to ultralytics.csv


Cloning into 'ML-From-Scratch'...


Successfully processed and saved https://github.com/eriklindernoren/ML-From-Scratch to ML-From-Scratch.csv


Cloning into 'JARVIS'...


Successfully processed and saved https://github.com/microsoft/JARVIS to JARVIS.csv


Cloning into 'diffusers'...


Successfully processed and saved https://github.com/huggingface/diffusers to diffusers.csv


Cloning into 'algo'...


Successfully processed and saved https://github.com/wangzheng0822/algo to algo.csv


Cloning into 'Hello-Python'...


Successfully processed and saved https://github.com/mouredev/Hello-Python to Hello-Python.csv


Cloning into 'generative-models'...


Successfully processed and saved https://github.com/Stability-AI/generative-models to generative-models.csv


Cloning into 'NLP-progress'...


Successfully processed and saved https://github.com/sebastianruder/NLP-progress to NLP-progress.csv


Cloning into 'EasyOCR'...
Updating files: 100% (313/313), done.
Cloning into 'kitty'...


Successfully processed and saved https://github.com/JaidedAI/EasyOCR to EasyOCR.csv
Successfully processed and saved https://github.com/kovidgoyal/kitty to kitty.csv


Cloning into 'pytorch-CycleGAN-and-pix2pix'...


Successfully processed and saved https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix to pytorch-CycleGAN-and-pix2pix.csv


Cloning into 'labelImg'...


Successfully processed and saved https://github.com/HumanSignal/labelImg to labelImg.csv


Cloning into 'd2l-en'...


Successfully processed and saved https://github.com/d2l-ai/d2l-en to d2l-en.csv


Cloning into 'PythonRobotics'...


Successfully processed and saved https://github.com/AtsushiSakai/PythonRobotics to PythonRobotics.csv


Cloning into 'examples'...


Successfully processed and saved https://github.com/pytorch/examples to examples.csv


Cloning into 'cookiecutter'...


Successfully processed and saved https://github.com/cookiecutter/cookiecutter to cookiecutter.csv


Cloning into 'tornado'...


Successfully processed and saved https://github.com/tornadoweb/tornado to tornado.csv


Cloning into 'LLaMA-Factory'...
Updating files: 100% (214/214), done.
Cloning into 'mindsdb'...


Successfully processed and saved https://github.com/hiyouga/LLaMA-Factory to LLaMA-Factory.csv
Successfully processed and saved https://github.com/mindsdb/mindsdb to mindsdb.csv


Cloning into 'insightface'...


Successfully processed and saved https://github.com/deepinsight/insightface to insightface.csv


Cloning into 'gpt-2'...


Successfully processed and saved https://github.com/openai/gpt-2 to gpt-2.csv


Cloning into 'Awesome-Linux-Software'...


Successfully processed and saved https://github.com/luong-komorebi/Awesome-Linux-Software to Awesome-Linux-Software.csv


Cloning into 'deep-learning-for-image-processing'...


Successfully processed and saved https://github.com/WZMIAOMIAO/deep-learning-for-image-processing to deep-learning-for-image-processing.csv


Cloning into 'macOS-Security-and-Privacy-Guide'...


Successfully processed and saved https://github.com/drduh/macOS-Security-and-Privacy-Guide to macOS-Security-and-Privacy-Guide.csv


Cloning into 'chatgpt-retrieval-plugin'...


Successfully processed and saved https://github.com/openai/chatgpt-retrieval-plugin to chatgpt-retrieval-plugin.csv


Cloning into 'dash'...


Successfully processed and saved https://github.com/plotly/dash to dash.csv


Cloning into 'Gooey'...


Successfully processed and saved https://github.com/chriskiehl/Gooey to Gooey.csv


Cloning into 'proxy_pool'...


Successfully processed and saved https://github.com/jhao104/proxy_pool to proxy_pool.csv


Cloning into 'pytorch_geometric'...


Successfully processed and saved https://github.com/pyg-team/pytorch_geometric to pytorch_geometric.csv


Cloning into 'saleor'...


Successfully processed and saved https://github.com/saleor/saleor to saleor.csv


Cloning into 'zulip'...


Successfully processed and saved https://github.com/zulip/zulip to zulip.csv


Cloning into 'jina'...


Successfully processed and saved https://github.com/jina-ai/jina to jina.csv


Cloning into 'openai-python'...


Successfully processed and saved https://github.com/openai/openai-python to openai-python.csv


Cloning into 'Hitomi-Downloader'...


Successfully processed and saved https://github.com/KurtBestor/Hitomi-Downloader to Hitomi-Downloader.csv


Cloning into 'GitHub520'...


Successfully processed and saved https://github.com/521xueweihan/GitHub520 to GitHub520.csv


Cloning into 'ArchiveBox'...


Error processing https://github.com/ArchiveBox/ArchiveBox: [Errno 2] No such file or directory: 'ArchiveBox/archivebox/vendor/atomicwrites.py'


Cloning into 'audiocraft'...


Successfully processed and saved https://github.com/facebookresearch/audiocraft to audiocraft.csv


Cloning into 'llama3'...


Successfully processed and saved https://github.com/meta-llama/llama3 to llama3.csv


Cloning into 'Retrieval-based-Voice-Conversion-WebUI'...


Successfully processed and saved https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI to Retrieval-based-Voice-Conversion-WebUI.csv


Cloning into 'matplotlib'...


Successfully processed and saved https://github.com/matplotlib/matplotlib to matplotlib.csv


Cloning into 'babyagi'...


Successfully processed and saved https://github.com/yoheinakajima/babyagi to babyagi.csv


Cloning into 'ddia'...


Successfully processed and saved https://github.com/Vonng/ddia to ddia.csv


Cloning into 'localGPT'...


Successfully processed and saved https://github.com/PromtEngineer/localGPT to localGPT.csv


Cloning into 'vllm'...


Successfully processed and saved https://github.com/vllm-project/vllm to vllm.csv


Cloning into 'manim'...


Successfully processed and saved https://github.com/ManimCommunity/manim to manim.csv


Cloning into 'ungoogled-chromium'...


Successfully processed and saved https://github.com/ungoogled-software/ungoogled-chromium to ungoogled-chromium.csv


Cloning into 'minGPT'...


Successfully processed and saved https://github.com/karpathy/minGPT to minGPT.csv


Cloning into 'magenta'...


Successfully processed and saved https://github.com/magenta/magenta to magenta.csv


Cloning into 'bokeh'...


Successfully processed and saved https://github.com/bokeh/bokeh to bokeh.csv


Cloning into 'pydantic'...


Successfully processed and saved https://github.com/pydantic/pydantic to pydantic.csv


Cloning into 'datasets'...


Successfully processed and saved https://github.com/huggingface/datasets to datasets.csv


Cloning into 'unilm'...
Updating files: 100% (5228/5228), done.


Successfully processed and saved https://github.com/microsoft/unilm to unilm.csv


Cloning into 'OSX-KVM'...


Successfully processed and saved https://github.com/kholia/OSX-KVM to OSX-KVM.csv


Cloning into 'calibre'...


Successfully processed and saved https://github.com/kovidgoyal/calibre to calibre.csv


Cloning into 'mkdocs'...


Successfully processed and saved https://github.com/mkdocs/mkdocs to mkdocs.csv


Cloning into 'magic-wormhole'...


Successfully processed and saved https://github.com/magic-wormhole/magic-wormhole to magic-wormhole.csv


Cloning into 'loguru'...


Successfully processed and saved https://github.com/Delgan/loguru to loguru.csv


Cloning into 'vit-pytorch'...


Successfully processed and saved https://github.com/lucidrains/vit-pytorch to vit-pytorch.csv


Cloning into 'nginx-proxy'...


Successfully processed and saved https://github.com/nginx-proxy/nginx-proxy to nginx-proxy.csv


Cloning into 'recommenders'...


Successfully processed and saved https://github.com/recommenders-team/recommenders to recommenders.csv


Cloning into 'rasa'...


Successfully processed and saved https://github.com/RasaHQ/rasa to rasa.csv


Cloning into 'prophet'...


Successfully processed and saved https://github.com/facebook/prophet to prophet.csv


Cloning into 'sanic'...


Successfully processed and saved https://github.com/sanic-org/sanic to sanic.csv


Cloning into 'ChatPaper'...


Successfully processed and saved https://github.com/kaixindelele/ChatPaper to ChatPaper.csv


Cloning into 'python-spider'...


Successfully processed and saved https://github.com/Jack-Cherish/python-spider to python-spider.csv


Cloning into 'DeOldify'...


Successfully processed and saved https://github.com/jantic/DeOldify to DeOldify.csv


Cloning into 'mypy'...


Successfully processed and saved https://github.com/python/mypy to mypy.csv


Cloning into 'Chinese-LLaMA-Alpaca'...


Successfully processed and saved https://github.com/ymcui/Chinese-LLaMA-Alpaca to Chinese-LLaMA-Alpaca.csv


Cloning into 'pyscript'...


Successfully processed and saved https://github.com/pyscript/pyscript to pyscript.csv


Cloning into 'posthog'...
Updating files: 100% (6383/6383), done.


Successfully processed and saved https://github.com/PostHog/posthog to posthog.csv


Cloning into 'mlflow'...
Updating files: 100% (4116/4116), done.


Successfully processed and saved https://github.com/mlflow/mlflow to mlflow.csv


Cloning into 'luigi'...


Successfully processed and saved https://github.com/spotify/luigi to luigi.csv


Cloning into 'wagtail'...


Successfully processed and saved https://github.com/wagtail/wagtail to wagtail.csv


Cloning into 'IOPaint'...


Successfully processed and saved https://github.com/Sanster/IOPaint to IOPaint.csv


Cloning into 'game-programmer'...


Successfully processed and saved https://github.com/miloyip/game-programmer to game-programmer.csv


Cloning into 'faker'...


Successfully processed and saved https://github.com/joke2k/faker to faker.csv


Cloning into 'mlc-llm'...


Successfully processed and saved https://github.com/mlc-ai/mlc-llm to mlc-llm.csv


Cloning into 'Ciphey'...


Successfully processed and saved https://github.com/Ciphey/Ciphey to Ciphey.csv


Cloning into 'zipline'...


Successfully processed and saved https://github.com/quantopian/zipline to zipline.csv


Cloning into 'paperless-ngx'...


Successfully processed and saved https://github.com/paperless-ngx/paperless-ngx to paperless-ngx.csv


Cloning into 'erpnext'...


Successfully processed and saved https://github.com/frappe/erpnext to erpnext.csv


Cloning into 'devika'...


Successfully processed and saved https://github.com/stitionai/devika to devika.csv


Cloning into 'inter'...


Successfully processed and saved https://github.com/rsms/inter to inter.csv


Cloning into 'kivy'...


Successfully processed and saved https://github.com/kivy/kivy to kivy.csv


Cloning into 'reflex'...


Successfully processed and saved https://github.com/reflex-dev/reflex to reflex.csv


Cloning into 'onnx'...


Successfully processed and saved https://github.com/onnx/onnx to onnx.csv


Cloning into 'reddit'...


Successfully processed and saved https://github.com/reddit-archive/reddit to reddit.csv


Cloning into 'Open-Sora'...


Successfully processed and saved https://github.com/hpcaitech/Open-Sora to Open-Sora.csv


Cloning into 'LLaVA'...


Successfully processed and saved https://github.com/haotian-liu/LLaVA to LLaVA.csv


Cloning into 'GPT_API_free'...


Successfully processed and saved https://github.com/chatanywhere/GPT_API_free to GPT_API_free.csv


Cloning into 'InstaPy'...


Successfully processed and saved https://github.com/InstaPy/InstaPy to InstaPy.csv


Cloning into 'pyspider'...


Successfully processed and saved https://github.com/binux/pyspider to pyspider.csv


Cloning into 'awesome-free-chatgpt'...


Successfully processed and saved https://github.com/LiLittleCat/awesome-free-chatgpt to awesome-free-chatgpt.csv


Cloning into 'PySnooper'...


Successfully processed and saved https://github.com/cool-RR/PySnooper to PySnooper.csv


Cloning into 'ml-stable-diffusion'...


Successfully processed and saved https://github.com/apple/ml-stable-diffusion to ml-stable-diffusion.csv


Cloning into 'ipython'...


Successfully processed and saved https://github.com/ipython/ipython to ipython.csv


Cloning into 'awesome-quant'...


Successfully processed and saved https://github.com/wilsonfreitas/awesome-quant to awesome-quant.csv


Cloning into 'avatarify-python'...


Successfully processed and saved https://github.com/alievk/avatarify-python to avatarify-python.csv


Cloning into 'sd-webui-controlnet'...


Successfully processed and saved https://github.com/Mikubill/sd-webui-controlnet to sd-webui-controlnet.csv


Cloning into 'autojump'...


Successfully processed and saved https://github.com/wting/autojump to autojump.csv


Cloning into 'learn-python'...


Successfully processed and saved https://github.com/trekhleb/learn-python to learn-python.csv


Cloning into 'PyTorch-GAN'...


Successfully processed and saved https://github.com/eriklindernoren/PyTorch-GAN to PyTorch-GAN.csv


Cloning into 'awesome-python-login-model'...


Successfully processed and saved https://github.com/Kr1s77/awesome-python-login-model to awesome-python-login-model.csv


Cloning into 'twint'...


Successfully processed and saved https://github.com/twintproject/twint to twint.csv


Cloning into 'ChatGLM2-6B'...


Successfully processed and saved https://github.com/THUDM/ChatGLM2-6B to ChatGLM2-6B.csv


Cloning into 'learn_python3_spider'...


Successfully processed and saved https://github.com/wistbean/learn_python3_spider to learn_python3_spider.csv


Cloning into 'neural-networks-and-deep-learning'...


Successfully processed and saved https://github.com/mnielsen/neural-networks-and-deep-learning to neural-networks-and-deep-learning.csv


Cloning into 'vision'...


Successfully processed and saved https://github.com/pytorch/vision to vision.csv


Cloning into 'Shadowrocket-ADBlock-Rules'...


Successfully processed and saved https://github.com/h2y/Shadowrocket-ADBlock-Rules to Shadowrocket-ADBlock-Rules.csv


Cloning into 'SMSBoom'...


Successfully processed and saved https://github.com/OpenEthan/SMSBoom to SMSBoom.csv


Cloning into 'baselines'...


Successfully processed and saved https://github.com/openai/baselines to baselines.csv


Cloning into 'plotly.py'...
Updating files: 100% (14054/14054), done.


Successfully processed and saved https://github.com/plotly/plotly.py to plotly.py.csv


Cloning into 'gensim'...


Successfully processed and saved https://github.com/piskvorky/gensim to gensim.csv


Cloning into 'awesome-oss-alternatives'...


Successfully processed and saved https://github.com/RunaCapital/awesome-oss-alternatives to awesome-oss-alternatives.csv


Cloning into 'codellama'...


Successfully processed and saved https://github.com/meta-llama/codellama to codellama.csv


Cloning into 'click'...


Successfully processed and saved https://github.com/pallets/click to click.csv


Cloning into 'spotify-downloader'...


Successfully processed and saved https://github.com/spotDL/spotify-downloader to spotify-downloader.csv


Cloning into 'changedetection.io'...


Successfully processed and saved https://github.com/dgtlmoon/changedetection.io to changedetection.io.csv


Cloning into 'ultimatevocalremovergui'...


Successfully processed and saved https://github.com/Anjok07/ultimatevocalremovergui to ultimatevocalremovergui.csv


Cloning into 'netbox'...


Successfully processed and saved https://github.com/netbox-community/netbox to netbox.csv


Cloning into 'GHunt'...


Successfully processed and saved https://github.com/mxrch/GHunt to GHunt.csv


Cloning into 'ranger'...


Successfully processed and saved https://github.com/ranger/ranger to ranger.csv


Cloning into 'tensor2tensor'...


Successfully processed and saved https://github.com/tensorflow/tensor2tensor to tensor2tensor.csv


Cloning into 'aws-cli'...


Successfully processed and saved https://github.com/aws/aws-cli to aws-cli.csv


Cloning into 'frigate'...


Successfully processed and saved https://github.com/blakeblackshear/frigate to frigate.csv


Cloning into 'voice-changer'...


Successfully processed and saved https://github.com/w-okada/voice-changer to voice-changer.csv


Cloning into 'ChuanhuChatGPT'...


Successfully processed and saved https://github.com/GaiZhenbiao/ChuanhuChatGPT to ChuanhuChatGPT.csv


Cloning into 'prefect'...


Successfully processed and saved https://github.com/PrefectHQ/prefect to prefect.csv


Cloning into 'jupyter'...


Successfully processed and saved https://github.com/jupyter/jupyter to jupyter.csv


Cloning into 'facefusion'...


Successfully processed and saved https://github.com/facefusion/facefusion to facefusion.csv


Cloning into 'rembg'...


Successfully processed and saved https://github.com/danielgatis/rembg to rembg.csv


Cloning into 'dalle-mini'...


Successfully processed and saved https://github.com/borisdayma/dalle-mini to dalle-mini.csv


Cloning into 'fabric'...


Successfully processed and saved https://github.com/fabric/fabric to fabric.csv


Cloning into 'aiohttp'...


Successfully processed and saved https://github.com/aio-libs/aiohttp to aiohttp.csv


Cloning into 'numpy-ml'...


Successfully processed and saved https://github.com/ddbourgin/numpy-ml to numpy-ml.csv


Cloning into 'SuperAGI'...


Successfully processed and saved https://github.com/TransformerOptimus/SuperAGI to SuperAGI.csv


Cloning into 'Bringing-Old-Photos-Back-to-Life'...


Successfully processed and saved https://github.com/microsoft/Bringing-Old-Photos-Back-to-Life to Bringing-Old-Photos-Back-to-Life.csv


Cloning into 'pyecharts'...


Successfully processed and saved https://github.com/pyecharts/pyecharts to pyecharts.csv


Cloning into 'typer'...


Successfully processed and saved https://github.com/tiangolo/typer to typer.csv


Cloning into 'discord.py'...


Successfully processed and saved https://github.com/Rapptz/discord.py to discord.py.csv


Cloning into 'fauxpilot'...


Successfully processed and saved https://github.com/fauxpilot/fauxpilot to fauxpilot.csv


Cloning into 'mackup'...


Successfully processed and saved https://github.com/lra/mackup to mackup.csv


Cloning into 'DeDRM_tools'...


Successfully processed and saved https://github.com/apprenticeharper/DeDRM_tools to DeDRM_tools.csv


Cloning into 'qlib'...


Successfully processed and saved https://github.com/microsoft/qlib to qlib.csv


Cloning into 'networkx'...


Successfully processed and saved https://github.com/networkx/networkx to networkx.csv


Cloning into 'powerline'...


Successfully processed and saved https://github.com/powerline/powerline to powerline.csv


Cloning into 'DocsGPT'...


Successfully processed and saved https://github.com/arc53/DocsGPT to DocsGPT.csv


Cloning into 'python-mini-projects'...


Successfully processed and saved https://github.com/Python-World/python-mini-projects to python-mini-projects.csv


Cloning into 'airbyte'...
Updating files: 100% (15906/15906), done.


Successfully processed and saved https://github.com/airbytehq/airbyte to airbyte.csv


Cloning into 'imgaug'...


Successfully processed and saved https://github.com/aleju/imgaug to imgaug.csv


Cloning into 'supervision'...


Successfully processed and saved https://github.com/roboflow/supervision to supervision.csv


Cloning into 'py12306'...


Successfully processed and saved https://github.com/pjialin/py12306 to py12306.csv


Cloning into 'the-gan-zoo'...


Successfully processed and saved https://github.com/hindupuravinash/the-gan-zoo to the-gan-zoo.csv


Cloning into 'ivy'...


Successfully processed and saved https://github.com/unifyai/ivy to ivy.csv


Cloning into 'evals'...


Successfully processed and saved https://github.com/openai/evals to evals.csv


Cloning into 'horovod'...


Successfully processed and saved https://github.com/horovod/horovod to horovod.csv


Cloning into 'peft'...


Successfully processed and saved https://github.com/huggingface/peft to peft.csv


Cloning into 'stylegan'...


Successfully processed and saved https://github.com/NVlabs/stylegan to stylegan.csv


Cloning into 'YYeTsBot'...


Successfully processed and saved https://github.com/tgbot-collection/YYeTsBot to YYeTsBot.csv


Cloning into 'ChatterBot'...


Successfully processed and saved https://github.com/gunthercox/ChatterBot to ChatterBot.csv


Cloning into 'sentence-transformers'...


Successfully processed and saved https://github.com/UKPLab/sentence-transformers to sentence-transformers.csv


Cloning into 'salt'...


Successfully processed and saved https://github.com/saltstack/salt to salt.csv


Cloning into 'wechat_jump_game'...


Successfully processed and saved https://github.com/wangshub/wechat_jump_game to wechat_jump_game.csv


Cloning into 'wxpy'...


Successfully processed and saved https://github.com/youfou/wxpy to wxpy.csv


Cloning into 'nni'...


Successfully processed and saved https://github.com/microsoft/nni to nni.csv


Cloning into 'haystack'...


Successfully processed and saved https://github.com/deepset-ai/haystack to haystack.csv


Cloning into 'newspaper'...


Successfully processed and saved https://github.com/codelucas/newspaper to newspaper.csv


Cloning into 'crewAI'...


Successfully processed and saved https://github.com/joaomdmoura/crewAI to crewAI.csv


Cloning into 'yapf'...


Successfully processed and saved https://github.com/google/yapf to yapf.csv


Cloning into 'requests-html'...


Successfully processed and saved https://github.com/psf/requests-html to requests-html.csv


Cloning into 'flair'...


Successfully processed and saved https://github.com/flairNLP/flair to flair.csv


Cloning into 'CodeFormer'...


Successfully processed and saved https://github.com/sczhou/CodeFormer to CodeFormer.csv


Cloning into 'examples-of-web-crawlers'...


Successfully processed and saved https://github.com/shengqiangzhang/examples-of-web-crawlers to examples-of-web-crawlers.csv


Cloning into 'facenet'...


Successfully processed and saved https://github.com/davidsandberg/facenet to facenet.csv


Cloning into 'MediaCrawler'...


Successfully processed and saved https://github.com/NanmiCoder/MediaCrawler to MediaCrawler.csv


Cloning into 'awx'...


Successfully processed and saved https://github.com/ansible/awx to awx.csv


Cloning into 'albumentations'...


Successfully processed and saved https://github.com/albumentations-team/albumentations to albumentations.csv


Cloning into 'zhao'...


Successfully processed and saved https://github.com/programthink/zhao to zhao.csv


Cloning into 'mailinabox'...


Successfully processed and saved https://github.com/mail-in-a-box/mailinabox to mailinabox.csv


Cloning into 'speedtest-cli'...


Successfully processed and saved https://github.com/sivel/speedtest-cli to speedtest-cli.csv


Cloning into 'searx'...


Successfully processed and saved https://github.com/searx/searx to searx.csv


Cloning into 'reinforcement-learning-an-introduction'...


Successfully processed and saved https://github.com/ShangtongZhang/reinforcement-learning-an-introduction to reinforcement-learning-an-introduction.csv


Cloning into 'dvc'...


Successfully processed and saved https://github.com/iterative/dvc to dvc.csv


Cloning into 'PySimpleGUI'...


Successfully processed and saved https://github.com/PySimpleGUI/PySimpleGUI to PySimpleGUI.csv


Cloning into 'backtrader'...


Successfully processed and saved https://github.com/mementum/backtrader to backtrader.csv


Cloning into 'sqlmodel'...


Successfully processed and saved https://github.com/tiangolo/sqlmodel to sqlmodel.csv


Cloning into 'nltk'...


Successfully processed and saved https://github.com/nltk/nltk to nltk.csv


Cloning into 'dgl'...


Successfully processed and saved https://github.com/dmlc/dgl to dgl.csv


Cloning into 'Swin-Transformer'...


Successfully processed and saved https://github.com/microsoft/Swin-Transformer to Swin-Transformer.csv


Cloning into 'transferlearning'...


Successfully processed and saved https://github.com/jindongwang/transferlearning to transferlearning.csv


Cloning into 'detr'...


Successfully processed and saved https://github.com/facebookresearch/detr to detr.csv


Cloning into 'explainshell'...


Successfully processed and saved https://github.com/idank/explainshell to explainshell.csv


Cloning into 'XSStrike'...


Successfully processed and saved https://github.com/s0md3v/XSStrike to XSStrike.csv


Cloning into 'impacket'...


Successfully processed and saved https://github.com/fortra/impacket to impacket.csv


Cloning into 'mihomo'...


Successfully processed and saved https://github.com/MetaCubeX/mihomo to mihomo.csv


Cloning into 'wifiphisher'...


Successfully processed and saved https://github.com/wifiphisher/wifiphisher to wifiphisher.csv


Cloning into 'AutoEq'...
Updating files: 100% (63620/63620), done.


Successfully processed and saved https://github.com/jaakkopasanen/AutoEq to AutoEq.csv


Cloning into 'tushare'...


Successfully processed and saved https://github.com/waditu/tushare to tushare.csv


Cloning into 'edgedb'...


Successfully processed and saved https://github.com/edgedb/edgedb to edgedb.csv


Cloning into 'memray'...


Successfully processed and saved https://github.com/bloomberg/memray to memray.csv


Cloning into 'EIPs'...


Successfully processed and saved https://github.com/ethereum/EIPs to EIPs.csv


Cloning into 'PaddleHub'...
Updating files: 100% (2956/2956), done.


Successfully processed and saved https://github.com/PaddlePaddle/PaddleHub to PaddleHub.csv


Cloning into 'scipy'...


Successfully processed and saved https://github.com/scipy/scipy to scipy.csv


Cloning into 'chroma'...


Successfully processed and saved https://github.com/chroma-core/chroma to chroma.csv


Cloning into 'sympy'...


Successfully processed and saved https://github.com/sympy/sympy to sympy.csv


Cloning into 'beets'...


Successfully processed and saved https://github.com/beetbox/beets to beets.csv


Cloning into 'httpbin'...


Successfully processed and saved https://github.com/postmanlabs/httpbin to httpbin.csv


Cloning into 'labelme'...


Successfully processed and saved https://github.com/labelmeai/labelme to labelme.csv


Cloning into 'openage'...


Successfully processed and saved https://github.com/SFTtech/openage to openage.csv


Cloning into 'httpx'...


Successfully processed and saved https://github.com/encode/httpx to httpx.csv


Cloning into 'redis-py'...


Successfully processed and saved https://github.com/redis/redis-py to redis-py.csv


Cloning into 'pelican'...


Successfully processed and saved https://github.com/getpelican/pelican to pelican.csv


Cloning into 'ChatGLM3'...


Successfully processed and saved https://github.com/THUDM/ChatGLM3 to ChatGLM3.csv


Cloning into 'clip-as-service'...


Successfully processed and saved https://github.com/jina-ai/clip-as-service to clip-as-service.csv


Cloning into 'awesome-aws'...


Successfully processed and saved https://github.com/donnemartin/awesome-aws to awesome-aws.csv


Cloning into 'pyright'...


Successfully processed and saved https://github.com/microsoft/pyright to pyright.csv


Cloning into 'pre-commit'...


Successfully processed and saved https://github.com/pre-commit/pre-commit to pre-commit.csv


Cloning into 'PaddleDetection'...


Successfully processed and saved https://github.com/PaddlePaddle/PaddleDetection to PaddleDetection.csv


Cloning into 'OCRmyPDF'...


Successfully processed and saved https://github.com/ocrmypdf/OCRmyPDF to OCRmyPDF.csv


Cloning into 'chatgpt-mirai-qq-bot'...


Successfully processed and saved https://github.com/lss233/chatgpt-mirai-qq-bot to chatgpt-mirai-qq-bot.csv


Cloning into 'ydata-profiling'...


Successfully processed and saved https://github.com/ydataai/ydata-profiling to ydata-profiling.csv


Cloning into 'dask'...


Successfully processed and saved https://github.com/dask/dask to dask.csv


Cloning into 'seaborn'...


Successfully processed and saved https://github.com/mwaskom/seaborn to seaborn.csv


Cloning into 'yfinance'...


Successfully processed and saved https://github.com/ranaroussi/yfinance to yfinance.csv


Cloning into 'pix2code'...
Updating files: 100% (43/43), done.


Successfully processed and saved https://github.com/tonybeltramelli/pix2code to pix2code.csv


Cloning into 'routersploit'...


Successfully processed and saved https://github.com/threat9/routersploit to routersploit.csv


Cloning into 'Zappa'...


Successfully processed and saved https://github.com/Miserlou/Zappa to Zappa.csv


Cloning into 'neural-enhance'...


Successfully processed and saved https://github.com/alexjc/neural-enhance to neural-enhance.csv


Cloning into 'moviepy'...


Successfully processed and saved https://github.com/Zulko/moviepy to moviepy.csv


Cloning into 'walle-web'...


Successfully processed and saved https://github.com/meolu/walle-web to walle-web.csv


Cloning into 'MOSS'...


Successfully processed and saved https://github.com/OpenMOSS/MOSS to MOSS.csv


Cloning into 'spiderfoot'...


Successfully processed and saved https://github.com/smicallef/spiderfoot to spiderfoot.csv


Cloning into 'synapse'...


Successfully processed and saved https://github.com/matrix-org/synapse to synapse.csv


Cloning into 'alphafold'...


Successfully processed and saved https://github.com/google-deepmind/alphafold to alphafold.csv


Cloning into 'pgcli'...


Successfully processed and saved https://github.com/dbcli/pgcli to pgcli.csv


Cloning into 'Pillow'...


Successfully processed and saved https://github.com/python-pillow/Pillow to Pillow.csv


Cloning into 'RWKV-LM'...


Successfully processed and saved https://github.com/BlinkDL/RWKV-LM to RWKV-LM.csv


Cloning into 'allennlp'...


Successfully processed and saved https://github.com/allenai/allennlp to allennlp.csv


Cloning into 'Llama-Chinese'...


Successfully processed and saved https://github.com/LlamaFamily/Llama-Chinese to Llama-Chinese.csv


Cloning into 'developer'...


Successfully processed and saved https://github.com/smol-ai/developer to developer.csv


Cloning into 'calibre-web'...


Successfully processed and saved https://github.com/janeczku/calibre-web to calibre-web.csv


Cloning into 'Chinese-Word-Vectors'...


Successfully processed and saved https://github.com/Embedding/Chinese-Word-Vectors to Chinese-Word-Vectors.csv


Cloning into 'cookiecutter-django'...


Successfully processed and saved https://github.com/cookiecutter/cookiecutter-django to cookiecutter-django.csv


Cloning into 'numpy-100'...


Successfully processed and saved https://github.com/rougier/numpy-100 to numpy-100.csv


Cloning into 'fashion-mnist'...


Successfully processed and saved https://github.com/zalandoresearch/fashion-mnist to fashion-mnist.csv
All repositories processed successfully.


In [5]:
import os
import subprocess
import csv
from pathlib import Path
import shutil
import requests
import os
import re
import pandas as pd
from urllib.parse import urlparse, quote
from urllib.parse import urlparse

# Function to clone a GitHub repository and collect all source code into a single string
def collect_source_code(repo_url):
    # Extract the repo name from the URL
    repo_name = repo_url.rstrip('/').split('/')[-1]
    subprocess.run(['git', 'clone', repo_url], check=True)
    
    # Collect all source code files into a single string
    source_code = []
    for root, dirs, files in os.walk(repo_name):
        for file in files:
            # Filter for source code files only (adjust filters as needed)
            if file.endswith(('.py', '.js', '.java', '.cpp', '.c', '.h', '.html', '.css', '.ts', '.go', '.rb', '.php')):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', errors='ignore') as f:
                    source_code.append(f.read())
                    
    # Join all source code files as one big string
    concatenated_code = "\n".join(source_code)
    print(type(concatenated_code))
    
    # Delete the repo after extraction
    shutil.rmtree(repo_name)
    
    return repo_name, concatenated_code



In [9]:
# Replace this list with your own list of 300 URLs
#github_urls = ['https://github.com/public-apis/public-apis', 'https://github.com/donnemartin/system-design-primer', 'https://github.com/vinta/awesome-python', 'https://github.com/TheAlgorithms/Python', 'https://github.com/jackfrued/Python-100-Days', 'https://github.com/AUTOMATIC1111/stable-diffusion-webui', 'https://github.com/ytdl-org/youtube-dl', 'https://github.com/huggingface/transformers', 'https://github.com/521xueweihan/HelloGitHub', 'https://github.com/langchain-ai/langchain', 'https://github.com/nvbn/thefuck', 'https://github.com/pytorch/pytorch', 'https://github.com/django/django', 'https://github.com/tensorflow/models', 'https://github.com/yt-dlp/yt-dlp', 'https://github.com/tiangolo/fastapi', 'https://github.com/home-assistant/core', 'https://github.com/pallets/flask', 'https://github.com/fighting41love/funNLP', 'https://github.com/bregman-arie/devops-exercises', 'https://github.com/josephmisiti/awesome-machine-learning', 'https://github.com/ansible/ansible', 'https://github.com/keras-team/keras', 'https://github.com/openai/whisper', 'https://github.com/python/cpython', 'https://github.com/3b1b/manim', 'https://github.com/scikit-learn/scikit-learn', 'https://github.com/xtekky/gpt4free', 'https://github.com/binary-husky/gpt_academic', 'https://github.com/d2l-ai/d2l-zh', 'https://github.com/swisskyrepo/PayloadsAllTheThings', 'https://github.com/meta-llama/llama', 'https://github.com/localstack/localstack', 'https://github.com/zylon-ai/private-gpt', 'https://github.com/ageitgey/face_recognition', 'https://github.com/sherlock-project/sherlock', 'https://github.com/psf/requests', 'https://github.com/scrapy/scrapy', 'https://github.com/CorentinJ/Real-Time-Voice-Cloning', 'https://github.com/gpt-engineer-org/gpt-engineer', 'https://github.com/abi/screenshot-to-code', 'https://github.com/deepfakes/faceswap', 'https://github.com/soimort/you-get', 'https://github.com/OpenInterpreter/open-interpreter', 'https://github.com/xai-org/grok-1', 'https://github.com/commaai/openpilot', 'https://github.com/Textualize/rich', 'https://github.com/ultralytics/yolov5', 'https://github.com/minimaxir/big-list-of-naughty-strings', 'https://github.com/iperov/DeepFaceLab', 'https://github.com/charlax/professional-programming', 'https://github.com/Z4nzu/hackingtool', 'https://github.com/pandas-dev/pandas', 'https://github.com/isocpp/CppCoreGuidelines', 'https://github.com/geekan/MetaGPT', 'https://github.com/faif/python-patterns', 'https://github.com/THUDM/ChatGLM-6B', 'https://github.com/PaddlePaddle/PaddleOCR', 'https://github.com/apachecn/ailearning', 'https://github.com/hpcaitech/ColossalAI', 'https://github.com/chubin/cheat.sh', 'https://github.com/psf/black', 'https://github.com/floodsung/Deep-Learning-Papers-Reading-Roadmap', 'https://github.com/google-research/bert', 'https://github.com/getsentry/sentry', 'https://github.com/oobabooga/text-generation-webui', 'https://github.com/LAION-AI/Open-Assistant', 'https://github.com/Stability-AI/stablediffusion', 'https://github.com/0voice/interview_internal_reference', 'https://github.com/gto76/python-cheatsheet', 'https://github.com/lllyasviel/Fooocus', 'https://github.com/XingangPan/DragGAN', 'https://github.com/satwikkansal/wtfpython', 'https://github.com/mingrammer/diagrams', 'https://github.com/odoo/odoo', 'https://github.com/TencentARC/GFPGAN', 'https://github.com/apache/airflow', 'https://github.com/chenfei-wu/TaskMatrix', 'https://github.com/mitmproxy/mitmproxy', 'https://github.com/lm-sys/FastChat', 'https://github.com/comfyanonymous/ComfyUI', 'https://github.com/babysor/MockingBird', 'https://github.com/openai/gym', 'https://github.com/testerSunshine/12306', 'https://github.com/shadowsocks/shadowsocks', 'https://github.com/microsoft/DeepSpeed', 'https://github.com/XX-net/XX-Net', 'https://github.com/fxsjy/jieba', 'https://github.com/hankcs/HanLP', 'https://github.com/Asabeneh/30-Days-Of-Python', 'https://github.com/karpathy/nanoGPT', 'https://github.com/httpie/cli', 'https://github.com/streamlit/streamlit', 'https://github.com/ccxt/ccxt', 'https://github.com/run-llama/llama_index', 'https://github.com/ray-project/ray', 'https://github.com/certbot/certbot', 'https://github.com/sqlmapproject/sqlmap', 'https://github.com/geekcomputers/Python', 'https://github.com/huggingface/pytorch-image-models', 'https://github.com/coqui-ai/TTS', 'https://github.com/python-poetry/poetry', 'https://github.com/0xAX/linux-insides', 'https://github.com/facebookresearch/fairseq', 'https://github.com/gradio-app/gradio', 'https://github.com/yunjey/pytorch-tutorial', 'https://github.com/tatsu-lab/stanford_alpaca', 'https://github.com/explosion/spaCy', 'https://github.com/donnemartin/interactive-coding-challenges', 'https://github.com/facebookresearch/detectron2', 'https://github.com/Pythagora-io/gpt-pilot', 'https://github.com/google/jax', 'https://github.com/lllyasviel/ControlNet', 'https://github.com/acheong08/ChatGPT', 'https://github.com/open-mmlab/mmdetection', 'https://github.com/chatchat-space/Langchain-Chatchat', 'https://github.com/encode/django-rest-framework', 'https://github.com/tqdm/tqdm', 'https://github.com/Lightning-AI/pytorch-lightning', 'https://github.com/LC044/WeChatMsg', 'https://github.com/OWASP/CheatSheetSeries', 'https://github.com/donnemartin/data-science-ipython-notebooks', 'https://github.com/numpy/numpy', 'https://github.com/google/python-fire', 'https://github.com/xinntao/Real-ESRGAN', 'https://github.com/OpenBB-finance/OpenBBTerminal', 'https://github.com/facebookresearch/Detectron', 'https://github.com/freqtrade/freqtrade', 'https://github.com/StevenBlack/hosts', 'https://github.com/ycm-core/YouCompleteMe', 'https://github.com/spipm/Depix', 'https://github.com/zhayujie/chatgpt-on-wechat', 'https://github.com/littlecodersh/ItChat', 'https://github.com/nicolargo/glances', 'https://github.com/s0md3v/roop', 'https://github.com/getredash/redash', 'https://github.com/deezer/spleeter', 'https://github.com/Vision-CAIR/MiniGPT-4', 'https://github.com/python-telegram-bot/python-telegram-bot', 'https://github.com/pypa/pipenv', 'https://github.com/myshell-ai/OpenVoice', 'https://github.com/OpenDevin/OpenDevin', 'https://github.com/microsoft/cascadia-code', 'https://github.com/matterport/Mask_RCNN', 'https://github.com/tinygrad/tinygrad', 'https://github.com/svc-develop-team/so-vits-svc', 'https://github.com/RVC-Boss/GPT-SoVITS', 'https://github.com/jumpserver/jumpserver', 'https://github.com/locustio/locust', 'https://github.com/chubin/wttr.in', 'https://github.com/Textualize/textual', 'https://github.com/celery/celery', 'https://github.com/keon/algorithms', 'https://github.com/vnpy/vnpy', 'https://github.com/iperov/DeepFaceLive', 'https://github.com/ultralytics/ultralytics', 'https://github.com/eriklindernoren/ML-From-Scratch', 'https://github.com/microsoft/JARVIS', 'https://github.com/huggingface/diffusers', 'https://github.com/wangzheng0822/algo', 'https://github.com/mouredev/Hello-Python', 'https://github.com/Stability-AI/generative-models', 'https://github.com/sebastianruder/NLP-progress', 'https://github.com/JaidedAI/EasyOCR', 'https://github.com/kovidgoyal/kitty', 'https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix', 'https://github.com/HumanSignal/labelImg', 'https://github.com/d2l-ai/d2l-en', 'https://github.com/AtsushiSakai/PythonRobotics', 'https://github.com/pytorch/examples', 'https://github.com/cookiecutter/cookiecutter', 'https://github.com/tornadoweb/tornado', 'https://github.com/hiyouga/LLaMA-Factory', 'https://github.com/mindsdb/mindsdb', 'https://github.com/deepinsight/insightface', 'https://github.com/openai/gpt-2', 'https://github.com/luong-komorebi/Awesome-Linux-Software', 'https://github.com/WZMIAOMIAO/deep-learning-for-image-processing', 'https://github.com/drduh/macOS-Security-and-Privacy-Guide', 'https://github.com/openai/chatgpt-retrieval-plugin', 'https://github.com/plotly/dash', 'https://github.com/chriskiehl/Gooey', 'https://github.com/jhao104/proxy_pool', 'https://github.com/pyg-team/pytorch_geometric', 'https://github.com/saleor/saleor', 'https://github.com/zulip/zulip', 'https://github.com/jina-ai/jina', 'https://github.com/openai/openai-python', 'https://github.com/KurtBestor/Hitomi-Downloader', 'https://github.com/521xueweihan/GitHub520', 'https://github.com/ArchiveBox/ArchiveBox', 'https://github.com/facebookresearch/audiocraft', 'https://github.com/meta-llama/llama3', 'https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI', 'https://github.com/matplotlib/matplotlib', 'https://github.com/yoheinakajima/babyagi', 'https://github.com/Vonng/ddia', 'https://github.com/PromtEngineer/localGPT', 'https://github.com/vllm-project/vllm', 'https://github.com/ManimCommunity/manim', 'https://github.com/ungoogled-software/ungoogled-chromium', 'https://github.com/karpathy/minGPT', 'https://github.com/magenta/magenta', 'https://github.com/bokeh/bokeh', 'https://github.com/pydantic/pydantic', 'https://github.com/huggingface/datasets', 'https://github.com/microsoft/unilm', 'https://github.com/kholia/OSX-KVM', 'https://github.com/kovidgoyal/calibre', 'https://github.com/mkdocs/mkdocs', 'https://github.com/magic-wormhole/magic-wormhole', 'https://github.com/Delgan/loguru', 'https://github.com/lucidrains/vit-pytorch', 'https://github.com/nginx-proxy/nginx-proxy', 'https://github.com/recommenders-team/recommenders', 'https://github.com/RasaHQ/rasa', 'https://github.com/facebook/prophet', 'https://github.com/sanic-org/sanic', 'https://github.com/kaixindelele/ChatPaper', 'https://github.com/Jack-Cherish/python-spider', 'https://github.com/jantic/DeOldify', 'https://github.com/python/mypy', 'https://github.com/ymcui/Chinese-LLaMA-Alpaca', 'https://github.com/pyscript/pyscript', 'https://github.com/PostHog/posthog', 'https://github.com/mlflow/mlflow', 'https://github.com/spotify/luigi', 'https://github.com/wagtail/wagtail', 'https://github.com/Sanster/IOPaint', 'https://github.com/miloyip/game-programmer', 'https://github.com/joke2k/faker', 'https://github.com/mlc-ai/mlc-llm', 'https://github.com/Ciphey/Ciphey', 'https://github.com/quantopian/zipline', 'https://github.com/paperless-ngx/paperless-ngx', 'https://github.com/frappe/erpnext', 'https://github.com/stitionai/devika', 'https://github.com/rsms/inter', 'https://github.com/kivy/kivy', 'https://github.com/reflex-dev/reflex', 'https://github.com/onnx/onnx', 'https://github.com/reddit-archive/reddit', 'https://github.com/hpcaitech/Open-Sora', 'https://github.com/haotian-liu/LLaVA', 'https://github.com/chatanywhere/GPT_API_free', 'https://github.com/InstaPy/InstaPy', 'https://github.com/binux/pyspider', 'https://github.com/LiLittleCat/awesome-free-chatgpt', 'https://github.com/cool-RR/PySnooper', 'https://github.com/apple/ml-stable-diffusion', 'https://github.com/ipython/ipython', 'https://github.com/wilsonfreitas/awesome-quant', 'https://github.com/alievk/avatarify-python', 'https://github.com/Mikubill/sd-webui-controlnet', 'https://github.com/wting/autojump', 'https://github.com/trekhleb/learn-python', 'https://github.com/eriklindernoren/PyTorch-GAN', 'https://github.com/Kr1s77/awesome-python-login-model', 'https://github.com/twintproject/twint', 'https://github.com/THUDM/ChatGLM2-6B', 'https://github.com/wistbean/learn_python3_spider', 'https://github.com/mnielsen/neural-networks-and-deep-learning', 'https://github.com/pytorch/vision', 'https://github.com/h2y/Shadowrocket-ADBlock-Rules', 'https://github.com/OpenEthan/SMSBoom', 'https://github.com/openai/baselines', 'https://github.com/plotly/plotly.py', 'https://github.com/piskvorky/gensim', 'https://github.com/RunaCapital/awesome-oss-alternatives', 'https://github.com/meta-llama/codellama', 'https://github.com/pallets/click', 'https://github.com/spotDL/spotify-downloader', 'https://github.com/dgtlmoon/changedetection.io', 'https://github.com/Anjok07/ultimatevocalremovergui', 'https://github.com/netbox-community/netbox', 'https://github.com/mxrch/GHunt', 'https://github.com/ranger/ranger', 'https://github.com/tensorflow/tensor2tensor', 'https://github.com/aws/aws-cli', 'https://github.com/blakeblackshear/frigate', 'https://github.com/w-okada/voice-changer', 'https://github.com/GaiZhenbiao/ChuanhuChatGPT', 'https://github.com/PrefectHQ/prefect', 'https://github.com/jupyter/jupyter', 'https://github.com/facefusion/facefusion', 'https://github.com/danielgatis/rembg', 'https://github.com/borisdayma/dalle-mini', 'https://github.com/fabric/fabric', 'https://github.com/aio-libs/aiohttp', 'https://github.com/ddbourgin/numpy-ml', 'https://github.com/TransformerOptimus/SuperAGI', 'https://github.com/microsoft/Bringing-Old-Photos-Back-to-Life', 'https://github.com/pyecharts/pyecharts', 'https://github.com/tiangolo/typer', 'https://github.com/Rapptz/discord.py', 'https://github.com/fauxpilot/fauxpilot', 'https://github.com/lra/mackup', 'https://github.com/apprenticeharper/DeDRM_tools', 'https://github.com/microsoft/qlib', 'https://github.com/networkx/networkx', 'https://github.com/powerline/powerline', 'https://github.com/arc53/DocsGPT', 'https://github.com/Python-World/python-mini-projects', 'https://github.com/airbytehq/airbyte', 'https://github.com/aleju/imgaug', 'https://github.com/roboflow/supervision', 'https://github.com/pjialin/py12306', 'https://github.com/hindupuravinash/the-gan-zoo', 'https://github.com/unifyai/ivy', 'https://github.com/openai/evals', 'https://github.com/horovod/horovod', 'https://github.com/huggingface/peft', 'https://github.com/NVlabs/stylegan', 'https://github.com/tgbot-collection/YYeTsBot', 'https://github.com/gunthercox/ChatterBot', 'https://github.com/UKPLab/sentence-transformers', 'https://github.com/saltstack/salt', 'https://github.com/wangshub/wechat_jump_game', 'https://github.com/youfou/wxpy', 'https://github.com/microsoft/nni', 'https://github.com/deepset-ai/haystack', 'https://github.com/codelucas/newspaper', 'https://github.com/joaomdmoura/crewAI', 'https://github.com/google/yapf', 'https://github.com/psf/requests-html', 'https://github.com/flairNLP/flair', 'https://github.com/sczhou/CodeFormer', 'https://github.com/shengqiangzhang/examples-of-web-crawlers', 'https://github.com/davidsandberg/facenet', 'https://github.com/NanmiCoder/MediaCrawler', 'https://github.com/ansible/awx', 'https://github.com/albumentations-team/albumentations', 'https://github.com/programthink/zhao', 'https://github.com/mail-in-a-box/mailinabox', 'https://github.com/sivel/speedtest-cli', 'https://github.com/searx/searx', 'https://github.com/ShangtongZhang/reinforcement-learning-an-introduction', 'https://github.com/iterative/dvc', 'https://github.com/PySimpleGUI/PySimpleGUI', 'https://github.com/mementum/backtrader', 'https://github.com/tiangolo/sqlmodel', 'https://github.com/nltk/nltk', 'https://github.com/dmlc/dgl', 'https://github.com/microsoft/Swin-Transformer', 'https://github.com/jindongwang/transferlearning', 'https://github.com/facebookresearch/detr', 'https://github.com/idank/explainshell', 'https://github.com/s0md3v/XSStrike', 'https://github.com/fortra/impacket', 'https://github.com/MetaCubeX/mihomo', 'https://github.com/wifiphisher/wifiphisher', 'https://github.com/jaakkopasanen/AutoEq', 'https://github.com/waditu/tushare', 'https://github.com/edgedb/edgedb', 'https://github.com/bloomberg/memray', 'https://github.com/ethereum/EIPs', 'https://github.com/PaddlePaddle/PaddleHub', 'https://github.com/scipy/scipy', 'https://github.com/chroma-core/chroma', 'https://github.com/sympy/sympy', 'https://github.com/beetbox/beets', 'https://github.com/postmanlabs/httpbin', 'https://github.com/labelmeai/labelme', 'https://github.com/SFTtech/openage', 'https://github.com/encode/httpx', 'https://github.com/redis/redis-py', 'https://github.com/getpelican/pelican', 'https://github.com/THUDM/ChatGLM3', 'https://github.com/jina-ai/clip-as-service', 'https://github.com/donnemartin/awesome-aws', 'https://github.com/microsoft/pyright', 'https://github.com/pre-commit/pre-commit', 'https://github.com/PaddlePaddle/PaddleDetection', 'https://github.com/ocrmypdf/OCRmyPDF', 'https://github.com/lss233/chatgpt-mirai-qq-bot', 'https://github.com/ydataai/ydata-profiling', 'https://github.com/dask/dask', 'https://github.com/mwaskom/seaborn', 'https://github.com/ranaroussi/yfinance', 'https://github.com/tonybeltramelli/pix2code', 'https://github.com/threat9/routersploit', 'https://github.com/Miserlou/Zappa', 'https://github.com/alexjc/neural-enhance', 'https://github.com/Zulko/moviepy', 'https://github.com/meolu/walle-web', 'https://github.com/OpenMOSS/MOSS', 'https://github.com/smicallef/spiderfoot', 'https://github.com/matrix-org/synapse', 'https://github.com/google-deepmind/alphafold', 'https://github.com/dbcli/pgcli', 'https://github.com/python-pillow/Pillow', 'https://github.com/BlinkDL/RWKV-LM', 'https://github.com/allenai/allennlp', 'https://github.com/LlamaFamily/Llama-Chinese', 'https://github.com/smol-ai/developer', 'https://github.com/janeczku/calibre-web', 'https://github.com/Embedding/Chinese-Word-Vectors', 'https://github.com/cookiecutter/cookiecutter-django', 'https://github.com/rougier/numpy-100', 'https://github.com/zalandoresearch/fashion-mnist']
github_urls = ['https://github.com/TencentARC/GFPGAN', 'https://github.com/apache/airflow', 'https://github.com/chenfei-wu/TaskMatrix', 'https://github.com/mitmproxy/mitmproxy', 'https://github.com/lm-sys/FastChat', 'https://github.com/comfyanonymous/ComfyUI', 'https://github.com/babysor/MockingBird', 'https://github.com/openai/gym', 'https://github.com/testerSunshine/12306', 'https://github.com/shadowsocks/shadowsocks', 'https://github.com/microsoft/DeepSpeed', 'https://github.com/XX-net/XX-Net', 'https://github.com/fxsjy/jieba', 'https://github.com/hankcs/HanLP', 'https://github.com/Asabeneh/30-Days-Of-Python', 'https://github.com/karpathy/nanoGPT', 'https://github.com/httpie/cli', 'https://github.com/streamlit/streamlit', 'https://github.com/ccxt/ccxt', 'https://github.com/run-llama/llama_index', 'https://github.com/ray-project/ray', 'https://github.com/certbot/certbot', 'https://github.com/sqlmapproject/sqlmap', 'https://github.com/geekcomputers/Python', 'https://github.com/huggingface/pytorch-image-models', 'https://github.com/coqui-ai/TTS', 'https://github.com/python-poetry/poetry', 'https://github.com/0xAX/linux-insides', 'https://github.com/facebookresearch/fairseq', 'https://github.com/gradio-app/gradio', 'https://github.com/yunjey/pytorch-tutorial', 'https://github.com/tatsu-lab/stanford_alpaca', 'https://github.com/explosion/spaCy', 'https://github.com/donnemartin/interactive-coding-challenges', 'https://github.com/facebookresearch/detectron2', 'https://github.com/Pythagora-io/gpt-pilot', 'https://github.com/google/jax', 'https://github.com/lllyasviel/ControlNet', 'https://github.com/acheong08/ChatGPT', 'https://github.com/open-mmlab/mmdetection', 'https://github.com/chatchat-space/Langchain-Chatchat', 'https://github.com/encode/django-rest-framework', 'https://github.com/tqdm/tqdm', 'https://github.com/Lightning-AI/pytorch-lightning', 'https://github.com/LC044/WeChatMsg', 'https://github.com/OWASP/CheatSheetSeries', 'https://github.com/donnemartin/data-science-ipython-notebooks', 'https://github.com/numpy/numpy', 'https://github.com/google/python-fire', 'https://github.com/xinntao/Real-ESRGAN', 'https://github.com/OpenBB-finance/OpenBBTerminal', 'https://github.com/facebookresearch/Detectron', 'https://github.com/freqtrade/freqtrade', 'https://github.com/StevenBlack/hosts', 'https://github.com/ycm-core/YouCompleteMe', 'https://github.com/spipm/Depix', 'https://github.com/zhayujie/chatgpt-on-wechat', 'https://github.com/littlecodersh/ItChat', 'https://github.com/nicolargo/glances', 'https://github.com/s0md3v/roop', 'https://github.com/getredash/redash', 'https://github.com/deezer/spleeter', 'https://github.com/Vision-CAIR/MiniGPT-4', 'https://github.com/python-telegram-bot/python-telegram-bot', 'https://github.com/pypa/pipenv', 'https://github.com/myshell-ai/OpenVoice', 'https://github.com/OpenDevin/OpenDevin', 'https://github.com/microsoft/cascadia-code', 'https://github.com/matterport/Mask_RCNN', 'https://github.com/tinygrad/tinygrad', 'https://github.com/svc-develop-team/so-vits-svc', 'https://github.com/RVC-Boss/GPT-SoVITS', 'https://github.com/jumpserver/jumpserver', 'https://github.com/locustio/locust', 'https://github.com/chubin/wttr.in', 'https://github.com/Textualize/textual', 'https://github.com/celery/celery', 'https://github.com/keon/algorithms', 'https://github.com/vnpy/vnpy', 'https://github.com/iperov/DeepFaceLive', 'https://github.com/ultralytics/ultralytics', 'https://github.com/eriklindernoren/ML-From-Scratch', 'https://github.com/microsoft/JARVIS', 'https://github.com/huggingface/diffusers', 'https://github.com/wangzheng0822/algo', 'https://github.com/mouredev/Hello-Python', 'https://github.com/Stability-AI/generative-models', 'https://github.com/sebastianruder/NLP-progress', 'https://github.com/JaidedAI/EasyOCR', 'https://github.com/kovidgoyal/kitty', 'https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix', 'https://github.com/HumanSignal/labelImg', 'https://github.com/d2l-ai/d2l-en', 'https://github.com/AtsushiSakai/PythonRobotics', 'https://github.com/pytorch/examples', 'https://github.com/cookiecutter/cookiecutter', 'https://github.com/tornadoweb/tornado', 'https://github.com/hiyouga/LLaMA-Factory', 'https://github.com/mindsdb/mindsdb', 'https://github.com/deepinsight/insightface', 'https://github.com/openai/gpt-2', 'https://github.com/luong-komorebi/Awesome-Linux-Software', 'https://github.com/WZMIAOMIAO/deep-learning-for-image-processing', 'https://github.com/drduh/macOS-Security-and-Privacy-Guide', 'https://github.com/openai/chatgpt-retrieval-plugin', 'https://github.com/plotly/dash', 'https://github.com/chriskiehl/Gooey', 'https://github.com/jhao104/proxy_pool', 'https://github.com/pyg-team/pytorch_geometric', 'https://github.com/saleor/saleor', 'https://github.com/zulip/zulip', 'https://github.com/jina-ai/jina', 'https://github.com/openai/openai-python', 'https://github.com/KurtBestor/Hitomi-Downloader', 'https://github.com/521xueweihan/GitHub520', 'https://github.com/ArchiveBox/ArchiveBox', 'https://github.com/facebookresearch/audiocraft', 'https://github.com/meta-llama/llama3', 'https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI', 'https://github.com/matplotlib/matplotlib', 'https://github.com/yoheinakajima/babyagi', 'https://github.com/Vonng/ddia', 'https://github.com/PromtEngineer/localGPT', 'https://github.com/vllm-project/vllm', 'https://github.com/ManimCommunity/manim', 'https://github.com/ungoogled-software/ungoogled-chromium', 'https://github.com/karpathy/minGPT', 'https://github.com/magenta/magenta', 'https://github.com/bokeh/bokeh', 'https://github.com/pydantic/pydantic', 'https://github.com/huggingface/datasets', 'https://github.com/microsoft/unilm', 'https://github.com/kholia/OSX-KVM', 'https://github.com/kovidgoyal/calibre', 'https://github.com/mkdocs/mkdocs', 'https://github.com/magic-wormhole/magic-wormhole', 'https://github.com/Delgan/loguru', 'https://github.com/lucidrains/vit-pytorch', 'https://github.com/nginx-proxy/nginx-proxy', 'https://github.com/recommenders-team/recommenders', 'https://github.com/RasaHQ/rasa', 'https://github.com/facebook/prophet', 'https://github.com/sanic-org/sanic', 'https://github.com/kaixindelele/ChatPaper', 'https://github.com/Jack-Cherish/python-spider', 'https://github.com/jantic/DeOldify', 'https://github.com/python/mypy', 'https://github.com/ymcui/Chinese-LLaMA-Alpaca', 'https://github.com/pyscript/pyscript', 'https://github.com/PostHog/posthog', 'https://github.com/mlflow/mlflow', 'https://github.com/spotify/luigi', 'https://github.com/wagtail/wagtail', 'https://github.com/Sanster/IOPaint', 'https://github.com/miloyip/game-programmer', 'https://github.com/joke2k/faker', 'https://github.com/mlc-ai/mlc-llm', 'https://github.com/Ciphey/Ciphey', 'https://github.com/quantopian/zipline', 'https://github.com/paperless-ngx/paperless-ngx', 'https://github.com/frappe/erpnext', 'https://github.com/stitionai/devika', 'https://github.com/rsms/inter', 'https://github.com/kivy/kivy', 'https://github.com/reflex-dev/reflex', 'https://github.com/onnx/onnx', 'https://github.com/reddit-archive/reddit', 'https://github.com/hpcaitech/Open-Sora', 'https://github.com/haotian-liu/LLaVA', 'https://github.com/chatanywhere/GPT_API_free', 'https://github.com/InstaPy/InstaPy', 'https://github.com/binux/pyspider', 'https://github.com/LiLittleCat/awesome-free-chatgpt', 'https://github.com/cool-RR/PySnooper', 'https://github.com/apple/ml-stable-diffusion', 'https://github.com/ipython/ipython', 'https://github.com/wilsonfreitas/awesome-quant', 'https://github.com/alievk/avatarify-python', 'https://github.com/Mikubill/sd-webui-controlnet', 'https://github.com/wting/autojump', 'https://github.com/trekhleb/learn-python', 'https://github.com/eriklindernoren/PyTorch-GAN', 'https://github.com/Kr1s77/awesome-python-login-model', 'https://github.com/twintproject/twint', 'https://github.com/THUDM/ChatGLM2-6B', 'https://github.com/wistbean/learn_python3_spider', 'https://github.com/mnielsen/neural-networks-and-deep-learning', 'https://github.com/pytorch/vision', 'https://github.com/h2y/Shadowrocket-ADBlock-Rules', 'https://github.com/OpenEthan/SMSBoom', 'https://github.com/openai/baselines', 'https://github.com/plotly/plotly.py', 'https://github.com/piskvorky/gensim', 'https://github.com/RunaCapital/awesome-oss-alternatives', 'https://github.com/meta-llama/codellama', 'https://github.com/pallets/click', 'https://github.com/spotDL/spotify-downloader', 'https://github.com/dgtlmoon/changedetection.io', 'https://github.com/Anjok07/ultimatevocalremovergui', 'https://github.com/netbox-community/netbox', 'https://github.com/mxrch/GHunt', 'https://github.com/ranger/ranger', 'https://github.com/tensorflow/tensor2tensor', 'https://github.com/aws/aws-cli', 'https://github.com/blakeblackshear/frigate', 'https://github.com/w-okada/voice-changer', 'https://github.com/GaiZhenbiao/ChuanhuChatGPT', 'https://github.com/PrefectHQ/prefect', 'https://github.com/jupyter/jupyter', 'https://github.com/facefusion/facefusion', 'https://github.com/danielgatis/rembg', 'https://github.com/borisdayma/dalle-mini', 'https://github.com/fabric/fabric', 'https://github.com/aio-libs/aiohttp', 'https://github.com/ddbourgin/numpy-ml', 'https://github.com/TransformerOptimus/SuperAGI', 'https://github.com/microsoft/Bringing-Old-Photos-Back-to-Life', 'https://github.com/pyecharts/pyecharts', 'https://github.com/tiangolo/typer', 'https://github.com/Rapptz/discord.py', 'https://github.com/fauxpilot/fauxpilot', 'https://github.com/lra/mackup', 'https://github.com/apprenticeharper/DeDRM_tools', 'https://github.com/microsoft/qlib', 'https://github.com/networkx/networkx', 'https://github.com/powerline/powerline', 'https://github.com/arc53/DocsGPT', 'https://github.com/Python-World/python-mini-projects', 'https://github.com/airbytehq/airbyte', 'https://github.com/aleju/imgaug', 'https://github.com/roboflow/supervision', 'https://github.com/pjialin/py12306', 'https://github.com/hindupuravinash/the-gan-zoo', 'https://github.com/unifyai/ivy', 'https://github.com/openai/evals', 'https://github.com/horovod/horovod', 'https://github.com/huggingface/peft', 'https://github.com/NVlabs/stylegan', 'https://github.com/tgbot-collection/YYeTsBot', 'https://github.com/gunthercox/ChatterBot', 'https://github.com/UKPLab/sentence-transformers', 'https://github.com/saltstack/salt', 'https://github.com/wangshub/wechat_jump_game', 'https://github.com/youfou/wxpy', 'https://github.com/microsoft/nni', 'https://github.com/deepset-ai/haystack', 'https://github.com/codelucas/newspaper', 'https://github.com/joaomdmoura/crewAI', 'https://github.com/google/yapf', 'https://github.com/psf/requests-html', 'https://github.com/flairNLP/flair', 'https://github.com/sczhou/CodeFormer', 'https://github.com/shengqiangzhang/examples-of-web-crawlers', 'https://github.com/davidsandberg/facenet', 'https://github.com/NanmiCoder/MediaCrawler', 'https://github.com/ansible/awx', 'https://github.com/albumentations-team/albumentations', 'https://github.com/programthink/zhao', 'https://github.com/mail-in-a-box/mailinabox', 'https://github.com/sivel/speedtest-cli', 'https://github.com/searx/searx', 'https://github.com/ShangtongZhang/reinforcement-learning-an-introduction', 'https://github.com/iterative/dvc', 'https://github.com/PySimpleGUI/PySimpleGUI', 'https://github.com/mementum/backtrader', 'https://github.com/tiangolo/sqlmodel', 'https://github.com/nltk/nltk', 'https://github.com/dmlc/dgl', 'https://github.com/microsoft/Swin-Transformer', 'https://github.com/jindongwang/transferlearning', 'https://github.com/facebookresearch/detr', 'https://github.com/idank/explainshell', 'https://github.com/s0md3v/XSStrike', 'https://github.com/fortra/impacket', 'https://github.com/MetaCubeX/mihomo', 'https://github.com/wifiphisher/wifiphisher', 'https://github.com/jaakkopasanen/AutoEq', 'https://github.com/waditu/tushare', 'https://github.com/edgedb/edgedb', 'https://github.com/bloomberg/memray', 'https://github.com/ethereum/EIPs', 'https://github.com/PaddlePaddle/PaddleHub', 'https://github.com/scipy/scipy', 'https://github.com/chroma-core/chroma', 'https://github.com/sympy/sympy', 'https://github.com/beetbox/beets', 'https://github.com/postmanlabs/httpbin', 'https://github.com/labelmeai/labelme', 'https://github.com/SFTtech/openage', 'https://github.com/encode/httpx', 'https://github.com/redis/redis-py', 'https://github.com/getpelican/pelican', 'https://github.com/THUDM/ChatGLM3', 'https://github.com/jina-ai/clip-as-service', 'https://github.com/donnemartin/awesome-aws', 'https://github.com/microsoft/pyright', 'https://github.com/pre-commit/pre-commit', 'https://github.com/PaddlePaddle/PaddleDetection', 'https://github.com/ocrmypdf/OCRmyPDF', 'https://github.com/lss233/chatgpt-mirai-qq-bot', 'https://github.com/ydataai/ydata-profiling', 'https://github.com/dask/dask', 'https://github.com/mwaskom/seaborn', 'https://github.com/ranaroussi/yfinance', 'https://github.com/tonybeltramelli/pix2code', 'https://github.com/threat9/routersploit', 'https://github.com/Miserlou/Zappa', 'https://github.com/alexjc/neural-enhance', 'https://github.com/Zulko/moviepy', 'https://github.com/meolu/walle-web', 'https://github.com/OpenMOSS/MOSS', 'https://github.com/smicallef/spiderfoot', 'https://github.com/matrix-org/synapse', 'https://github.com/google-deepmind/alphafold', 'https://github.com/dbcli/pgcli', 'https://github.com/python-pillow/Pillow', 'https://github.com/BlinkDL/RWKV-LM', 'https://github.com/allenai/allennlp', 'https://github.com/LlamaFamily/Llama-Chinese', 'https://github.com/smol-ai/developer', 'https://github.com/janeczku/calibre-web', 'https://github.com/Embedding/Chinese-Word-Vectors', 'https://github.com/cookiecutter/cookiecutter-django', 'https://github.com/rougier/numpy-100', 'https://github.com/zalandoresearch/fashion-mnist']

In [None]:
# Directory to store CSV files
output_dir = "github_repo_source_code"
os.makedirs(output_dir, exist_ok=True)
for url in github_urls:
    try:
        repo_name, concatenated_code = collect_source_code(url)
        txt_file_name = f"{repo_name}.txt"
        txt_file_name = os.path.join(output_dir, txt_file_name)
        with open(txt_file_name, 'w', encoding='utf-8') as txt_file:
            txt_file.write(concatenated_code)
        print(f"Successfully processed and saved {url} to {txt_file_name}")
    except Exception as e:
        print(f"Error processing {url}: {e}")

print("All repositories processed successfully.")

# HNSWLIB Context Generation

In [None]:
!pip install hnswlib sentence_transformers langchain_text_splitters

In [None]:
import hnswlib
import numpy as np

def get_context(sentences, embeds, question_embed):
    dim = embeds.shape[1]
    num_elements = embeds.shape[0]

    # Generating sample data
    data = embeds
    ids = np.arange(num_elements)

    # Declaring index
    p = hnswlib.Index(space = 'cosine', dim = dim) # possible options are l2, cosine or ip

    # Initializing index - the maximum number of elements should be known beforehand
    p.init_index(max_elements = num_elements, ef_construction = 200, M = 16)

    # Element insertion (can be called several times):
    p.add_items(data, ids)

    # Controlling the recall by setting ef:
    p.set_ef(50) # ef should always be > k

    # Query dataset, k - number of the closest elements (returns 2 numpy arrays)
    labels, distances = p.knn_query(question_embed, k = 4)

    return "".join([sentences[index] for index in labels[0]])

In [None]:
from sentence_transformers import SentenceTransformer
from langchain_text_splitters import RecursiveCharacterTextSplitter
import pandas as pd
import os
import pickle

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=1000,
    chunk_overlap=100
)

model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

root_dir = "./"
context_root_dir = "./github_repo_source_code/"
readme_root_dir = "./output_csv_files/"

with open('./repo_urls.pickle', 'rb') as f:
    repo_name_list = pickle.load(f)

new_rows = []
for repo in repo_name_list:
    repo_name = repo.split("/")[-1]
    file1 = repo_name +".txt"
    with open(os.path.join(context_root_dir, file1)) as f:
        data = f.read()
    sentences = text_splitter.split_text(data)
    embeddings = model.encode(sentences)
    print(embeddings.shape)

    file2 = repo_name +".csv"
    df2 = pd.read_csv(os.path.join(readme_root_dir, file2))
    for i, row in df2.iterrows():
        title = row["Title"]
        content = row["Content"]
        if "?" in title:
            question = f"In context to the project {repo_name}, answer the following. " + title
            question_embedding = model.encode([question])
            context = get_context(sentences, embeddings, question_embedding)
            new_row  = {"Question": question, "Context": context, "Answer": content, "Repo Url": repo, "Repo": repo_name}
            new_rows.append(new_row)
        else:
            question = f"Provide the README content for the section with heading \"{title}\" starting with ## {title}."
            question_embedding = model.encode([question])
            context = get_context(sentences, embeddings, question_embedding)
            new_row  = {"Question": question, "Context": context, "Answer": content, "Repo Url": repo, "Repo": repo_name}
            new_rows.append(new_row)
    print(len(new_rows))
    df3 = pd.DataFrame(new_rows, index=None)
    df3.to_csv(os.path.join(root_dir, "readme_qa.csv"), mode="a")

# Clean Data

In [13]:
def remove_urls(text):
  """Remove URLs from a given text string."""
  url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
  return re.sub(url_pattern, '', text)

def remove_html_tags(text):
  """Remove HTML tags from a given text string."""
  html_pattern = r'<.*?>'
  return re.sub(html_pattern, '', text)

In [24]:
def clean_text(text):
    # Define the regular expression pattern for HTTP URLs
    http_pattern = re.compile(r'http://[^\s]+')
    # Remove HTTP URLs
    text = http_pattern.sub('', str(text))

    https_pattern = re.compile(r'https://[^\s]+')
    # Remove HTTPS URLs
    text = https_pattern.sub('', str(text))
    
    # Define the regular expression pattern for <img> tags
    img_pattern = re.compile(r'<img[^>]*>')
    # Remove <img> tags
    text = img_pattern.sub('', str(text))
    
    return text

In [39]:
import re
def clean_emoji(tx):
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols 
                           u"\U0001F680-\U0001F6FF"  # transport 
                           u"\U0001F1E0-\U0001F1FF"  # flags 
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)

    return emoji_pattern.sub(r'', tx)

def text_cleaner(tx):

    text = re.sub(r"won\'t", "would not", tx)
    text = re.sub(r"im", "i am", tx)
    text = re.sub(r"Im", "I am", tx)
    text = re.sub(r"can\'t", "can not", text)
    text = re.sub(r"don\'t", "do not", text)
    text = re.sub(r"shouldn\'t", "should not", text)
    text = re.sub(r"needn\'t", "need not", text)
    text = re.sub(r"hasn\'t", "has not", text)
    text = re.sub(r"haven\'t", "have not", text)
    text = re.sub(r"weren\'t", "were not", text)
    text = re.sub(r"mightn\'t", "might not", text)
    text = re.sub(r"didn\'t", "did not", text)
    text = re.sub(r"n\'t", " not", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'s", " is", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'t", " not", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'m", " am", text)
    # text = re.sub('https?://\S+|www\.\S+', ' ', text)
    text = re.sub(r'https?://[^\s\")]+', '', text)
    text = re.sub(r'http?://[^\s\")]+', '', text)
    text = re.sub(r'http%3A%2F%2F[^\s\")]+', '', text)
    text = re.sub(r'https%3A%2F%2F[^\s\")]+', '', text)
    text = re.sub(r'[^a-zA-Z0-9\!\?\.\@]',' ' , text)
    text = re.sub(r'[!]+' , '!' , text)
    text = re.sub(r'[?]+' , '?' , text)
    text = re.sub(r'[.]+' , '.' , text)
    text = re.sub(r'[@]+' , '@' , text)
    text = re.sub(r'unk' , '<UNK>' , text)
    # text = re.sub('\n', '<NL>', text)
    # text = re.sub('\t', '<TAB>', text)
    # text = re.sub(r'\s+', '<SP>', text)
    # text = re.sub(r'(<img[^>]*\bsrc=")[^"]*(")', '<img src=<IMG_SRC>', text)
    
    text = text.lower()
    text = re.sub(r'[ ]+' , ' ' , text)

    return text

In [40]:
import pandas as pd
df = pd.read_csv("scripts/readme_qa.csv")
df.columns = [str(q).strip() for q in df.columns]

In [41]:
df["Answer"].values[0:5]

array(['Explore popular APIs and see them work in Postman. <br > <p> <a href="https://apilayer.com"> <div> <img src=".github/cs1586-APILayerLogoUpdate2022-LJ_v2-HighRes.png" width="250" alt="APILayer Logo" /> </div> </a> </p> [APILayer](https://apilayer.com/) is the fastest way to integrate APIs into any product. They created this repository to support the community in easily finding public APIs. Explore their collections on the [Postman API Network](https://www.postman.com/apilayer/workspace/apilayer/overview).',
       '| API | Description | Call this API | |:---|:---|:---| | [IP Stack](https://ipstack.com/) | Locate and Identify Website Visitors by IP Address | [<img src="https://run.pstmn.io/button.svg" alt="Run In Postman" style="width: 128px; height: 32px;">](https://god.gw.postman.com/run-collection/10131015-55145132-244c-448c-8e6f-8780866e4862?action=collection%2Ffork&source=rip_markdown&collection-url=entityId%3D10131015-55145132-244c-448c-8e6f-8780866e4862%26entityType%3Dcoll

In [42]:
import numpy as np
df.replace('', np.nan, inplace=True)
df.dropna(subset=["Answer"], inplace=True)
df = df[["Question", "Context", "Answer", "Repo Url", "Repo"]]
df.head()

Unnamed: 0,Question,Context,Answer,Repo Url,Repo
0,Provide the README content for the section wit...,Discussions in issues and pull requests:\n ...,Explore popular APIs and see them work in Post...,https://github.com/public-apis/public-apis,public-apis
1,Provide the README content for the section wit...,Discussions in issues and pull requests:\n ...,| API | Description | Call this API | |:---|:-...,https://github.com/public-apis/public-apis,public-apis
2,Provide the README content for the section wit...,Discussions in issues and pull requests:\n ...,| API | Description | Auth | Call this API | |...,https://github.com/public-apis/public-apis,public-apis
3,Provide the README content for the section wit...,# check each category for the minimum number o...,* [Animals](#animals) * [Anime](#anime) * [Art...,https://github.com/public-apis/public-apis,public-apis
4,Provide the README content for the section wit...,Discussions in issues and pull requests:\n ...,<br > <strong>Get Involved</strong> * [Contrib...,https://github.com/public-apis/public-apis,public-apis


In [43]:
from langdetect import detect
df['detect'] = detect(str(df['Answer']))
df.head()

Unnamed: 0,Question,Context,Answer,Repo Url,Repo,detect
0,Provide the README content for the section wit...,Discussions in issues and pull requests:\n ...,Explore popular APIs and see them work in Post...,https://github.com/public-apis/public-apis,public-apis,en
1,Provide the README content for the section wit...,Discussions in issues and pull requests:\n ...,| API | Description | Call this API | |:---|:-...,https://github.com/public-apis/public-apis,public-apis,en
2,Provide the README content for the section wit...,Discussions in issues and pull requests:\n ...,| API | Description | Auth | Call this API | |...,https://github.com/public-apis/public-apis,public-apis,en
3,Provide the README content for the section wit...,# check each category for the minimum number o...,* [Animals](#animals) * [Anime](#anime) * [Art...,https://github.com/public-apis/public-apis,public-apis,en
4,Provide the README content for the section wit...,Discussions in issues and pull requests:\n ...,<br > <strong>Get Involved</strong> * [Contrib...,https://github.com/public-apis/public-apis,public-apis,en


In [44]:
df = df[df['detect'] == 'en']
df = df[["Question", "Context", "Answer", "Repo Url", "Repo"]]
len(df)

12803

In [45]:
# df["Answer"] = df["Answer"].apply(clean_text)
df["Answer"] = df["Answer"].apply(text_cleaner)
df["Answer"] = df["Answer"].apply(clean_emoji)
df["Context"] = df["Context"].apply(text_cleaner)
df["Answer"].values[0:5]

array(['explore popular apis and see them work in postman. br p a href div img src .github cs1586 apilayerlogoupdate2022 lj v2 highres.png width 250 alt apilayer logo div a p apilayer is the fastest way to integrate apis into any product. they created this repository to support the community in easily finding public apis. explore their collections on the postman api network .',
       ' api description call this api ip stack locate and identify website visitors by ip address img src alt run in postman style width 128px height 32px marketstack free easy to use rest api interface delivering worldwide stock market data in json format img src alt run in postman style width 128px height 32px weatherstack retrieve instant accurate weather information for any location in the world in lightweight json format img src alt run in postman style width 128px height 32px numverify global phone number validation lookup json api img src alt run in postman style width 128px height 32px fixer fixer is a 

In [46]:
options = ['allennlp', 'autojump', 'typer', 'spotify-downloader', 'spleeter', 'python-fire', 'numpy-ml', 'magenta'] 
   
# selecting rows based on condition 
df = df[df['Repo'].isin(options)]
len(df)

339

In [47]:
df["Answer"].values[-50:]

array(['poetry run pytest tests ',
       ' deezer research source separation engine story deezer.io blog post english version japanese version music source separation tool with pre trained models ismir2019 extended abstract if you use spleeter in your work please cite bibtex @article spleeter2020 doi 10.21105 joss.02154 url year 2020 publisher the open journal volume 5 number 50 pages 2154 author romain hennequin and anis khlif and felix voituret and manuel moussallam title spleeter a fast and efficient music source separation tool with pre trained models journal journal of open source software note deezer research ',
       'the code of spleeter is mit licensed license .',
       'if you plan to use spleeter on copyrighted material make sure you get proper authorization from right owners beforehand.',
       ' spleeter is a complex piece of software and although we continously try to improve and test it you may encounter unexpected issues running it. if that is the case please check 

In [12]:
df.to_csv("scripts/readme_qa_cleaned_small_v5.csv", index =False)

# Scoring

In [7]:
from pprint import pprint
# from torchmetrics.text.bert import BERTScore
import bert_score
import re

# with open("/home/scp6004/doc_generator/output/spleeter/docs/data/README_LLAMA2_7B_CHAT_GPTQ.md", 'r', encoding='utf-8') as f:
#     pred = f.read()
# with open("/home/scp6004/doc_generator/spleeter/README.md", 'r', encoding='utf-8') as f:
#     target = f.read()

pred = """
Research repository for TouchPose
This folder contains pre-trained models for various tasks, such as hand detection, finger segmentation, and gesture recognition.

Disclaimer:
The Licensor will not be liable to you on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.
 
License
This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License."""

target = """
Research repository for TouchPose
This repository contains the part of the dataset we collected on pairs of capacitive touch images and depth maps of fingers and hands.

Disclaimer:
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW THE AUTHOR WILL BE LIABLE TO YOU
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES
SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH
ANY OTHER PROGRAMS), EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

License
This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
"""

pred = re.sub(r' +', ' ', pred)
target = re.sub(r' +', ' ', target)
P, R, F1 = bert_score.score([pred], [target], lang='en', model_type='roberta-large', verbose=True)
print(P,R,F1)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00, 28.65it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 398.02it/s]

done in 0.04 seconds, 23.79 sentences/sec
tensor([0.8705]) tensor([0.8412]) tensor([0.8556])





In [5]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformers import AutoTokenizer

# Download the required NLTK data
# nltk.download('punkt')

pred = """
## About
Spleeter is an open-source tool for music information retrieval and generation. It is designed to be highly extensible and customizable, allowing users to create their own plugins and integrations. Spleeter is built using Python and uses a variety of libraries and frameworks, including TensorFlow, NumPy, and Librosa.
The goal of Spleeter is to provide a flexible and powerful platform for music information retrieval and generation, while also being easy to use and understand. It is designed to be used by both developers and non-developers, and can be used for a wide range of applications, such as creating music databases, generating music metadata, and more.
Spleeter is actively maintained and developed by the community, with new features and improvements being added regularly. It is released under the MIT License, which means that it is free to use and modify for any purpose.
If you are interested in learning more about Spleeter or getting involved in the development process, please visit the <a href="https://github.com/deezer/spleeter/wiki">Spleeter documentation</a>.

### Projects and Softwares using **Spleeter**
Spleeter is a powerful tool for audio signal processing and analysis, and many projects and softwares have already benefited from its capabilities. Here are some examples of projects and softwares that use or have used Spleeter:

## Spleeter Pro (Commercial version)
Spleeter Pro is a commercial version of Spleeter, a Python library for audio signal processing. It offers additional features and improvements over the free version, including:
Improved performance: Spleeter Pro has been optimized for faster processing times, making it ideal for large-scale audio processing tasks.
Enhanced functionality: Spleeter Pro includes additional features such as noise reduction, equalization, and compression, providing more options for audio processing.
Better support: Spleeter Pro comes with dedicated customer support, ensuring that you get the help you need when you need it.

## Troubleshooting
In case none of the above solutions work, please open an issue on the <code>spleeter</code> GitHub repository with as much detail as possible, including any error messages you encountered and the steps you took leading up to the issue.
"""

target = """
## About
Spleeter is [Deezer](https://www.deezer.com/) source separation library with pretrained models
written in [Python](https://www.python.org/) and uses [Tensorflow](https://tensorflow.org/). Spleeter is also very fast as it can perform separation of audio files to 4 stems 100x faster than real-time when run on a GPU.

We designed Spleeter so you can use it straight from [command line](https://github.com/deezer/spleeter/wiki/2.-Getting-started#usage)
as well as directly in your own development pipeline as a [Python library](https://github.com/deezer/spleeter/wiki/4.-API-Reference#separator). It can be installed with [pip](https://github.com/deezer/spleeter/wiki/1.-Installation#using-pip) or be used with
[Docker](https://github.com/deezer/spleeter/wiki/2.-Getting-started#using-docker-image).

### Projects and Softwares using **Spleeter**
**Spleeter** pre-trained models have also been used by professionnal audio softwares. Here's a non-exhaustive list:

## Spleeter Pro (Commercial version)
Check out our commercial version : [Spleeter Pro](https://www.deezer-techservices.com/solutions/spleeter/). Benefit from our expertise for precise audio separation, faster processing speeds, and dedicated professional support. 

## Troubleshooting
Spleeter is a complex piece of software and although we continously try to improve and test it you may encounter unexpected issues running it. If that's the case please check the [FAQ page](https://github.com/deezer/spleeter/wiki/5.-FAQ) first as well as the list of [currently open issues](https://github.com/deezer/spleeter/issues)"""
tokenizer = AutoTokenizer.from_pretrained("TheBloke/Llama-2-7b-Chat-GPTQ")
tokenizer.pad_token = tokenizer.eos_token

# import tiktoken
# enc = tiktoken.get_encoding("cl100k_base")

def calculate_bleu(reference, candidate):
    reference_tokens = tokenizer.tokenize(reference)
    candidate_tokens = tokenizer.tokenize(candidate)
    smoothie = SmoothingFunction().method4
    return sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothie)

bleu_score = calculate_bleu(target, pred)
print(bleu_score)



0.18306802223623997


In [16]:
from torchmetrics.text import BLEUScore
bleu = BLEUScore(n_gram=4, smooth=True)
reference_tokens = tokenizer.tokenize(target)
candidate_tokens = tokenizer.tokenize(pred)
print(bleu(reference_tokens, candidate_tokens))

tensor(0.)


In [None]:
# import torch
# from torcheval.metrics.text import Perplexity
# metric=Perplexity()
# metric.update(input, target)
# metric.compute()