In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m95.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m

In [None]:
import requests
import os
from nbconvert import PythonExporter
from transformers import GPT2LMHeadModel, GPT2Tokenizer


def fetch_user_repositories(github_url):
    """Fetch the repositories of the GitHub user named in a profile URL.

    Args:
        github_url: Profile URL such as ``https://github.com/<username>``.
            The username is taken to be the last path segment; trailing
            slashes are ignored.

    Returns:
        The decoded JSON body of the GitHub API response when the request
        succeeds with HTTP 200. Otherwise prints a failure message and
        returns an empty list.
    """
    # Strip trailing slashes first; otherwise "…/user/" yields an empty name.
    username = github_url.rstrip("/").split("/")[-1]

    try:
        response = requests.get(
            f"https://api.github.com/users/{username}/repos",
            # Ask for the largest page the API serves instead of the default.
            params={"per_page": 100},
            # Without a timeout an unresponsive server blocks the notebook.
            timeout=30,
        )
    except requests.RequestException:
        # Network-level failures take the same path as a non-200 response.
        print("Failed to fetch user repositories.")
        return []

    if response.status_code != 200:
        print("Failed to fetch user repositories.")
        return []
    return response.json()

def preprocess_code(repository):
    """Clone a repository and run every file in it through the preprocessors.

    Args:
        repository: Mapping with at least the keys ``"name"`` and
            ``"clone_url"``.

    The repository is cloned into ``./<name>``. Every file found under that
    directory is passed to ``preprocess_jupyter_notebook`` when its name ends
    in ``.ipynb`` and to ``preprocess_other_files`` otherwise. ``.git``
    directories are not visited.
    """
    import subprocess  # local import: the file's import cell does not provide it

    repo_name = repository["name"]
    clone_url = repository["clone_url"]
    repo_path = f"./{repo_name}"

    # An argument list (no shell) means the URL and path are never parsed as
    # shell syntax; "--" stops git from reading either one as an option.
    try:
        clone = subprocess.run(["git", "clone", "--", clone_url, repo_path], check=False)
        cloned = clone.returncode == 0
    except OSError:
        # e.g. the git executable is not installed
        cloned = False
    if not cloned:
        print(f"Failed to clone {clone_url}.")

    # Walk even after a failed clone: the directory may already exist from an
    # earlier run. A missing directory simply yields nothing.
    for root, dirs, files in os.walk(repo_path):
        # Prune in place so os.walk never descends into git's own metadata.
        dirs[:] = [d for d in dirs if d != ".git"]
        for file in files:
            file_path = os.path.join(root, file)
            if file.endswith(".ipynb"):
                preprocess_jupyter_notebook(file_path)
            else:
                preprocess_other_files(file_path)

def preprocess_jupyter_notebook(file_path):
    """Convert a notebook to Python source and print it in bounded snippets.

    The notebook at ``file_path`` is exported with ``PythonExporter``. Source
    of at most ``MAX_SNIPPET_SIZE`` characters is printed as one snippet.
    Longer source is split on line boundaries: lines accumulate until the
    running snippet reaches the limit, so a snippet may exceed the limit by
    part of its final line.
    """
    MAX_SNIPPET_SIZE = 10000
    source_text, _resources = PythonExporter().from_filename(file_path)

    if len(source_text) <= MAX_SNIPPET_SIZE:
        code_snippets = [source_text]
    else:
        code_snippets = []
        pending_lines = []
        pending_length = 0
        for source_line in source_text.splitlines():
            pending_lines.append(source_line)
            pending_length += len(source_line) + 1  # +1 for the re-added newline
            if pending_length >= MAX_SNIPPET_SIZE:
                code_snippets.append("\n".join(pending_lines) + "\n")
                pending_lines = []
                pending_length = 0
        # Flush whatever is left after the last full snippet.
        if pending_lines:
            code_snippets.append("\n".join(pending_lines) + "\n")

    for snippet in code_snippets:
        print("Processing code snippet:", snippet)

def preprocess_other_files(file_path, max_file_size=1000000, chunk_size=100000):
    """Print a file's contents as UTF-8 text, streaming large files.

    Args:
        file_path: Path of the file to print.
        max_file_size: Size in bytes above which the file is read in pieces
            instead of all at once.
        chunk_size: Number of characters read per piece when streaming.

    Undecodable bytes are dropped. In both modes the output is the decoded
    content followed by a single newline. A file that cannot be stat'ed or
    opened (for example a dangling symlink) is reported and skipped.
    """
    try:
        file_size = os.path.getsize(file_path)

        # Text mode decodes incrementally, so a multi-byte character that
        # straddles two reads is kept. Decoding raw byte chunks one by one
        # with errors="ignore" would silently drop it.
        with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
            if file_size > max_file_size:
                for chunk in iter(lambda: file.read(chunk_size), ""):
                    # No newline per chunk: boundaries must not alter the text.
                    print(chunk, end="")
                print()
            else:
                print(file.read())
    except OSError as exc:
        print(f"Skipping unreadable file {file_path}: {exc}")

def generate_prompt(code_snippet):
    """Wrap a code snippet in the complexity-analysis instruction.

    Returns the instruction line, a blank line, the snippet itself, and a
    trailing blank line, as a single string.
    """
    return f"Analyze the technical complexity of the following code snippet:\n\n{code_snippet}\n\n"

# Loaded (tokenizer, model) pairs keyed by model name, so repeated calls do
# not reload the weights every time.
_GPT2_CACHE = {}


def _load_gpt2(model_name):
    """Return the (tokenizer, model) pair for ``model_name``, loading it once."""
    if model_name not in _GPT2_CACHE:
        _GPT2_CACHE[model_name] = (
            GPT2Tokenizer.from_pretrained(model_name),
            GPT2LMHeadModel.from_pretrained(model_name),
        )
    return _GPT2_CACHE[model_name]


def evaluate_code_complexity(code_snippet):
    """Ask GPT-2 to continue a complexity-analysis prompt for a snippet.

    Args:
        code_snippet: Text embedded into the prompt by ``generate_prompt``.

    Returns:
        The decoded first generated sequence as a string, with special tokens
        removed. The sequence is the (possibly truncated) prompt followed by
        up to 100 newly generated tokens.
    """
    prompt = generate_prompt(code_snippet)
    tokenizer, model = _load_gpt2("gpt2")

    max_new_tokens = 100
    # Reserve room for the generated tokens inside the model's position limit;
    # an over-long prompt is truncated instead of overflowing it.
    max_prompt_tokens = max(1, model.config.n_positions - max_new_tokens)
    inputs = tokenizer.encode(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=max_prompt_tokens,
    )

    outputs = model.generate(
        inputs,
        # Budget counts only new tokens, so a long prompt cannot use it up.
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        # GPT-2 defines no pad token; reuse EOS, as generate() would otherwise
        # do with a warning.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Put the GitHub profile URL here, e.g. https://github.com/<username>
github_url = "Write Down the url"
repositories = fetch_user_repositories(github_url)

complexity_scores = []
for repository in repositories:
    preprocess_code(repository)
    # NOTE(review): only the repository *name* is handed to the model here;
    # the file contents handled by preprocess_code are not scored. Confirm
    # that this is intended.
    complexity_score = evaluate_code_complexity(repository["name"])
    complexity_scores.append((repository["name"], complexity_score))

# NOTE(review): each score is the text returned by evaluate_code_complexity,
# so this is a reverse lexicographic sort of strings, not a numeric ranking.
complexity_scores.sort(key=lambda x: x[1], reverse=True)

if not complexity_scores:
    # fetch_user_repositories returns [] on failure, and a user may have no
    # repositories; indexing [0] below would raise IndexError in either case.
    print("No repositories were analyzed; nothing to rank.")
else:
    most_complex_repository = complexity_scores[0][0]
    # Ignore trailing slashes so the username is never the empty string.
    username = github_url.rstrip("/").split("/")[-1]
    most_complex_repository_url = f"https://github.com/{username}/{most_complex_repository}"

    prompt = f"Based on the analysis, the repository '{most_complex_repository}' was selected as the most technically complex due to..."

    justification = evaluate_code_complexity(prompt)

    print("Most Technically Complex Repository:", most_complex_repository)
    print("Repository URL:", most_complex_repository_url)
    print("Justification:", justification)