In [1]:
!pip install -U transformers accelerate sentencepiece




In [2]:
# Ask the user to upload the source files to analyse through Colab's upload
# widget. Later cells iterate over the keys of `uploaded` as file names and
# open those names from the current working directory.
from google.colab import files
uploaded = files.upload()


Saving core-0.1.1.py to core-0.1.1.py
Saving core-0.2.2.py to core-0.2.2.py
Saving core-0.2.4.py to core-0.2.4.py


In [None]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face Hub identifier of the checkpoint to load, and a short label for it.
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1"
MODEL_NAME = "Mistral-7B"

# System message sent with every analysis request (kept in Portuguese, as it is
# part of the prompt the model receives).
SYSTEM_PROMPT = """
Atue como um Engenheiro de Software SÃªnior especialista em Python e Engenharia de Software.
Analise o cÃ³digo procurando por Code Smells conforme o Refactoring Guru,
com foco em: Long Method, Magic Numbers e Cognitive Complexity.

Para cada code smell identificado, forneÃ§a:
- LocalizaÃ§Ã£o
- EvidÃªncia
- Justificativa tÃ©cnica
- SugestÃ£o de refatoraÃ§Ã£o
"""

# Pick the accelerator when one is present instead of assuming it; fall back to
# CPU with full precision so the notebook does not crash on a GPU-less runtime.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32


def carregar_modelo():
    """Load the tokenizer and the causal language model named by ``MODEL_ID``.

    The model is loaded with ``device_map="auto"`` in the module-level
    ``dtype`` and switched to evaluation mode.

    Returns:
        tuple: ``(tokenizer, model)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # Reuse EOS as the padding token, but only when the tokenizer does not
    # already define one, so a real pad token is never overwritten.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        # `torch_dtype` is deprecated in current transformers releases (the
        # library itself warns "Use `dtype` instead!").
        dtype=dtype,
        low_cpu_mem_usage=True,
    ).eval()

    return tokenizer, model


def analisar_codigo(codigo, tokenizer, model, max_chars=2500, max_new_tokens=300):
    """Ask the model for a code-smell analysis of ``codigo``.

    Args:
        codigo: Source code to analyse, as text. Only the first ``max_chars``
            characters are sent to the model.
        tokenizer: Tokenizer providing ``apply_chat_template``, ``decode`` and
            ``eos_token_id``.
        model: Model providing ``generate`` and a ``device`` attribute.
        max_chars: Maximum number of characters of ``codigo`` kept in the prompt.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        str: The decoded continuation only (prompt tokens are stripped),
        with special tokens removed.
    """
    codigo = codigo[:max_chars]

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Analise o cÃ³digo:\n\n{codigo}"}
    ]

    # Request a dict so the tokenizer supplies the attention mask itself.
    # Deriving the mask from `input_ids != pad_token_id` is wrong when the pad
    # token is EOS: any EOS in the prompt would be masked out. It also makes the
    # return type explicit rather than relying on the library default.
    encoded = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )

    # Follow the model's own placement instead of a hard-coded device string.
    input_ids = encoded["input_ids"].to(model.device)
    attention_mask = encoded["attention_mask"].to(model.device)

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # greedy decoding
            pad_token_id=tokenizer.eos_token_id
        )

    # generate() returns prompt + continuation; keep only the new tokens.
    resposta = tokenizer.decode(
        output[0][input_ids.shape[1]:],
        skip_special_tokens=True
    )

    return resposta


In [4]:
# Load the model once, then run the analysis over every uploaded file,
# collecting the answers in `resultados` keyed by file name.
tokenizer, model = carregar_modelo()

resultados = dict()

for nome_arquivo in uploaded:
    print(f"\nðŸ“‚ Analisando: {nome_arquivo}")

    # Read the uploaded file back from the working directory as UTF-8 text.
    with open(nome_arquivo, encoding="utf-8") as f:
        codigo = f.read()

    resultados[nome_arquivo] = resultado = analisar_codigo(codigo, tokenizer, model)

    # Header and answer, each on its own line.
    print("\n--- RESULTADO ---\n", resultado, sep="\n")


Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]




ðŸ“‚ Analisando: core-0.1.1.py

--- RESULTADO ---

Code Smell: Long Method

Location: `download_models` method in the `Chat` class

Evidence: The `download_models` method has more than 50 lines of code.

Justificativa tÃ©cnica: Long methods can make the code harder to read, understand, and maintain. They can also make it more difficult to identify and fix bugs.

SugestÃ£o de refatoraÃ§Ã£o: Split the `download_models` method into smaller, more focused methods. For example, you could create separate methods for downloading models from Hugging Face, local files, and custom paths. This would make the code more modular and easier to understand.

Code Smell: Magic Numbers

Location: The `download_all_assets` function in the `utils` module

Evidence: The `download_all_assets` function uses the `os.getcwd()` method to get the current working directory, but this method returns the path to the current working directory relative to the Python interpreter. This means that the path returned by `os

In [7]:
from google.colab import files

# Write each analysis to "resultado_<name>.txt" and hand it to the browser.
for nome, texto in resultados.items():
    # Strip only a trailing ".py". str.replace would also remove ".py" found
    # in the middle of a name (e.g. "a.pyx.py" or "utils.py.bak").
    base = nome[:-len(".py")] if nome.endswith(".py") else nome
    saida = f"resultado_{base}.txt"

    with open(saida, "w", encoding="utf-8") as f:
        f.write(texto)

    files.download(saida)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>