In [None]:
# ===============================
# CÉLULA 0 — CHECK DE GPU
# ===============================

import torch, sys

# Report the installed torch build and whether a CUDA device is visible.
print("Torch:", torch.__version__)
print("CUDA dispon√≠vel:", torch.cuda.is_available())

if torch.cuda.is_available():
    # Happy path: name the first CUDA device and let the notebook continue.
    print("GPU detectada:", torch.cuda.get_device_name(0))
    print("‚úÖ GPU OK ‚Äî pode continuar")
else:
    # Abort here so that "Run All" stops before any GPU-dependent cell runs.
    raise RuntimeError(
        "‚ùå GPU N√ÉO ATIVA\n"
        "Ative em: Ambiente de execu√ß√£o ‚Üí Alterar tipo ‚Üí GPU\n"
        "Depois REINICIE o runtime."
    )


In [None]:
# ===============================
# CÉLULA 1 — HUGGING FACE LOGIN
# ===============================

import os
from huggingface_hub import login

# Authenticate against the Hugging Face Hub only when a usable token is set.
# An empty or whitespace-only HUGGINGFACE_TOKEN is treated as "not set":
# passing it to login() would fail instead of falling back to public access.
# The token is read straight from the environment so that the secret is never
# bound to a notebook-level variable.
if os.environ.get("HUGGINGFACE_TOKEN", "").strip():
    login(token=os.environ["HUGGINGFACE_TOKEN"].strip())
    print("‚úÖ Hugging Face autenticado com token")
else:
    print("‚ö†Ô∏è HUGGINGFACE_TOKEN n√£o encontrado")
    print("‚û°Ô∏è Acesso p√∫blico ser√° usado (ok para Qwen 7B)")


In [None]:
# ===============================
# CÉLULA 2 — SETUP
# ===============================

import os, sys, subprocess

# Expose only GPU 0 to CUDA and silence the tokenizers fork-parallelism warning.
# NOTE(review): cell 0 has already queried CUDA through torch, so the
# CUDA_VISIBLE_DEVICES setting may arrive too late for this process — confirm.
os.environ.update(
    CUDA_VISIBLE_DEVICES="0",
    TOKENIZERS_PARALLELISM="false",
)

def pip_install(args):
    """Run ``python -m pip <args>`` with the interpreter backing this kernel.

    Args:
        args: Whitespace-separated pip arguments, e.g. ``"install -q -U pkg"``.
            The string is split on whitespace, so quoted arguments containing
            spaces are not supported.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    command = [sys.executable, "-m", "pip", *args.split()]
    subprocess.check_call(command)

def pip_uninstall(pkg):
    """Best-effort ``python -m pip uninstall -y <pkg>`` for this kernel.

    A failing uninstall is reported and then ignored so the setup cell can
    carry on. Only process-level failures are swallowed; KeyboardInterrupt,
    SystemExit and programming errors propagate instead of being hidden by a
    bare ``except``.

    Args:
        pkg: Name of the package to remove.
    """
    command = [sys.executable, "-m", "pip", "uninstall", "-y", pkg]
    try:
        subprocess.check_call(command)
    except (subprocess.CalledProcessError, OSError) as exc:
        # Deliberately non-fatal: the caller reinstalls the package right after.
        print(f"pip uninstall {pkg} falhou: {exc}")

# Install/upgrade the inference stack. torch is deliberately NOT reinstalled
# (per the original author: Colab already ships a torch built for its CUDA).
# NOTE(review): "-U" without version pins pulls whatever is latest at run
# time, so results may differ between runs — consider pinning versions.
pip_install("install -q -U transformers accelerate huggingface_hub")

# Remove any existing bitsandbytes first, then install the latest release.
pip_uninstall("bitsandbytes")
pip_install("install -q -U bitsandbytes")

print("‚úÖ Ambiente configurado")


In [None]:
# ===============================
# CÉLULA 3 — MODELO QWEN 2.5 7B
# ===============================

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"

# Tokenizer (lightweight, uses no VRAM).
# NOTE(review): trust_remote_code=True allows the hub repository to execute
# its own Python code — confirm it is actually required for this model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True, use_fast=True)

# 4-bit NF4 quantization with double quantization and float16 compute
# (the original author tuned this for a T4).
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Original author's rule: never use device_map="auto" on Colab Free.
# from_pretrained() hands back the model, and .eval() returns that same
# object after switching it to inference mode.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    low_cpu_mem_usage=True,      # avoids blowing up host RAM while loading
    torch_dtype=torch.float16,
    device_map={"": 0},          # forces EVERYTHING onto GPU 0
    quantization_config=bnb_config,
).eval()

print("‚úÖ Qwen2.5-7B carregado com sucesso")
print("VRAM usada (GB):", round(torch.cuda.memory_allocated() / 2**30, 2))


In [None]:
# ===============================
# CÉLULA 4 — INFERÊNCIA SEGURA
# ===============================

def ask_qwen(
    prompt,
    max_new_tokens=700,
    temperature=0.2,
    top_p=0.95,
    max_prompt_tokens=6000,  # real cap on the templated prompt length
    show_usage=False
):
    """Ask the loaded chat model a single question and return only its answer.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: User message text.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. ``0``/``None`` (or any value
            ``<= 0``) switches to greedy decoding instead of raising.
        top_p: Nucleus-sampling threshold (only used when sampling).
        max_prompt_tokens: Budget for the fully templated prompt. When it is
            exceeded, the *user text* is shortened so that the chat markers
            and the assistant header survive.
        show_usage: When true, print the number of prompt tokens sent.

    Returns:
        The generated answer as a stripped string, without the echoed prompt
        or role markers.
    """

    def build_inputs(user_text):
        # Render system + user turns and open the assistant turn
        # (add_generation_prompt) so the model answers instead of continuing
        # the user message. return_dict is explicit so the result always has
        # input_ids and attention_mask, whatever the library default is.
        messages = [
            {"role": "system", "content": "Voc√™ √© um especialista em Engenharia de Software e DevOps."},
            {"role": "user", "content": user_text},
        ]
        return tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
        )

    inputs = build_inputs(prompt)

    # Enforce the budget by trimming the user text, not the rendered template:
    # truncating the template cuts its tail, i.e. the end of the user turn and
    # the assistant header.
    overflow = inputs["input_ids"].shape[-1] - max_prompt_tokens
    if overflow > 0:
        user_ids = tokenizer(prompt, add_special_tokens=False)["input_ids"]
        kept_ids = user_ids[: max(len(user_ids) - overflow, 0)]
        inputs = build_inputs(tokenizer.decode(kept_ids, skip_special_tokens=True))

    inputs = inputs.to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    if show_usage:
        print("Prompt tokens:", prompt_len)

    # Sampling needs a strictly positive temperature; otherwise decode greedily.
    sampling = temperature is not None and temperature > 0
    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": sampling,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if sampling:
        gen_kwargs.update(temperature=temperature, top_p=top_p)

    with torch.inference_mode():
        # **inputs forwards input_ids together with the attention mask.
        output = model.generate(**inputs, **gen_kwargs)

    # Decode only the newly generated tokens. Splitting the full transcript
    # on the prompt text leaves the "assistant" role marker in the answer and
    # breaks whenever the prompt was truncated or re-normalised.
    answer_ids = output[0][prompt_len:]
    return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()


In [None]:
# ===============================
# CÉLULA 5 — TESTE
# ===============================

# Smoke test: one short question, with the prompt token count printed.
resp = ask_qwen(prompt="Explique em 3 linhas o que √© GitFlow.", show_usage=True)
print(resp)


In [None]:
# ===============================
# CÉLULA 6 — COLETA GIT
# ===============================

import subprocess, os
from collections import Counter
from IPython.display import Markdown, display

def git(cmd, cwd):
    """Run a command inside ``cwd`` and return its stdout as stripped text.

    Args:
        cmd: Either a shell command line (``str``, executed through the
            shell so pipelines work) or an argv list (executed directly,
            without a shell).
        cwd: Working directory for the command.

    Returns:
        The command's standard output, decoded as UTF-8 with undecodable
        bytes replaced (so an oddly encoded ref name cannot crash the cell),
        with leading and trailing whitespace removed.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    raw = subprocess.check_output(cmd, cwd=cwd, shell=isinstance(cmd, str))
    return raw.decode("utf-8", errors="replace").strip()

repo_url = "https://github.com/unclecode/crawl4ai.git"
repo_dir = "/content/repo"

# Clone once. Checking for the .git directory (rather than any directory)
# lets a re-run recover from an empty or half-created target folder.
if not os.path.isdir(os.path.join(repo_dir, ".git")):
    subprocess.check_call(["git", "clone", repo_url, repo_dir])

# Remote branches. The symbolic "origin/HEAD -> origin/<default>" entry is
# filtered here instead of via `| grep -v HEAD`: grep exits non-zero when
# nothing is left (making check_output raise) and would also drop real
# branches whose name merely contains "HEAD".
remote_prefix = "origin/"
branches = []
for line in git("git branch -r", repo_dir).splitlines():
    ref = line.strip()
    if not ref or " -> " in ref:
        continue
    # Strip only the leading remote name; str.replace would also rewrite an
    # "origin/" that appears later in the branch name.
    branches.append(ref[len(remote_prefix):] if ref.startswith(remote_prefix) else ref)

# Oldest to newest by creation date, so tags[-N:] really are the latest
# tags (the default listing is lexical).
tags = git("git tag --list --sort=creatordate", repo_dir).splitlines()

# Group branches by their first path segment; un-prefixed ones go to 'root'.
bucket = Counter(b.split('/')[0] if '/' in b else 'root' for b in branches)

display(Markdown(f"- **Branches:** {len(branches)}"))
display(Markdown(f"- **Tags:** {len(tags)}"))
display(Markdown(f"```{bucket}```"))


In [None]:
# ===============================
# CÉLULA 7 — ANÁLISE
# ===============================

# Build the branching-model question from the data collected in the Git cell:
# the total branch count, the per-prefix Counter (`bucket`) and the first 40
# branch names in sorted order. The model is asked to pick one of three
# workflows and justify the choice.
prompt = f"""
Analise o modelo de branching com base nos dados reais.

Branches totais: {len(branches)}
Distribui√ß√£o:
{bucket}

Amostra:
{sorted(branches)[:40]}

Classifique:
A) GitFlow
B) GitHub Flow
C) Trunk-Based Development

Justifique com evid√™ncias reais.
"""

# Query the model with ask_qwen's default generation settings.
resp = ask_qwen(prompt)
display(Markdown("## üåø Branching Model"))
display(Markdown(resp))


In [None]:
# ===============================
# CÉLULA 8 — RELEASES
# ===============================

# Build the release-strategy question from the collected tags: the total
# count plus the last 20 entries of `tags` as a sample.
prompt = f"""
Analise a estrat√©gia de releases com base nas tags abaixo.

Total de tags: {len(tags)}
Amostra de tags:
{tags[-20:]}

Classifique:
- Rapid Release
- Release Train
- LTS + Current
- Ad-hoc
"""

if tags:
    # Render the answer as Markdown, like the branching analysis cell does,
    # instead of printing the raw Markdown source.
    display(Markdown(ask_qwen(prompt)))
else:
    # With no tags there is nothing to classify; skip the model call rather
    # than asking it to judge an empty list.
    print("Nenhuma tag encontrada - nada a classificar.")
