In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer, get_cosine_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from torch.cuda.amp import autocast, GradScaler
import pandas as pd
import numpy as np
import os
import joblib
import time

# ==============================================================================
# SETTINGS TUNED FOR AN NVIDIA L4 GPU ("turbo mode")
# ==============================================================================
FILE_NAME = "/content/Phishing_Dataset.csv"
BASE_MODEL = "xlm-roberta-base"
SAVE_DIR_BINARY = "/content/drive/MyDrive/Phishing_Project_FINAL/Brain_1_Detector"
SAVE_DIR_MULTI = "/content/drive/MyDrive/Phishing_Project_FINAL/Brain_2_Expert"

# Training hyper-parameters
EPOCHS = 3
BATCH_SIZE = 64          # author's note: the L4's 24 GB of VRAM handles this comfortably (speed-up)
MAX_LEN = 128
LR = 2e-5
NUM_WORKERS = 2          # author's note: speeds up data loading

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Allow TF32 in matmul and cuDNN kernels
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

# get_device_name() raises when no CUDA device is present, which would defeat the
# CPU fallback chosen for `device` above; report "CPU" in that case instead.
print(f"üî• DONANIM: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")
print(f"üöÄ MOD: L4 Performance Mode (Batch: {BATCH_SIZE}, TF32: On)")
print("="*60)

# Load the data; fail early with a clear message when the CSV is missing
if not os.path.exists(FILE_NAME):
    raise FileNotFoundError(f"‚ùå Dosya bulunamadƒ±: {FILE_NAME}")

df = pd.read_csv(FILE_NAME, low_memory=False)
# Missing texts become empty strings so every row can be tokenized
df['Full_Text'] = df['Full_Text'].fillna("").astype(str)
# Dataset class
class PhishingDataset(Dataset):
    """Map-style dataset that tokenizes one text per item on access.

    Args:
        texts: Indexable collection of input texts; each item is passed
            through ``str`` before tokenization.
        labels: Indexable collection of integer class ids aligned with
            ``texts``.
        tokenizer: Callable invoked with Hugging Face-style keyword
            arguments; its result must expose ``input_ids`` and
            ``attention_mask`` tensors.
        max_len: Fixed length every sequence is padded/truncated to.
            ``None`` (the default) falls back to the module-level
            ``MAX_LEN``, which matches the previous behaviour.
    """

    def __init__(self, texts, labels, tokenizer, max_len=None):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, i):
        # Resolved lazily so the global default is only required when no
        # explicit length was supplied.
        max_len = MAX_LEN if self.max_len is None else self.max_len
        enc = self.tokenizer(
            str(self.texts[i]), add_special_tokens=True, max_length=max_len,
            padding='max_length', truncation=True, return_tensors='pt'
        )
        # flatten() drops the leading batch dimension added by return_tensors='pt'
        return {
            'input_ids': enc['input_ids'].flatten(),
            'attention_mask': enc['attention_mask'].flatten(),
            'labels': torch.tensor(self.labels[i], dtype=torch.long)
        }

# ==============================================================================
# BRAIN 1: DETECTOR (SAFE vs PHISHING)
# ==============================================================================
print("\nüõ°Ô∏è A≈ûAMA 1: DEDEKT√ñR MODELƒ∞ Eƒûƒ∞Tƒ∞Lƒ∞YOR...")
os.makedirs(SAVE_DIR_BINARY, exist_ok=True)  # idempotent; no separate existence check needed

# Data: every row, with the binary label column
X_bin = df['Full_Text'].values
y_bin = df['Etiket'].astype(int).values

# 'balanced' class weights, fed into the loss below
weights_bin = compute_class_weight('balanced', classes=np.unique(y_bin), y=y_bin)
print(f"   ‚öñÔ∏è Dedekt√∂r Aƒüƒ±rlƒ±klarƒ± -> G√ºvenli: {weights_bin[0]:.2f} | Phishing: {weights_bin[1]:.2f}")

# Stratified 85/15 train/validation split
X_train_b, X_val_b, y_train_b, y_val_b = train_test_split(X_bin, y_bin, test_size=0.15, stratify=y_bin, random_state=42)

# Tokenizer (saved next to the model so the folder is self-contained)
tokenizer = XLMRobertaTokenizer.from_pretrained(BASE_MODEL)
tokenizer.save_pretrained(SAVE_DIR_BINARY)

# Loaders (pinned memory for faster host-to-GPU transfer)
train_ds_b = PhishingDataset(X_train_b, y_train_b, tokenizer)
val_ds_b = PhishingDataset(X_val_b, y_val_b, tokenizer)
train_loader_b = DataLoader(train_ds_b, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
val_loader_b = DataLoader(val_ds_b, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True)

# Model, optimizer, class-weighted loss
model_b = XLMRobertaForSequenceClassification.from_pretrained(BASE_MODEL, num_labels=2).to(device)
optimizer_b = AdamW(model_b.parameters(), lr=LR)
criterion_b = nn.CrossEntropyLoss(weight=torch.tensor(weights_bin, dtype=torch.float).to(device))
scaler = GradScaler()  # loss scaler for the autocast (mixed-precision) training steps

# Training 1
best_f1_b = -1.0
for epoch in range(EPOCHS):
    model_b.train()
    t0 = time.time()
    total_loss = 0

    for batch in train_loader_b:
        optimizer_b.zero_grad()
        with autocast():
            outputs = model_b(input_ids=batch['input_ids'].to(device), attention_mask=batch['attention_mask'].to(device))
            # The loss is computed here (not by the model) so the class weights apply
            loss = criterion_b(outputs.logits, batch['labels'].to(device))

        scaler.scale(loss).backward()
        scaler.step(optimizer_b)
        scaler.update()
        total_loss += loss.item()

    # Validation 1
    model_b.eval()
    preds, true_vals = [], []
    with torch.no_grad():
        for batch in val_loader_b:
            out = model_b(input_ids=batch['input_ids'].to(device), attention_mask=batch['attention_mask'].to(device))
            preds.extend(torch.argmax(out.logits, dim=1).cpu().numpy())
            true_vals.extend(batch['labels'].cpu().numpy())

    f1 = f1_score(true_vals, preds)
    elapsed = int(time.time() - t0)
    print(f"   Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(train_loader_b):.4f} | F1: {f1:.4f} | S√ºre: {elapsed}sn")

    # Persist the best-scoring epoch instead of whichever epoch happens to run
    # last; validation F1 can drop in a later epoch.
    if f1 > best_f1_b:
        best_f1_b = f1
        model_b.save_pretrained(SAVE_DIR_BINARY)

print(f"   ‚úÖ Dedekt√∂r Kaydedildi.")
del model_b, optimizer_b, train_loader_b  # free memory before the second model is built
torch.cuda.empty_cache()

# ==============================================================================
# BRAIN 2: EXPERT (CATEGORY)
# ==============================================================================
print("\nüé© A≈ûAMA 2: UZMAN MODELƒ∞ Eƒûƒ∞Tƒ∞Lƒ∞YOR (Multi-Class)...")
os.makedirs(SAVE_DIR_MULTI, exist_ok=True)

# Phishing rows only (Etiket == 1)
df_phish = df[df['Etiket'] == 1].copy()
X_multi = df_phish['Full_Text'].values
y_cats = df_phish['Kategori'].values

# Label encoding; the fitted encoder is stored so inference can map ids back to names
label_encoder = LabelEncoder()
y_multi = label_encoder.fit_transform(y_cats)
joblib.dump(label_encoder, os.path.join(SAVE_DIR_MULTI, "category_encoder.pkl"))

# 'balanced' class weights for the category loss
weights_multi = compute_class_weight('balanced', classes=np.unique(y_multi), y=y_multi)

# Stratified 85/15 train/validation split
X_train_m, X_val_m, y_train_m, y_val_m = train_test_split(X_multi, y_multi, test_size=0.15, stratify=y_multi, random_state=42)

# Loaders
train_ds_m = PhishingDataset(X_train_m, y_train_m, tokenizer)
val_ds_m = PhishingDataset(X_val_m, y_val_m, tokenizer)
train_loader_m = DataLoader(train_ds_m, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
val_loader_m = DataLoader(val_ds_m, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True)

# Model 2
model_m = XLMRobertaForSequenceClassification.from_pretrained(BASE_MODEL, num_labels=len(label_encoder.classes_)).to(device)
optimizer_m = AdamW(model_m.parameters(), lr=LR)
criterion_m = nn.CrossEntropyLoss(weight=torch.tensor(weights_multi, dtype=torch.float).to(device))
# Fresh scaler: the loss-scale state accumulated while training the detector
# should not carry over into an unrelated model.
scaler = GradScaler()

# Training 2
best_f1_m = -1.0
for epoch in range(EPOCHS):
    model_m.train()
    t0 = time.time()
    total_loss = 0

    for batch in train_loader_m:
        optimizer_m.zero_grad()
        with autocast():
            outputs = model_m(input_ids=batch['input_ids'].to(device), attention_mask=batch['attention_mask'].to(device))
            loss = criterion_m(outputs.logits, batch['labels'].to(device))
        scaler.scale(loss).backward()
        scaler.step(optimizer_m)
        scaler.update()
        total_loss += loss.item()

    # Validation 2
    model_m.eval()
    preds, true_vals = [], []
    with torch.no_grad():
        for batch in val_loader_m:
            out = model_m(input_ids=batch['input_ids'].to(device), attention_mask=batch['attention_mask'].to(device))
            preds.extend(torch.argmax(out.logits, dim=1).cpu().numpy())
            true_vals.extend(batch['labels'].cpu().numpy())

    f1 = f1_score(true_vals, preds, average='macro')
    elapsed = int(time.time() - t0)
    print(f"   Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(train_loader_m):.4f} | Macro F1: {f1:.4f} | S√ºre: {elapsed}sn")

    # Same policy as the detector: keep the best epoch on disk
    if f1 > best_f1_m:
        best_f1_m = f1
        model_m.save_pretrained(SAVE_DIR_MULTI)

tokenizer.save_pretrained(SAVE_DIR_MULTI)
print(f"   ‚úÖ Uzman Kaydedildi.")

print("\n" + "="*60)
print("üéâ √áƒ∞FT BEYƒ∞NLƒ∞ Sƒ∞STEM TAMAMLANDI! (L4 TURBO MODE)")
print(f"üìÇ Modeller Drive'da hazƒ±r: /content/drive/MyDrive/Phishing_Project_FINAL")

  self.setter(val)


🔥 DONANIM: NVIDIA L4
🚀 MOD: L4 Performance Mode (Batch: 64, TF32: On)

🛡️ AŞAMA 1: DEDEKTÖR MODELİ EĞİTİLİYOR...
   ⚖️ Dedektör Ağırlıkları -> Güvenli: 0.67 | Phishing: 1.95


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler() # FP16 Hƒ±zlandƒ±rƒ±cƒ±
  with autocast():


   Epoch 1/3 | Loss: 0.0424 | F1: 0.9902 | Süre: 588sn


  with autocast():


   Epoch 2/3 | Loss: 0.0153 | F1: 0.9923 | Süre: 589sn


  with autocast():


   Epoch 3/3 | Loss: 0.0102 | F1: 0.9921 | Süre: 588sn
   ✅ Dedektör Kaydedildi.

🎩 AŞAMA 2: UZMAN MODELİ EĞİTİLİYOR (Multi-Class)...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with autocast():


   Epoch 1/3 | Loss: 1.1333 | Macro F1: 0.6991 | Süre: 152sn


  with autocast():


   Epoch 2/3 | Loss: 0.5945 | Macro F1: 0.7817 | Süre: 152sn


  with autocast():


   Epoch 3/3 | Loss: 0.4560 | Macro F1: 0.7817 | Süre: 151sn
   ✅ Uzman Kaydedildi.

🎉 ÇİFT BEYİNLİ SİSTEM TAMAMLANDI! (L4 TURBO MODE)
📂 Modeller Drive'da hazır: /content/drive/MyDrive/Phishing_Project_FINAL


In [None]:
!pip install -q gradio bitsandbytes accelerate

In [None]:
import gradio as gr
import torch
import torch.nn.functional as F
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import joblib
import numpy as np
import os
import re
from urllib.parse import urlparse

# ==============================================================================
# 1. SETUP & LOADING
# ==============================================================================
BASE_PATH = "/content/drive/MyDrive/Phishing_Project_FINAL"
PATH_BINARY = os.path.join(BASE_PATH, "Brain_1_Detector")
PATH_MULTI = os.path.join(BASE_PATH, "Brain_2_Expert")
ENCODER_PATH = os.path.join(PATH_MULTI, "category_encoder.pkl")
LLM_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# get_device_name() raises without a CUDA device; report "CPU" instead so the
# fallback chosen for `device` does not crash on the very next line.
print(f"üî• SYSTEM STARTING... Hardware: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")

# ------------------------------------------------------------------------------
# LOAD MODELS
# ------------------------------------------------------------------------------
print("\n‚è≥ [1/3] Loading XLM-RoBERTa Models...")
try:
    # Binary detector and its tokenizer (the same tokenizer feeds both classifiers)
    model_bin = XLMRobertaForSequenceClassification.from_pretrained(PATH_BINARY).to(device)
    tokenizer_bert = XLMRobertaTokenizer.from_pretrained(PATH_BINARY)
    model_bin.eval()

    # Multi-class category model
    model_multi = XLMRobertaForSequenceClassification.from_pretrained(PATH_MULTI).to(device)
    model_multi.eval()

    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only load an encoder file that comes from a trusted source.
    label_encoder = joblib.load(ENCODER_PATH)
    print("‚úÖ Detector and Expert Models Ready.")
except Exception as e:
    # Chain explicitly so the original traceback stays attached as the cause
    raise RuntimeError(f"Models could not be loaded: {e}") from e

print("\n‚è≥ [2/3] Loading Mistral-7B (Narrator)...")
try:
    # 4-bit NF4 quantization with double quantization; compute in bfloat16
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
    )
    llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID)
    llm_model = AutoModelForCausalLM.from_pretrained(LLM_MODEL_ID, quantization_config=bnb_config, device_map="auto")
    print("‚úÖ Mistral-7B Ready.")
except Exception as e:
    raise RuntimeError(f"LLM could not be loaded: {e}") from e

# ==============================================================================
# WHITELIST (GLOBAL & TR)
# ==============================================================================
# Registrable domains treated as official. analyze_email_gradio compares the
# URL's host against each entry (exact match or a "."-prefixed suffix match)
# and returns a SAFE verdict without running any model when one matches.
WHITELIST_DOMAINS = [
    # TR Banks
    "ziraatbank.com.tr", "garanti.com.tr", "isbank.com.tr", "akbank.com", "yapikredi.com.tr",
    "vakifbank.com.tr", "halkbank.com.tr", "denizbank.com", "qnbfinansbank.com", "enpara.com",
    # Government
    "turkiye.gov.tr", "gib.gov.tr", "uab.gov.tr", "egm.gov.tr", "meb.gov.tr", "saglik.gov.tr",
    # Logistics & E-Commerce
    "yurticikargo.com", "araskargo.com.tr", "mngkargo.com.tr", "suratkargo.com.tr", "ptt.gov.tr",
    "trendyol.com", "hepsiburada.com", "n11.com", "sahibinden.com", "amazon.com.tr", "amazon.com",
    # Global Tech
    "google.com", "gmail.com", "youtube.com", "microsoft.com", "outlook.com", "hotmail.com",
    "facebook.com", "instagram.com", "twitter.com", "linkedin.com", "netflix.com", "apple.com"
]

# ==============================================================================
# FEATURE EXTRACTOR (ENGLISH OUTPUT)
# ==============================================================================
def extract_features_text(url):
    """Summarize suspicious structural traits of a URL as English text.

    Args:
        url: URL string; the scheme is optional. Falsy or whitespace-only
            input is treated as "no link supplied".

    Returns:
        ``"No Link"`` when nothing was supplied, ``"Malformed URL"`` when the
        URL cannot be parsed, ``"Clean URL Structure"`` when no check fires,
        otherwise the triggered findings joined with ``", "``.
    """
    if not url:
        return "No Link"

    url_str = str(url).strip()
    if not url_str:
        return "No Link"
    full_url = url_str.lower()

    # urlparse only populates the host when a scheme is present; the scheme
    # test is case-insensitive so "HTTP://..." is not prefixed a second time.
    if full_url.startswith(('http://', 'https://')):
        parse_url = url_str
    else:
        parse_url = "http://" + url_str

    try:
        # .hostname drops userinfo and port, so "user@1.2.3.4" and
        # "evil.xyz:8080" are checked on the bare host. A trailing root dot
        # is removed so the TLD check still applies.
        domain = (urlparse(parse_url).hostname or "").lower().rstrip(".")
    except ValueError:
        return "Malformed URL"

    features = []

    # 1. IP Address Check (host begins with a dotted quad)
    if re.match(r'(\d{1,3}\.){3}\d{1,3}', domain):
        features.append("IP Address Usage")

    # 2. @ Symbol
    if "@" in full_url:
        features.append("@ Symbol (Redirection)")

    # 3. Hyphen Check
    if domain.count("-") > 2:
        features.append("Complex/Hyphenated Domain")

    # 4. Sensitive Words in Domain
    bad_words = ["login", "signin", "secure", "account", "update", "verify", "banking", "guvenlik"]
    found_in_domain = [w for w in bad_words if w in domain]
    if found_in_domain:
        features.append(f"Sensitive Word in Domain ({', '.join(found_in_domain)})")

    # 5. Suspicious TLDs (str.endswith accepts a tuple of suffixes)
    suspicious_tlds = ('.xyz', '.top', '.club', '.zip', '.review', '.country', '.gdn', '.info')
    if domain.endswith(suspicious_tlds):
        features.append("Suspicious TLD")

    if not features:
        return "Clean URL Structure"
    return ", ".join(features)

# ==============================================================================
# 3. BRAIN: LLM REPORTING (ENGLISH PROMPT)
# ==============================================================================
def generate_llm_report(subject, message, url, is_phishing, confidence, category, tech_features):
    """Ask the instruction-tuned LLM for a plain-English analysis report.

    Args:
        subject: Email subject line (inserted verbatim into the prompt).
        message: Email body (inserted verbatim into the prompt).
        url: URL under analysis (inserted verbatim into the prompt).
        is_phishing: Detector verdict; selects the status wording.
        confidence: Detector confidence as a percentage number.
        category: Category label shown to the LLM.
        tech_features: Text summary of URL findings shown to the LLM.

    Returns:
        The generated report text, stripped of surrounding whitespace.

    NOTE(review): subject/message/url are user-controlled and embedded
    directly in the prompt, so the report text must be treated as untrusted.
    """
    status = "DANGEROUS (PHISHING)" if is_phishing else "SAFE"

    prompt = f"""[INST] You are a Senior Cybersecurity Analyst. You have analyzed the following email.
    Explain the situation to the user in professional, clear ENGLISH.

    ANALYSIS DATA:
    - Status: {status} (Confidence Score: %{confidence:.2f})
    - Category: {category}
    - Technical Findings: {tech_features}

    EMAIL CONTENT:
    - Subject: {subject}
    - Message: {message}
    - URL: {url}

    YOUR TASKS:
    1. Explain WHY this email is {status}.
    2. If SAFE: Mention if it's a clean URL or looks legitimate.
    3. If DANGEROUS: Explain the attacker's goal (stealing credentials, panic, etc.) and the tricks used.
    4. Give a clear recommendation (Delete, Block, Do not click).
    5. Provide ONLY the analysis text. [/INST]
    """

    inputs = llm_tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = llm_model.generate(**inputs, max_new_tokens=400, do_sample=True, temperature=0.7, pad_token_id=llm_tokenizer.eos_token_id)

    # The returned sequence starts with the prompt tokens. Decode only what
    # was generated after them instead of splitting the decoded text on
    # "[/INST]", which would cut the report short whenever the model itself
    # emits that marker.
    prompt_len = inputs["input_ids"].shape[1]
    return llm_tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

# ==============================================================================
# MAIN PIPELINE
# ==============================================================================
def _official_domain(url):
    """Return the normalized host of *url* if it is on the whitelist, else None."""
    if not url:
        return None

    candidate = str(url).strip()
    # Browsers may treat "\" like "/", which moves the real host; never
    # short-circuit such URLs, let the models judge them.
    if "\\" in candidate:
        return None
    # Require the full scheme prefix: a bare ('http', 'https') test also
    # matches hosts that merely begin with "http".
    if not candidate.lower().startswith(('http://', 'https://')):
        candidate = "http://" + candidate

    try:
        parsed = urlparse(candidate)
    except ValueError:
        return None
    # URLs carrying userinfo ("user@host") are never treated as official
    if "@" in parsed.netloc:
        return None

    # .hostname drops the port; strip a trailing root dot as well
    host = (parsed.hostname or "").rstrip(".")
    # Remove "www." only as a leading label. Replacing it anywhere would
    # turn "google.www.com" or "goowww.gle.com" into "google.com".
    if host.startswith("www."):
        host = host[len("www."):]
    if not host:
        return None

    for official in WHITELIST_DOMAINS:
        if host == official or host.endswith("." + official):
            return host
    return None


def analyze_email_gradio(subject, message, url):
    """Run the full pipeline for one email and return the four UI outputs.

    Args:
        subject: Email subject text.
        message: Email body text.
        url: URL found in the email (may be empty).

    Returns:
        Tuple ``(status_text, confidence_text, category_probabilities, report)``.
        Whitelisted URLs return immediately with a fixed SAFE verdict and no
        model is run.
    """
    # 0. WHITELIST CHECK
    clean_domain = _official_domain(url)
    if clean_domain is not None:
        return "‚úÖ SAFE (Official)", "%100.00", {"OFFICIAL SITE": 1.0}, f"This URL ({clean_domain}) is in our trusted official domains list. It is verified as safe."

    # 1. Feature Extraction
    tech_feats = extract_features_text(url)
    full_text = f"KONU: {subject} MESAJ: {message} URL: {url}"

    # 2. Detector (Safe vs Phishing)
    inputs = tokenizer_bert(full_text, return_tensors="pt", truncation=True, max_length=128).to(device)
    with torch.no_grad():
        logits_bin = model_bin(**inputs).logits
        probs_bin = F.softmax(logits_bin, dim=1)
        pred_bin = torch.argmax(probs_bin, dim=1).item()
        conf_bin = probs_bin[0][pred_bin].item() * 100

    is_phishing = (pred_bin == 1)

    # 3. Expert (Category Probabilities) - only consulted for phishing verdicts
    category_probs = {}
    top_category = "SAFE CONTENT"

    if is_phishing:
        with torch.no_grad():
            logits_multi = model_multi(**inputs).logits
            probs_multi = F.softmax(logits_multi, dim=1).cpu().numpy()[0]
            for i, cls in enumerate(label_encoder.classes_):
                category_probs[cls] = float(probs_multi[i])
            top_category = label_encoder.inverse_transform([torch.argmax(logits_multi).item()])[0]
    else:
        category_probs = {"SAFE": 1.0}

    # 4. Narrator (LLM)
    report = generate_llm_report(subject, message, url, is_phishing, conf_bin, top_category, tech_feats)

    status_txt = "üö® PHISHING DETECTED" if is_phishing else "‚úÖ SAFE"
    return status_txt, f"%{conf_bin:.2f}", category_probs, report

# ==============================================================================
# GRADIO INTERFACE (ENGLISH)
# ==============================================================================
# Two-column layout: inputs and examples on the left, results on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="Cyber Security AI") as app:
    gr.Markdown("""# üõ°Ô∏è 3-Brain Phishing Detection System""")

    with gr.Row():
        # Left column: the three inputs, the trigger button and sample cases
        with gr.Column():
            t_subject = gr.Textbox(label="üìß Email Subject", placeholder="Ex: Your account is restricted")
            t_message = gr.Textbox(label="üì© Message Content", lines=5, placeholder="Paste the message body here...")
            t_url = gr.Textbox(label="üîó Suspicious URL", placeholder="http://...")
            btn = gr.Button("üîç START ANALYSIS", variant="primary")

            # Each example row is [subject, message, url], matching `inputs` below
            gr.Examples(
                examples=[
                    ["Information", "Dear customer, you can view your recent transactions via our mobile app.", "https://www.ziraatbank.com.tr"],
                    ["Invoice Payment", "Dear customer, your invoice is overdue. Click to pay immediately.", "http://garanti-bank-secure-payment.xyz/login"],
                    ["Personal Blog", "Hi, check out my new article!", "https://ahmet-blog.com/new-post"]
                ],
                inputs=[t_subject, t_message, t_url]
            )

        # Right column: one widget per element of analyze_email_gradio's return tuple
        with gr.Column():
            out_status = gr.Textbox(label="üéØ Status", text_align="center")
            with gr.Row():
                out_conf = gr.Textbox(label="üìä Confidence Score")
                out_cat = gr.Label(label="üìÇ Category Probabilities", num_top_classes=7)
            out_report = gr.Textbox(label="üß† AI Expert Report", lines=10)

    # Output order must match the (status, confidence, categories, report) tuple
    btn.click(analyze_email_gradio, inputs=[t_subject, t_message, t_url], outputs=[out_status, out_conf, out_cat, out_report])

print("\nüöÄ Interface Launching...")
# share=True requests a public link; debug=True keeps the cell running so
# errors and logs stay visible in the notebook.
app.launch(share=True, debug=True)

🔥 SYSTEM STARTING... Hardware: NVIDIA L4

⏳ [1/3] Loading XLM-RoBERTa Models...
✅ Detector and Expert Models Ready.

⏳ [2/3] Loading Mistral-7B (Narrator)...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Mistral-7B Ready.

🚀 Interface Launching...


  with gr.Blocks(theme=gr.themes.Soft(), title="Cyber Security AI") as app:


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://3394aaab046c0f3022.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://3394aaab046c0f3022.gradio.live




In [None]:
import os
import shutil
import json
from google.colab import files

# ==============================================================================
# 1. FOLDERS AND SETTINGS
# ==============================================================================
DRIVE_PATH = "/content/drive/MyDrive/Phishing_Project_FINAL"
BRAIN_1_SRC = os.path.join(DRIVE_PATH, "Brain_1_Detector")
BRAIN_2_SRC = os.path.join(DRIVE_PATH, "Brain_2_Expert")

# Package folder (this is what gets zipped and downloaded)
PACK_DIR = "/content/Project_Assets_For_Dev"
MODELS_DIR = os.path.join(PACK_DIR, "models")

# Cleanup: always start from an empty package folder
if os.path.exists(PACK_DIR): shutil.rmtree(PACK_DIR)
os.makedirs(MODELS_DIR)

print("üì¶ Geli≈ütirici Paketi Hazƒ±rlanƒ±yor...")

# ==============================================================================
# 2. COPY THE MODELS (raw material)
# ==============================================================================
print("‚è≥ Modeller Drive'dan alƒ±nƒ±yor...")
if not (os.path.exists(BRAIN_1_SRC) and os.path.exists(BRAIN_2_SRC)):
    # Stop here: continuing would zip and download a package with no models in it
    raise FileNotFoundError("‚ö†Ô∏è HATA: Modeller Drive'da bulunamadƒ±!")
shutil.copytree(BRAIN_1_SRC, os.path.join(MODELS_DIR, "Brain_1_Detector"))
shutil.copytree(BRAIN_2_SRC, os.path.join(MODELS_DIR, "Brain_2_Expert"))
print("‚úÖ Modeller klas√∂re eklendi.")

# ==============================================================================
# 3. CONFIG.JSON (data set / lists)
# ==============================================================================
# The lists are saved as JSON so they do not have to be hand-written in code.
# In Python they can be read with: data = json.load(open('config.json'))
config_data = {
    "whitelist_domains": [
        "ziraatbank.com.tr", "garanti.com.tr", "isbank.com.tr", "akbank.com", "yapikredi.com.tr",
        "vakifbank.com.tr", "halkbank.com.tr", "denizbank.com", "qnbfinansbank.com", "enpara.com",
        "turkiye.gov.tr", "gib.gov.tr", "uab.gov.tr", "egm.gov.tr", "meb.gov.tr", "saglik.gov.tr",
        "yurticikargo.com", "araskargo.com.tr", "mngkargo.com.tr", "suratkargo.com.tr", "ptt.gov.tr",
        "trendyol.com", "hepsiburada.com", "n11.com", "sahibinden.com", "amazon.com.tr", "amazon.com",
        "google.com", "gmail.com", "youtube.com", "microsoft.com", "outlook.com", "hotmail.com",
        "facebook.com", "instagram.com", "twitter.com", "linkedin.com", "netflix.com", "apple.com"
    ],
    "bad_words": ["login", "signin", "secure", "account", "update", "verify", "webscr", "banking", "guvenlik"],
    "suspicious_tlds": [".xyz", ".top", ".club", ".zip", ".review", ".country", ".gdn", ".info"],
    "llm_model_id": "mistralai/Mistral-7B-Instruct-v0.2"
}

with open(os.path.join(PACK_DIR, "config.json"), "w", encoding="utf-8") as f:
    json.dump(config_data, f, indent=4, ensure_ascii=False)
print("‚úÖ config.json (Whitelist ve Ayarlar) olu≈üturuldu.")

# ==============================================================================
# 4. REQUIREMENTS.TXT
# ==============================================================================
# Libraries the Flask code will import. scikit-learn is needed to unpickle the
# saved LabelEncoder (category_encoder.pkl) and sentencepiece is needed by
# XLMRobertaTokenizer; without them the packaged models cannot be loaded.
reqs = """flask
torch
transformers
sentencepiece
accelerate
bitsandbytes
joblib
scikit-learn
scipy
google-generativeai
"""
with open(os.path.join(PACK_DIR, "requirements.txt"), "w", encoding="utf-8") as f:
    f.write(reqs)
print("‚úÖ requirements.txt olu≈üturuldu.")

# ==============================================================================
# 5. PACKAGE AND DOWNLOAD
# ==============================================================================
print("üì¶ ZIP olu≈üturuluyor...")
shutil.make_archive("/content/Phishing_Dev_Assets", 'zip', PACK_DIR)

print("‚¨áÔ∏è ƒ∞NDƒ∞Rƒ∞Lƒ∞YOR...")
files.download("/content/Phishing_Dev_Assets.zip")

📦 Geliştirici Paketi Hazırlanıyor...
⏳ Modeller Drive'dan alınıyor...
✅ Modeller klasöre eklendi.
✅ config.json (Whitelist ve Ayarlar) oluşturuldu.
✅ requirements.txt oluşturuldu.
📦 ZIP oluşturuluyor...
⬇️ İNDİRİLİYOR...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>