In [1]:
import pandas as pd
import numpy as np
import os
import torch
import matplotlib.pyplot as plt
from scipy.stats import zscore
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset

# Report that the imports succeeded and whether PyTorch can see a CUDA device.
device_label = "GPU" if torch.cuda.is_available() else "CPU"
print("KÃ¼tÃ¼phaneler yÃ¼klendi. Cihaz:", device_label)

  from .autonotebook import tqdm as notebook_tqdm


KÃ¼tÃ¼phaneler yÃ¼klendi. Cihaz: CPU


In [2]:
# Project directory settings: the raw CSV exports are read from a "data_raw"
# folder located in the parent of the current working directory.
BASE_DIR = os.getcwd()
DATA_RAW_DIR = os.path.join(os.path.dirname(BASE_DIR), "data_raw")
# Sorted so the files are always loaded in the same order.
csv_files = sorted(f for f in os.listdir(DATA_RAW_DIR) if f.endswith(".csv"))
if not csv_files:
    # Without this guard pd.concat([]) fails with an opaque
    # "No objects to concatenate" error that hides the real cause.
    raise FileNotFoundError(f"No .csv files found in {DATA_RAW_DIR}")

# Merge all files into a single frame.
df_list = [pd.read_csv(os.path.join(DATA_RAW_DIR, f)) for f in csv_files]
df = pd.concat(df_list, ignore_index=True)
# errors="coerce" turns unparseable timestamps into NaT instead of raising.
df["ft"] = pd.to_datetime(df["ft"], errors="coerce")
# Sort chronologically and rebuild a 0..n-1 index.
df = df.sort_values("ft").reset_index(drop=True)

# CRITICAL SUBSYSTEM COLUMNS
thermal_cols = ["ANALOGS_BUS_TEMP", "RADIO_SDR_TEMP", "ANALOGS_BATTERY1_TEMP", "ANALOGS_PL_TIRS_TEMP1"]
power_cols = ["ANALOGS_BATTERY_VOLTAGE", "ANALOGS_BATTERY_1_CURRENT"]
nav_cols = ["GPS_MSG_TRACKED_SATELLITES"]
env_cols = ["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"]  # Context (shadow / eclipse state)

# Columns that are screened for anomalies; env_cols is kept only as context.
target_cols = thermal_cols + power_cols + nav_cols
# Work on a copy so the derived columns added later never touch the merged frame.
df_sel = df[["ft"] + target_cols + env_cols].copy()

print(f"Veri yÃ¼klendi. Toplam satÄ±r: {len(df_sel)}")

Veri yÃ¼klendi. Toplam satÄ±r: 205553


In [3]:
# 3-sigma rule: a sample is anomalous when its z-score magnitude exceeds this.
Z_THRESHOLD = 3

for col in target_cols:
    # Forward-fill gaps. Series.ffill() replaces the deprecated
    # fillna(method="ffill") spelling.
    df_sel[col] = df_sel[col].ffill()
    # Rows before the first valid sample have nothing to fill from and stay NaN.
    # nan_policy="omit" keeps those NaNs out of the mean/std so they do not turn
    # the whole z-score column into NaN (which would silently flag nothing).
    df_sel[f"{col}_z"] = zscore(df_sel[col].to_numpy(), nan_policy="omit")
    # NaN z-scores compare as False here, so unfilled rows are never flagged.
    df_sel[f"{col}_anomaly"] = df_sel[f"{col}_z"].abs() > Z_THRESHOLD

# Extract the rows where at least one subsystem is flagged as anomalous.
anomaly_flag_cols = [c for c in df_sel.columns if "_anomaly" in c]
anomaly_indices = df_sel.index[df_sel[anomaly_flag_cols].any(axis=1)]
print(f"Tespit edilen toplam anomali noktasÄ±: {len(anomaly_indices)}")

Tespit edilen toplam anomali noktasÄ±: 6699


In [4]:
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the EOS token as the pad token so padded tokenization has a pad token to use.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name)

# LoRA settings
lora_config = LoraConfig(
    r=16,  # Capacity raised slightly (to resolve correlations better)
    lora_alpha=32,
    target_modules=["c_attn"],
    # GPT-2 style c_attn is a Conv1D layer that stores its weight as
    # (fan_in, fan_out); declare it explicitly instead of relying on PEFT
    # to detect the mismatch and override the setting with a warning.
    fan_in_fan_out=True,
    lora_dropout=0.05,
    task_type=TaskType.CAUSAL_LM
)

# Wrap the base model with the LoRA adapters and report the trainable share.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 294,912 || all params: 82,207,488 || trainable%: 0.3587




In [5]:
def create_structured_prompt(row):
    """Render one telemetry row as a text prompt that ends with "Analysis: ".

    Args:
        row: Any object indexable by column name (``row[name]``) that provides
            the columns listed in ``thermal_cols``, the battery voltage and
            current columns, the umbra flag and the tracked-satellite count.

    Returns:
        A multi-line string with environment, thermal, power and GPS lines,
        followed by an empty "Analysis: " field.
    """
    # Thermal summary: label each reading with the second "_"-separated token
    # of its column name.
    thermal_parts = []
    for column in thermal_cols:
        label = column.split("_")[1]
        thermal_parts.append(f"{label}: {row[column]:.1f}C")
    thermal_text = ", ".join(thermal_parts)

    # Power summary
    voltage = row["ANALOGS_BATTERY_VOLTAGE"]
    current = row["ANALOGS_BATTERY_1_CURRENT"]
    power_text = f"Volt: {voltage:.2f}V, Current: {current:.2f}A"

    # Shadow state: only the exact string "YES" maps to SHADOW.
    if row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES":
        environment = "SHADOW"
    else:
        environment = "SUNLIGHT"

    report_lines = [
        "### Satellite Telemetry Report",
        f"Environment: {environment}",
        f"Thermal: {thermal_text}",
        f"Power: {power_text}",
        f"GPS Sats: {row['GPS_MSG_TRACKED_SATELLITES']}",
        "Analysis: ",
    ]
    return "\n".join(report_lines)

# Build a small sample dataset (for training) from the first anomalous rows.
N_TRAIN_EXAMPLES = 50
# anomaly_indices holds index *labels* (taken from df_sel's .index), so rows are
# selected with .loc; positional .iloc would only agree while the index is 0..n-1.
# NOTE(review): every prompt stops at "Analysis: " with no target text after it,
# so these examples carry no supervision for what an analysis should say.
train_prompts = [
    create_structured_prompt(df_sel.loc[label])
    for label in anomaly_indices[:N_TRAIN_EXAMPLES]
]
train_dataset = Dataset.from_dict({"text": train_prompts})

def tokenize_func(examples):
    """Tokenize the "text" field of a batch with the module-level tokenizer.

    Args:
        examples: Mapping with a "text" entry holding the prompts to encode.

    Returns:
        Whatever the tokenizer returns when called with truncation enabled and
        padding to a fixed length of 128.
    """
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(examples["text"], **tokenizer_options)

# Apply tokenize_func to the prompt dataset in batches.
tokenized_dataset = train_dataset.map(function=tokenize_func, batched=True)

Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 50/50 [00:00<00:00, 2070.75 examples/s]


In [6]:
def analyze_anomaly(row_index):
    """Print a model-generated analysis for one telemetry row.

    Builds the structured prompt for the row, lets the model generate up to 40
    new tokens, and prints the row timestamp, the prompt and the generated text.

    Args:
        row_index: Integer position of the row in ``df_sel`` (looked up with
            ``.iloc``).

    Returns:
        None. The report is written to stdout.
    """
    row = df_sel.iloc[row_index]
    prompt = create_structured_prompt(row)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Generate in eval mode so dropout is inactive, then restore the previous
    # mode so a surrounding training workflow is left undisturbed.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=40,
                # Passing the pad id explicitly avoids the per-call
                # "Setting `pad_token_id` to `eos_token_id`" warning.
                pad_token_id=tokenizer.eos_token_id,
            )
    finally:
        if was_training:
            model.train()

    # The returned sequence starts with the prompt tokens. Decode only what
    # follows them instead of splitting the full text on "Analysis: ", which
    # gives the wrong slice if the model emits that marker itself.
    analysis = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    print("\n" + "="*50)
    print(f"ðŸš¨ ALARM AT {row['ft']}")
    print(f"Sistem Durumu: {prompt}")
    # The analysis produced by the model is shown here.
    print(f"ðŸ§  MODEL ANALÄ°ZÄ°: {analysis}")
    print("="*50)

# Analyze the first anomaly. anomaly_indices holds index labels, while
# analyze_anomaly looks rows up by position (.iloc), so convert the label to
# its position first instead of assuming the two coincide.
if len(anomaly_indices) > 0:
    first_position = df_sel.index.get_loc(anomaly_indices[0])
    analyze_anomaly(first_position)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



ðŸš¨ ALARM AT 2025-06-03 09:01:56.200000+00:00
Sistem Durumu: ### Satellite Telemetry Report
Environment: SHADOW
Thermal: BUS: 2.8C, SDR: 11.0C, BATTERY1: 6.4C, PL: 5.2C
Power: Volt: 12.01V, Current: -1.57A
GPS Sats: 0
Analysis: 
ðŸ§  MODEL ANALÄ°ZÄ°: -------------
The data is available at http://www.shadow.com/shadow/shadow/shadow/shadow/shadow/shadow


Model halüsinasyon görüyor: var olmayan bir link uydurdu.