In [1]:
import pandas as pd
import numpy as np
import os
import torch
from scipy.stats import zscore
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Sistem hazÄ±r. Cihaz: {device}")

  from .autonotebook import tqdm as notebook_tqdm


Sistem hazÄ±r. Cihaz: cpu


In [2]:
# Dizin ayarlarÄ±
BASE_DIR = os.getcwd()
DATA_RAW_DIR = os.path.join(os.path.dirname(BASE_DIR), "data_raw")
csv_files = sorted([f for f in os.listdir(DATA_RAW_DIR) if f.endswith(".csv")])

# Verileri birleÅŸtir ve zamanÄ± dÃ¼zenle
df = pd.concat([pd.read_csv(os.path.join(DATA_RAW_DIR, f)) for f in csv_files], ignore_index=True)
df["ft"] = pd.to_datetime(df["ft"], errors="coerce")
df = df.sort_values("ft").reset_index(drop=True)

# KRÄ°TÄ°K SÃœTUNLAR
thermal_cols = ["ANALOGS_BUS_TEMP", "RADIO_SDR_TEMP", "ANALOGS_BATTERY1_TEMP", "ANALOGS_PL_TIRS_TEMP1"]
power_cols = ["ANALOGS_BATTERY_VOLTAGE", "ANALOGS_BATTERY_1_CURRENT"]
nav_cols = ["GPS_MSG_TRACKED_SATELLITES"]
env_cols = ["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"]

target_cols = thermal_cols + power_cols + nav_cols
df_sel = df[["ft"] + target_cols + env_cols].copy()

In [3]:
for col in target_cols:
    df_sel[col] = df_sel[col].fillna(method="ffill")
    df_sel[f"{col}_z"] = zscore(df_sel[col])
    df_sel[f"{col}_anomaly"] = df_sel[f"{col}_z"].abs() > 3

# En az bir hatasÄ± olan satÄ±rlarÄ±n indeksleri
anomaly_indices = df_sel[df_sel[[c for c in df_sel.columns if "_anomaly" in c]].any(axis=1)].index
print(f"Tespit edilen anomali anÄ±: {len(anomaly_indices)}")

Tespit edilen anomali anÄ±: 6699


In [4]:
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

lora_config = LoraConfig(
    r=16, 
    lora_alpha=32,
    target_modules=["c_attn"],
    lora_dropout=0.05,
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)



In [5]:
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

lora_config = LoraConfig(
    r=16, 
    lora_alpha=32,
    target_modules=["c_attn"],
    lora_dropout=0.05,
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

In [6]:
def create_structured_prompt(row):
    t_sum = ", ".join([f"{c.split('_')[1]}:{row[c]:.1f}C" for c in thermal_cols])
    env = "SHADOW" if row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES" else "SUNLIGHT"
    
    return f"### Satellite Report\nEnv: {env}\nThermal: {t_sum}\nVolt: {row['ANALOGS_BATTERY_VOLTAGE']:.2f}V\nAnalysis:"

def build_train_sample(row):
    prompt = create_structured_prompt(row)
    # Ä°deal cevap kalÄ±plarÄ±
    if row["ANALOGS_BATTERY_VOLTAGE"] < 12.1 and row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES":
        ans = "Voltage drop detected due to SHADOW period. Expected behavior. Risk: Low."
    elif row["RADIO_SDR_TEMP"] > 11.5:
        ans = "High thermal levels in SDR. Check communication load. Risk: Medium."
    else:
        ans = "Parameter deviation detected. Monitor subsystem health. Risk: Low."
    
    return f"{prompt} {ans} {tokenizer.eos_token}"

# Veri setini oluÅŸtur ve eÄŸitme hazÄ±rla
train_texts = [build_train_sample(df_sel.iloc[i]) for i in anomaly_indices[:100]]
train_dataset = Dataset.from_dict({"text": train_texts})
tokenized_dataset = train_dataset.map(lambda x: tokenizer(x["text"], truncation=True, padding="max_length", max_length=128), batched=True)

Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 100/100 [00:00<00:00, 9729.53 examples/s]


In [7]:
training_args = TrainingArguments(
    output_dir="./satellite_model",
    per_device_train_batch_size=4,
    num_train_epochs=5, # 5 tur yeterli
    learning_rate=2e-4,
    save_strategy="no",
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()

`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss


TrainOutput(global_step=125, training_loss=2.8876572265625, metrics={'train_runtime': 89.1916, 'train_samples_per_second': 5.606, 'train_steps_per_second': 1.401, 'total_flos': 16444293120000.0, 'train_loss': 2.8876572265625, 'epoch': 5.0})

In [8]:
def analyze_anomaly(idx):
    row = df_sel.iloc[idx]
    prompt = create_structured_prompt(row)
    
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs, 
        max_new_tokens=40, 
        pad_token_id=tokenizer.eos_token_id,
        no_repeat_ngram_size=2
    )
    
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Sadece analiz kÄ±smÄ±nÄ± ayÄ±kla
    final_output = result.split("Analysis:")[-1].strip()
    
    print("-" * 50)
    print(f"ðŸš¨ ZAMAN: {row['ft']}")
    print(f"ðŸ“Š VERÄ°: {prompt.replace('### Attention!! Satellite Report', '').strip()}")
    print(f"ðŸ§  ANALÄ°Z: {final_output}")

# Ä°lk 3 anomaliyi test edelim
for i in range(3):
    analyze_anomaly(anomaly_indices[i])

--------------------------------------------------
ðŸš¨ ZAMAN: 2025-06-03 09:01:56.200000+00:00
ðŸ“Š VERÄ°: ### Satellite Report
Env: SHADOW
Thermal: BUS:2.8C, SDR:11.0C, BATTERY1:6.4C, PL:5.2C
Volt: 12.01V
Analysis:
ðŸ§  ANALÄ°Z: Shortwave detected. Short wave. Expected. Event. Risk. Type. Precondition. The measurement stage.Vom.Event..End. We now have a chance to experience a full
--------------------------------------------------
ðŸš¨ ZAMAN: 2025-06-03 09:01:58.200000+00:00
ðŸ“Š VERÄ°: ### Satellite Report
Env: SHADOW
Thermal: BUS:2.6C, SDR:11.0C, BATTERY1:6.4C, PL:5.2C
Volt: 12.01V
Analysis:
ðŸ§  ANALÄ°Z: Voltage of VAC level. Pre-AC. Low. Risk. High. Rate.
The cause of this defect is unknown. We are investigating the cause. The cause is not known. V
--------------------------------------------------
ðŸš¨ ZAMAN: 2025-06-03 09:02:00.200000+00:00
ðŸ“Š VERÄ°: ### Satellite Report
Env: SHADOW
Thermal: BUS:2.8C, SDR:11.0C, BATTERY1:6.4C, PL:5.2C
Volt: 12.01V
Analysis:
ðŸ§  ANALÄ°Z: Volt

"Shortwave detected" rastgele terim salladÄ± hala iyi deÄŸil. epoch sayÄ±sÄ±mÄ± arttÄ±ralÄ±m. 10 Epoch deneyeceÄŸim ve anomalileri excel e kaydedeceÄŸim
