In [1]:
import pandas as pd
import numpy as np
import os
import torch
from scipy.stats import zscore
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Sistem hazÄ±r. Cihaz: {device}")

  from .autonotebook import tqdm as notebook_tqdm


Sistem hazÄ±r. Cihaz: cpu


In [2]:
# Proje dizinini ayarla
BASE_DIR = os.getcwd()
DATA_RAW_DIR = os.path.join(os.path.dirname(BASE_DIR), "data_raw")
csv_files = sorted([f for f in os.listdir(DATA_RAW_DIR) if f.endswith(".csv")])

# Verileri birleÅŸtir ve zamanÄ± dÃ¼zenle
df = pd.concat([pd.read_csv(os.path.join(DATA_RAW_DIR, f)) for f in csv_files], ignore_index=True)
df["ft"] = pd.to_datetime(df["ft"], errors="coerce")
df = df.sort_values("ft").reset_index(drop=True)

# KRÄ°TÄ°K SÃœTUNLAR (TÃ¼m alt sistemler dahil)
thermal_cols = ["ANALOGS_BUS_TEMP", "RADIO_SDR_TEMP", "ANALOGS_BATTERY1_TEMP", "ANALOGS_PL_TIRS_TEMP1"]
power_cols = ["ANALOGS_BATTERY_VOLTAGE", "ANALOGS_BATTERY_1_CURRENT"]
nav_cols = ["GPS_MSG_TRACKED_SATELLITES"]
env_cols = ["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"]

target_cols = thermal_cols + power_cols + nav_cols
df_sel = df[["ft"] + target_cols + env_cols].copy()

In [3]:
for col in target_cols:
    df_sel[col] = df_sel[col].fillna(method="ffill")
    df_sel[f"{col}_z"] = zscore(df_sel[col])
    df_sel[f"{col}_anomaly"] = df_sel[f"{col}_z"].abs() > 3

# Herhangi bir kolonda anomali olan anlarÄ± bul
anomaly_indices = df_sel[df_sel[[c for c in df_sel.columns if "_anomaly" in c]].any(axis=1)].index
print(f"Toplam tespit edilen anomali anÄ±: {len(anomaly_indices)}")

Toplam tespit edilen anomali anÄ±: 6699


In [4]:
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

# LoRA AyarlarÄ± (Kapasiteyi ve hedef katmanlarÄ± belirledik)
lora_config = LoraConfig(
    r=16, 
    lora_alpha=32,
    target_modules=["c_attn"],
    lora_dropout=0.05,
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)



In [5]:
def create_structured_prompt(row):
    t_sum = ", ".join([f"{c.split('_')[1]}:{row[c]:.1f}C" for c in thermal_cols])
    env = "SHADOW" if row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES" else "SUNLIGHT"
    return f"### Satellite Report\nEnv: {env}\nThermal: {t_sum}\nVolt: {row['ANALOGS_BATTERY_VOLTAGE']:.2f}V\nAnalysis:"

def build_train_sample(row):
    prompt = create_structured_prompt(row)
    # Modele Ã¶ÄŸreteceÄŸimiz ideal mÃ¼hendislik cevaplarÄ±
    if row["ANALOGS_BATTERY_VOLTAGE"] < 12.05 and row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES":
        ans = "Voltage drop detected during SHADOW. Expected behavior. Risk: Low."
    elif row["RADIO_SDR_TEMP"] > 11.5:
        ans = "SDR temperature high. Potential high communication load. Risk: Medium."
    else:
        ans = "Minor telemetry deviation. System within operational limits. Risk: Low."
    return f"{prompt} {ans} {tokenizer.eos_token}"

# EÄŸitim seti oluÅŸtur (Ä°lk 100 anomaliyi Ã¶rnek alÄ±yoruz)
train_texts = [build_train_sample(df_sel.iloc[i]) for i in anomaly_indices[:100]]
train_dataset = Dataset.from_dict({"text": train_texts})
tokenized_dataset = train_dataset.map(lambda x: tokenizer(x["text"], truncation=True, padding="max_length", max_length=128), batched=True)

Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 100/100 [00:00<00:00, 8872.33 examples/s]


In [7]:
training_args = TrainingArguments(
    output_dir="./satellite_model_v2",
    per_device_train_batch_size=4,
    num_train_epochs=10,  # Ã–ÄŸrenme sÃ¼resini artÄ±rdÄ±k
    learning_rate=1e-4,
    save_strategy="no",
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()

Step,Training Loss


TrainOutput(global_step=250, training_loss=1.0785228271484375, metrics={'train_runtime': 205.3805, 'train_samples_per_second': 4.869, 'train_steps_per_second': 1.217, 'total_flos': 32888586240000.0, 'train_loss': 1.0785228271484375, 'epoch': 10.0})

In [8]:
results = []

def process_anomalies_to_excel():
    print(f"ðŸ”„ {len(anomaly_indices)} anomali iÅŸleniyor...")
    
    for idx in anomaly_indices:
        row = df_sel.iloc[idx]
        prompt = create_structured_prompt(row)
        
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = model.generate(
            **inputs, 
            max_new_tokens=40, 
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=3,
            temperature=0.6,
            do_sample=True
        )
        
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        final_output = result.split("Analysis:")[-1].strip()
        
        # Hibrit Kontrol: Model saÃ§malarsa kural tabanlÄ± yedek devreye girer
        if len(final_output) < 10 or "V1V5" in final_output:
            if row["ANALOGS_BATTERY_VOLTAGE"] < 12.05 and row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES":
                final_output = "Battery voltage drop detected during SHADOW. Expected behavior. Risk: Low."
            else:
                final_output = "Telemetry deviation detected. Manual verification recommended. Risk: Medium."

        results.append({
            "Zaman": row["ft"],
            "Ortam": row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"],
            "Batarya_Volt": row["ANALOGS_BATTERY_VOLTAGE"],
            "Analiz_Yorumu": final_output
        })

    report_df = pd.DataFrame(results)
    report_df.to_excel("uydu_anomali_raporu.xlsx", index=False)
    print("âœ… Rapor baÅŸarÄ±yla oluÅŸturuldu: uydu_anomali_raporu.xlsx")
    return report_df.head(10)

# Raporu Ã§alÄ±ÅŸtÄ±r ve ilk sonuÃ§larÄ± gÃ¶r
process_anomalies_to_excel()

ðŸ”„ 6699 anomali iÅŸleniyor...


KeyboardInterrupt: 

bu Ã§ok uzun sÃ¼rdÃ¼ durdurdum. 100 tane seÃ§tim.

In [9]:
results = []

def process_anomalies_to_excel():
    # Sadece ilk 100 tanesini iÅŸleyerek test ediyoruz
    subset_indices = anomaly_indices[:100]
    print(f"ðŸ”„ Toplam {len(anomaly_indices)} anomali iÃ§inden Ä°LK 100 tanesi iÅŸleniyor...")
    
    for idx in subset_indices:
        row = df_sel.iloc[idx]
        prompt = create_structured_prompt(row)
        
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = model.generate(
            **inputs, 
            max_new_tokens=40, 
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=3,
            temperature=0.6,
            do_sample=True
        )
        
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Analiz kÄ±smÄ±nÄ± ayÄ±kla
        final_output = result.split("Analysis:")[-1].strip()
        
        # Hibrit Kontrol (Fallback)
        if len(final_output) < 10 or "V1V5" in final_output:
            if row["ANALOGS_BATTERY_VOLTAGE"] < 12.05 and row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES":
                final_output = "Battery voltage drop detected during SHADOW. Expected behavior. Risk: Low."
            else:
                final_output = "Telemetry deviation detected. Manual verification recommended. Risk: Medium."

        results.append({
            "Zaman": row["ft"],
            "Ortam": row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"],
            "Batarya_Volt": row["ANALOGS_BATTERY_VOLTAGE"],
            "Analiz_Yorumu": final_output
        })

    report_df = pd.DataFrame(results)
    report_df.to_excel("uydu_anomali_raporu.xlsx", index=False)
    print("\nâœ… Rapor baÅŸarÄ±yla oluÅŸturuldu: uydu_anomali_raporu.xlsx")
    return report_df.head(10)

# Raporu Ã§alÄ±ÅŸtÄ±r
process_anomalies_to_excel()

ðŸ”„ Toplam 6699 anomali iÃ§inden Ä°LK 100 tanesi iÅŸleniyor...


ValueError: Excel does not support datetimes with timezones. Please ensure that datetimes are timezone unaware before writing to Excel.

saat formatÄ±ndan hata aldÄ±k muhtemelen dÃ¼zeltip tekrar deneyeceÄŸim

In [10]:
results = []

def process_anomalies_to_excel():
    # Sadece ilk 100 tanesini iÅŸleyerek test ediyoruz
    subset_indices = anomaly_indices[:100]
    print(f"ðŸ”„ Toplam {len(anomaly_indices)} anomali iÃ§inden Ä°LK 100 tanesi iÅŸleniyor...")
    
    for idx in subset_indices:
        row = df_sel.iloc[idx]
        prompt = create_structured_prompt(row)
        
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = model.generate(
            **inputs, 
            max_new_tokens=40, 
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=3,
            temperature=0.6,
            do_sample=True
        )
        
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        final_output = result.split("Analysis:")[-1].strip()
        
        # Hibrit Kontrol (Fallback)
        if len(final_output) < 10 or "V1V5" in final_output:
            if row["ANALOGS_BATTERY_VOLTAGE"] < 12.05 and row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES":
                final_output = "Battery voltage drop detected during SHADOW. Expected behavior. Risk: Low."
            else:
                final_output = "Telemetry deviation detected. Manual verification recommended. Risk: Medium."

        # Zaman dilimi hatasÄ±nÄ± Ã§Ã¶zmek iÃ§in .replace(tzinfo=None) kullanÄ±yoruz
        clean_time = row["ft"].replace(tzinfo=None) if hasattr(row["ft"], "tzinfo") else row["ft"]

        results.append({
            "Zaman": clean_time,
            "Ortam": row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"],
            "Batarya_Volt": row["ANALOGS_BATTERY_VOLTAGE"],
            "Analiz_Yorumu": final_output
        })

    report_df = pd.DataFrame(results)
    
    # openpyxl yÃ¼klÃ¼ deÄŸilse hata verebilir, yÃ¼klemek gerekebilir: !pip install openpyxl
    report_df.to_excel("uydu_anomali_raporu.xlsx", index=False)
    
    print("\nâœ… Rapor baÅŸarÄ±yla oluÅŸturuldu: uydu_anomali_raporu.xlsx")
    return report_df.head(10)

# Raporu Ã§alÄ±ÅŸtÄ±r
process_anomalies_to_excel()

ðŸ”„ Toplam 6699 anomali iÃ§inden Ä°LK 100 tanesi iÅŸleniyor...

âœ… Rapor baÅŸarÄ±yla oluÅŸturuldu: uydu_anomali_raporu.xlsx


Unnamed: 0,Zaman,Ortam,Batarya_Volt,Analiz_Yorumu
0,2025-06-03 09:01:56.200,YES,12.0075,Minor drop detected. Risk: Low. VAC. _________...
1,2025-06-03 09:01:58.200,YES,12.0075,VERTEN TEST\nAnalysis. System behavior behavio...
2,2025-06-03 09:02:00.200,YES,12.0075,Voltage drop observed during SHADOWS. Risk: Lo...
3,2025-06-03 09:02:34.200,NO,12.0075,Voltage drop detected during the SUNLITH test....
4,2025-06-03 09:02:36.200,NO,12.0075,SDR data leak. Minor. Exposit: Low. ----------...
5,2025-06-03 09:02:38.200,NO,11.988,Voltage drop of this. Expected behavior. Risk:...
6,2025-06-03 09:02:40.200,NO,11.988,Voltage drop observed during SUNLAY. Expected ...
7,2025-06-03 09:02:42.200,NO,11.988,Voltage drop detected during SUNLAY. RISP: Hig...
8,2025-06-03 10:36:46.200,YES,12.0075,Voltage drop detected during SHADOWS. Expected...
9,2025-06-03 10:36:50.200,YES,12.0075,Voltage drop. Expected. Ex-lapse. Behavior. Ri...


In [11]:
results = []

def process_anomalies_to_excel():
    # Sadece ilk 100 tanesini iÅŸleyerek test ediyoruz (HÄ±z iÃ§in)
    subset_indices = anomaly_indices[:100]
    print(f"ðŸ”„ Toplam {len(anomaly_indices)} anomali iÃ§inden Ä°LK 100 tanesi iÅŸleniyor...")
    
    for idx in subset_indices:
        row = df_sel.iloc[idx]
        prompt = create_structured_prompt(row)
        
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = model.generate(
            **inputs, 
            max_new_tokens=40, 
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=3,
            temperature=0.6,
            do_sample=True
        )
        
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        final_output = result.split("Analysis:")[-1].strip()
        
        # Hibrit Kontrol (Fallback)
        if len(final_output) < 10 or "V1V5" in final_output:
            if row["ANALOGS_BATTERY_VOLTAGE"] < 12.05 and row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES":
                final_output = "Battery voltage drop detected during SHADOW. Expected behavior. Risk: Low."
            else:
                final_output = "Telemetry deviation detected. Manual verification recommended. Risk: Medium."

        # Zaman dilimi hatasÄ±nÄ± Ã§Ã¶zmek iÃ§in
        clean_time = row["ft"].replace(tzinfo=None) if hasattr(row["ft"], "tzinfo") else row["ft"]

        # EXCEL'E EKLENECEK SÃœTUNLARI BURADA GENÄ°ÅžLETÄ°YORUZ
        results.append({
            "Zaman": clean_time,
            "Ortam": row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"],
            "Batarya_Volt": row["ANALOGS_BATTERY_VOLTAGE"],
            "GÃ¶vde_Sicaklik (BUS)": row["ANALOGS_BUS_TEMP"],       # Yeni eklendi
            "Radyo_Sicaklik (SDR)": row["RADIO_SDR_TEMP"],       # Yeni eklendi
            "GPS_Uydu_Sayisi": row["GPS_MSG_TRACKED_SATELLITES"], # Yeni eklendi
            "Analiz_Yorumu": final_output
        })

    report_df = pd.DataFrame(results)
    
    # Yeni dosyayÄ± kaydediyoruz
    report_df.to_excel("uydu_detayli_anomali_raporu.xlsx", index=False)
    
    print("\nâœ… DetaylÄ± rapor baÅŸarÄ±yla oluÅŸturuldu: uydu_detayli_anomali_raporu.xlsx")
    return report_df.head(10)

# Raporu Ã§alÄ±ÅŸtÄ±r
process_anomalies_to_excel()

ðŸ”„ Toplam 6699 anomali iÃ§inden Ä°LK 100 tanesi iÅŸleniyor...

âœ… DetaylÄ± rapor baÅŸarÄ±yla oluÅŸturuldu: uydu_detayli_anomali_raporu.xlsx


Unnamed: 0,Zaman,Ortam,Batarya_Volt,GÃ¶vde_Sicaklik (BUS),Radyo_Sicaklik (SDR),GPS_Uydu_Sayisi,Analiz_Yorumu
0,2025-06-03 09:01:56.200,YES,12.0075,2.8105,11,0,Voltage drop detected during SHADOWS. Expected...
1,2025-06-03 09:01:58.200,YES,12.0075,2.627,11,0,Voltage drop detected during SHADOWS. Risk: Cr...
2,2025-06-03 09:02:00.200,YES,12.0075,2.8105,11,0,Voltage drop detected during SHADOWS. Expected...
3,2025-06-03 09:02:34.200,NO,12.0075,2.627,11,0,Voltage drop detected during the SUNLIGH signa...
4,2025-06-03 09:02:36.200,NO,12.0075,2.732,11,0,Voltage drop detected during SUNLING. Expected...
5,2025-06-03 09:02:38.200,NO,11.988,2.6795,11,0,Minor voltage drop. Minor out of SHARP. Expect...
6,2025-06-03 09:02:40.200,NO,11.988,2.6795,11,0,Low-Volt DATA DATA IN VIA. RIS: --------------...
7,2025-06-03 09:02:42.200,NO,11.988,2.732,11,0,Voltage drop detected during SUNLAY. Expected ...
8,2025-06-03 10:36:46.200,YES,12.0075,3.0965,11,0,Voltage drop detected during SHADOWS. Risk: Lo...
9,2025-06-03 10:36:50.200,YES,12.0075,2.993,11,0,Voltage drop detected during SHADOWS. Expected...
