In [1]:
import pandas as pd
import numpy as np
import os
import torch
from scipy.stats import zscore
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset

# Seed NumPy and PyTorch before any model weights are created or any text is
# sampled, so that a fresh "Restart & Run All" starts from a known RNG state.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when torch reports one as available; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Sistem hazÄ±r. Cihaz: {device}")

  from .autonotebook import tqdm as notebook_tqdm


Sistem hazÄ±r. Cihaz: cpu


In [2]:
# Locate the raw-data directory: a "data_raw" folder inside the parent of the
# current working directory.
BASE_DIR = os.getcwd()
DATA_RAW_DIR = os.path.join(os.path.dirname(BASE_DIR), "data_raw")
csv_files = sorted(f for f in os.listdir(DATA_RAW_DIR) if f.endswith(".csv"))
if not csv_files:
    # Without this guard pd.concat fails with "No objects to concatenate",
    # which hides the real cause (nothing to load).
    raise FileNotFoundError(f"No .csv files found in {DATA_RAW_DIR}")

# Merge every CSV (in sorted filename order) into one frame, parse the "ft"
# column as datetimes (unparseable values become NaT) and order rows by it.
df = pd.concat([pd.read_csv(os.path.join(DATA_RAW_DIR, f)) for f in csv_files], ignore_index=True)
df["ft"] = pd.to_datetime(df["ft"], errors="coerce")
df = df.sort_values("ft").reset_index(drop=True)

# Subsystem columns
thermal_cols = ["ANALOGS_BUS_TEMP", "RADIO_SDR_TEMP", "ANALOGS_BATTERY1_TEMP", "ANALOGS_PL_TIRS_TEMP1"]
power_cols = ["ANALOGS_BATTERY_VOLTAGE", "ANALOGS_BATTERY_1_CURRENT"]
nav_cols = ["GPS_MSG_TRACKED_SATELLITES"]
env_cols = ["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"]

# Channels that are screened for anomalies; the environment flag is carried
# along as context only.
target_cols = thermal_cols + power_cols + nav_cols
# Independent copy so later column additions do not touch the merged frame.
df_sel = df[["ft"] + target_cols + env_cols].copy()

In [3]:
# Flag, per monitored channel, every sample whose z-score magnitude exceeds
# Z_THRESHOLD, then collect the rows where at least one channel is flagged.
Z_THRESHOLD = 3
anomaly_cols = []
for col in target_cols:
    # Series.ffill() replaces the deprecated fillna(method="ffill").
    df_sel[col] = df_sel[col].ffill()
    # Forward fill cannot fill gaps at the very start of a column. With the
    # default nan_policy="propagate" a single remaining NaN turns the whole
    # z-score column into NaN (so nothing is ever flagged); "omit" computes
    # mean/std over the valid samples and leaves NaN only where input is NaN.
    df_sel[f"{col}_z"] = zscore(df_sel[col].to_numpy(), nan_policy="omit")
    # NaN z-scores compare as False, i.e. they are not counted as anomalies.
    df_sel[f"{col}_anomaly"] = df_sel[f"{col}_z"].abs() > Z_THRESHOLD
    anomaly_cols.append(f"{col}_anomaly")

# Track the flag columns explicitly instead of matching "_anomaly" by substring.
anomaly_indices = df_sel.index[df_sel[anomaly_cols].any(axis=1)]
print(f"Tespit edilen toplam anomali anÄ±: {len(anomaly_indices)}")

Tespit edilen toplam anomali anÄ±: 6699


In [4]:
# Checkpoint name shared by the tokenizer and the causal-LM weights.
model_name = "distilgpt2"

# The tokenizer reuses its end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# LoRA adapter settings; adapters are attached to the "c_attn" modules.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["c_attn"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
)

# Load the pretrained weights onto the selected device, then wrap the model
# with the LoRA adapters described above.
model = get_peft_model(
    AutoModelForCausalLM.from_pretrained(model_name).to(device),
    lora_config,
)



In [5]:
def create_structured_prompt(row):
    """Render one telemetry row as the text prompt given to the language model.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) holding every column
            listed in ``thermal_cols`` plus ``ANALOGS_BATTERY_VOLTAGE`` and
            ``REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG``.

    Returns:
        A multi-line string with the environment label, the thermal readings
        (one decimal place), the battery voltage (two decimal places) and a
        trailing ``Analiz:`` marker for the model to continue from.
    """
    readings = []
    for column in thermal_cols:
        # Label each reading with the second "_"-separated token of its column name.
        label = column.split("_")[1]
        readings.append(f"{label}:{row[column]:.1f}C")
    t_sum = ", ".join(readings)

    # Only an exact "YES" flag selects the first label; anything else gets the second.
    if row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES":
        env = "GÃ–LGE"
    else:
        env = "GÃœNEÅž"

    voltage = row["ANALOGS_BATTERY_VOLTAGE"]
    return f"### Uydu Durum Raporu\nOrtam: {env}\nSÄ±caklÄ±k: {t_sum}\nVolt: {voltage:.2f}V\nAnaliz:"

def build_train_sample(row):
    """Build one fine-tuning example: prompt, rule-based answer and EOS token.

    The answer is chosen by fixed rules, checked in order:
      1. battery voltage below 12.05 while the umbra flag is "YES";
      2. SDR temperature above 11.5;
      3. otherwise a generic message.

    Args:
        row: Mapping-like telemetry record accepted by
            ``create_structured_prompt``; must also provide ``RADIO_SDR_TEMP``.

    Returns:
        ``"<prompt> <answer> <eos>"`` as a single string.
    """
    prompt = create_structured_prompt(row)

    def finish(ans):
        # Join prompt, answer and the tokenizer's end-of-sequence token.
        return f"{prompt} {ans} {tokenizer.eos_token}"

    low_voltage = row["ANALOGS_BATTERY_VOLTAGE"] < 12.05
    if low_voltage and row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES":
        return finish("GÃ¶lge periyodu nedeniyle voltaj dÃ¼ÅŸÃ¼ÅŸÃ¼. Beklenen durum. Risk: DÃ¼ÅŸÃ¼k.")
    if row["RADIO_SDR_TEMP"] > 11.5:
        return finish("SDR biriminde Ä±sÄ±nma tespit edildi. HaberleÅŸme yÃ¼kÃ¼ yÃ¼ksek olabilir. Risk: Orta.")
    return finish("Telemetri verilerinde sapma. Sistem parametreleri takip edilmeli. Risk: DÃ¼ÅŸÃ¼k.")

# Build one training text per row, using the first 100 positions listed in
# anomaly_indices.
train_texts = list(map(build_train_sample, (df_sel.iloc[i] for i in anomaly_indices[:100])))
train_dataset = Dataset.from_dict({"text": train_texts})


def _tokenize_batch(batch):
    """Tokenize a batch of texts, truncated and padded to exactly 128 tokens."""
    return tokenizer(batch["text"], max_length=128, padding="max_length", truncation=True)


tokenized_dataset = train_dataset.map(_tokenize_batch, batched=True)

Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 100/100 [00:00<00:00, 9287.45 examples/s]


In [6]:
# Fine-tuning settings: 10 epochs, batch size 4 per device, learning rate 1e-4.
# Checkpoint saving and external experiment reporting are both switched off.
training_args = TrainingArguments(
    output_dir="./satellite_tr_model",
    num_train_epochs=10,
    per_device_train_batch_size=4,
    learning_rate=1e-4,
    report_to="none",
    save_strategy="no",
)

# mlm=False: batches are collated for causal (not masked) language modelling.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
    train_dataset=tokenized_dataset,
)

# Left as the last expression so the notebook displays the training summary.
trainer.train()

`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss


TrainOutput(global_step=250, training_loss=3.57658447265625, metrics={'train_runtime': 174.7965, 'train_samples_per_second': 5.721, 'train_steps_per_second': 1.43, 'total_flos': 32888586240000.0, 'train_loss': 3.57658447265625, 'epoch': 10.0})

In [7]:
# Module-level view of the most recent report rows. It is overwritten (not
# appended to) on every call of process_anomalies_to_excel_tr().
results = []


def _fallback_analysis(row):
    """Return the rule-based analysis text used when the model output is rejected."""
    if row["ANALOGS_BATTERY_VOLTAGE"] < 12.05 and row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES":
        return "GÃ¶lge geÃ§iÅŸi nedeniyle voltaj dÃ¼ÅŸÃ¼ÅŸÃ¼. Normal sistem davranÄ±ÅŸÄ±. Risk: DÃ¼ÅŸÃ¼k."
    return "Telemetri sapmasÄ± tespit edildi. Uzman kontrolÃ¼ Ã¶nerilir. Risk: Orta."


def process_anomalies_to_excel_tr(limit=100, output_path="uydu_final_raporu_tr.xlsx"):
    """Generate an analysis text per anomalous row and export them to Excel.

    For each selected row a prompt is built, the fine-tuned model samples a
    continuation, and obviously broken continuations are replaced by a
    rule-based fallback text.

    Args:
        limit: Number of leading entries of ``anomaly_indices`` to process
            (default 100, kept small for speed); ``None`` processes all.
        output_path: Destination of the Excel report.

    Returns:
        The first ten rows of the report as a DataFrame.

    Side effects:
        Writes ``output_path``, replaces the contents of the module-level
        ``results`` list and switches ``model`` to evaluation mode.
    """
    subset_indices = anomaly_indices[:limit]
    print(f"ðŸ”„ {len(subset_indices)} anomali TÃ¼rkÃ§e olarak iÅŸleniyor...")

    # The model has just been fine-tuned and may still be in training mode;
    # eval() disables dropout (including the LoRA dropout) during generation.
    model.eval()

    # Collect rows locally so that calling this function twice does not
    # duplicate entries in the report.
    records = []
    for idx in subset_indices:
        row = df_sel.iloc[idx]
        prompt = create_structured_prompt(row)

        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=45,
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=3,
            temperature=0.5,
            do_sample=True
        )

        # Decode only the newly generated tokens. Splitting the full text on
        # "Analiz:" would drop part of the answer whenever the model repeats
        # that marker in its continuation.
        prompt_len = inputs["input_ids"].shape[1]
        final_output = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

        # Hybrid filter (safety belt): too-short output or output containing
        # known junk patterns is replaced by a rule-based text.
        if len(final_output) < 10 or "_" in final_output or "V1V5" in final_output:
            final_output = _fallback_analysis(row)

        # Strip any timezone info from the timestamp before it goes into the
        # Excel report (the original notes this as a timezone-error workaround).
        clean_time = row["ft"].replace(tzinfo=None) if hasattr(row["ft"], "tzinfo") else row["ft"]

        records.append({
            "Zaman": clean_time,
            "YÃ¶rÃ¼nge_Durumu": "GÃ–LGE" if row["REFS_SUN_ECLIPSE_EARTH_UMBRA_FLAG"] == "YES" else "GÃœNEÅž",
            "Batarya_Volt": row["ANALOGS_BATTERY_VOLTAGE"],
            "GÃ¶vde_SÄ±caklÄ±k": row["ANALOGS_BUS_TEMP"],
            "Radyo_SÄ±caklÄ±k": row["RADIO_SDR_TEMP"],
            "Yapay_Zeka_Analizi": final_output
        })

    # Refresh the module-level list in place for code that reads `results`.
    results[:] = records

    report_df = pd.DataFrame(records)
    report_df.to_excel(output_path, index=False)
    print(f"\nâœ… TÃ¼rkÃ§e Final Raporu HazÄ±r: {output_path}")
    return report_df.head(10)


process_anomalies_to_excel_tr()

ðŸ”„ 100 anomali TÃ¼rkÃ§e olarak iÅŸleniyor...

âœ… TÃ¼rkÃ§e Final Raporu HazÄ±r: uydu_final_raporu_tr.xlsx


Unnamed: 0,Zaman,YÃ¶rÃ¼nge_Durumu,Batarya_Volt,GÃ¶vde_SÄ±caklÄ±k,Radyo_SÄ±caklÄ±k,Yapay_Zeka_Analizi
0,2025-06-03 09:01:56.200,GÃ–LGE,12.0075,2.8105,11,"BV:8.6C\nCÃ–lge:Â¡C:6C, CHV:12.5C\nRice:Â¤V\nV:4...."
1,2025-06-03 09:01:58.200,GÃ–LGE,12.0075,2.627,11,"RÃ¼kse: GÃ¼ÄŸlu: DÃ¼kselk, Åžetiz: Åžek: GÃ¶k.5C.8C.0..."
2,2025-06-03 09:02:00.200,GÃ–LGE,12.0075,2.8105,11,SDR6.6.9V.6\nVibril:5\nSÃ¼cak.V. for luice.\nDu...
3,2025-06-03 09:02:34.200,GÃœNEÅž,12.0075,2.627,11,"DÃ¼lenkisÅž.SÃ¼cakleÅž: SÃ¼ckeÅž, iz. DÃ¼renkis.SÄ±lÄ±l..."
4,2025-06-03 09:02:36.200,GÃœNEÅž,12.0075,2.732,11,DURUM.\nCÃ¼llig: GÃ¼neÅž.\nVox: D.V.\nDURUM: V.2....
5,2025-06-03 09:02:38.200,GÃœNEÅž,11.988,2.6795,11,"ÄžÄ±klenu, ÄžÃœÅžnu.\nV:6C.0V\nViz: iziziz.\nCe: Ä±k..."
6,2025-06-03 09:02:40.200,GÃœNEÅž,11.988,2.6795,11,Telemetri sapmasÄ± tespit edildi. Uzman kontrol...
7,2025-06-03 09:02:42.200,GÃœNEÅž,11.988,2.732,11,Bus:13.0V\nVDR:12.0G\nVap:5\nV:13\nVice: ipsum...
8,2025-06-03 10:36:46.200,GÃ–LGE,12.0075,3.0965,11,SDR.1.0V\nCice: S.5.5V\nVice: 0.4V\nDil: ÄŸÄ±kÄ±k...
9,2025-06-03 10:36:50.200,GÃ–LGE,12.0075,2.993,11,Ä°kalÄ±kÄ±kÃ¼k.\nSÃ¼caklan: GekÃ¼cÄ±k. The mainstay o...
