In [1]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import pandas as pd

In [3]:
# Load the feature table from '../features_early_cycles.csv' into `df`
# (the path is relative to the current working directory).
df = pd.read_csv('../features_early_cycles.csv')

In [4]:
# Take a copy to be safe before modifying columns.
df = df.copy()

# 1) Convert the cycle_life column from strings to floats,
#    e.g. "[904.]" -> 904.0.
#    The regex is written as a raw string: in a plain literal "\[" is an
#    invalid escape sequence, which Python reports with a warning
#    (SyntaxWarning on Python 3.12+, DeprecationWarning before that).
cycle_life_text = (
    df['cycle_life']
    .astype(str)                              # make every entry a string
    .str.strip()                              # trim surrounding whitespace
    .str.replace(r'[\[\]]', '', regex=True)   # remove the square brackets
    .str.strip()                              # trim whitespace that was inside the brackets
)
# errors='coerce' turns unparsable entries into NaN so that step 3 can drop
# them, the same way the later cells of this notebook parse cycle_life.
# The trailing astype(float) keeps the column float even if every value
# happens to look like an integer.
df['cycle_life'] = pd.to_numeric(cycle_life_text, errors='coerce').astype(float)

# 2) Make sure the feature columns are numeric as well.
feature_cols = ['Qd_mean', 'Qd_std', 'IR_mean', 'IR_std', 'Tavg_mean', 'dQd_slope']

for col in feature_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# 3) Drop every row that has NaN in the target or in any feature column.
df = df.dropna(subset=['cycle_life'] + feature_cols)


  .str.replace('[\[\]]', '', regex=True)  # köşeli parantezleri sil


In [5]:
# PREDICTION WITH A RANDOM FOREST MODEL
#
# For each early-cycle window (rows where n_cycles is 25, 50 or 100) this cell
# fits a RandomForestRegressor on an 80/20 train/test split and stores the
# test-set MAE and R² in `results`, keyed by n_cycles.

# The feature list does not change between iterations, so it is defined once.
rf_feature_cols = ['Qd_mean', 'Qd_std', 'IR_mean', 'IR_std', 'Tavg_mean', 'dQd_slope']

# Minimum number of rows a subset needs before it is modelled. An empty subset
# makes train_test_split raise, and a split that leaves a single test sample
# makes R² undefined. 10 is the threshold the combined-feature cell of this
# notebook already uses.
MIN_ROWS_PER_SUBSET = 10

results = {}

for n_cycles in [25, 50, 100]:
    print(f"\n{'='*50}")
    print(f"İlk {n_cycles} döngü ile tahmin")
    print(f"{'='*50}")

    df_subset = df[df['n_cycles'] == n_cycles]

    # Skip windows without enough data instead of failing inside the split.
    if len(df_subset) < MIN_ROWS_PER_SUBSET:
        print("⚠ Yeterli veri yok, bu n_cycles atlanıyor.")
        continue

    # Separate the features from the target.
    X = df_subset[rf_feature_cols]
    y = df_subset['cycle_life']

    # Train/test split (fixed seed so the split is repeatable).
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the model.
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predict on the held-out rows.
    y_pred = model.predict(X_test)

    # Performance metrics on the test split.
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results[n_cycles] = {'MAE': mae, 'R2': r2}
    print(f"MAE: {mae:.2f}")
    print(f"R² Score: {r2:.4f}")

    # Feature importances, in the column order of X.
    print("\nÖzellik Önemi:")
    for feat, imp in zip(X.columns, model.feature_importances_):
        print(f"  {feat}: {imp:.4f}")


İlk 25 döngü ile tahmin
MAE: 148.01
R² Score: -0.4098

Özellik Önemi:
  Qd_mean: 0.0523
  Qd_std: 0.2652
  IR_mean: 0.3877
  IR_std: 0.1602
  Tavg_mean: 0.0982
  dQd_slope: 0.0364

İlk 50 döngü ile tahmin
MAE: 109.30
R² Score: -0.0180

Özellik Önemi:
  Qd_mean: 0.0324
  Qd_std: 0.6189
  IR_mean: 0.1688
  IR_std: 0.1156
  Tavg_mean: 0.0258
  dQd_slope: 0.0386

İlk 100 döngü ile tahmin
MAE: 73.19
R² Score: 0.6709

Özellik Önemi:
  Qd_mean: 0.1711
  Qd_std: 0.0464
  IR_mean: 0.1479
  IR_std: 0.3477
  Tavg_mean: 0.1287
  dQd_slope: 0.1584


In [9]:
# Summary: one line per early-cycle window with the R² and MAE currently
# stored in `results`.
# NOTE(review): the saved output of this cell does not match the metrics
# printed by the In [5] cell, so it was presumably produced by a different
# run of the notebook -- re-run top to bottom before trusting it.
banner = '=' * 50
print(f"\n{banner}")
print("ÖZET - Erken Tahmin Performansı")
print(banner)
for window, scores in results.items():
    print(f"First {window} Cycle - R²: {scores['R2']:.4f}, MAE: {scores['MAE']:.2f}")


ÖZET - Erken Tahmin Performansı
First 25 Cycle - R²: 0.0026, MAE: 132.16
First 50 Cycle - R²: 0.2635, MAE: 98.12
First 100 Cycle - R²: 0.4507, MAE: 96.29


In [10]:
# ============================================
# Random Forest model that uses only the dQ/dV features
# ============================================
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# ============================================
# 2) Load the data
# ============================================
# dqdv_features.csv is the CSV we created with extract_dqdv_features
df = pd.read_csv("../dqdv_features.csv")

print("Orijinal satır sayısı:", len(df))
print(df.head())

# ============================================
# 3) Clean the cycle_life column (the target variable y)
# ============================================
df = df.copy()

# cycle_life can arrive as text such as "[904.]": trim whitespace, remove the
# square brackets, then parse. Entries that still cannot be parsed become NaN.
cycle_life_text = df["cycle_life"].astype(str).str.strip()
cycle_life_text = cycle_life_text.str.replace(r"[\[\]]", "", regex=True)
df["cycle_life"] = pd.to_numeric(cycle_life_text, errors="coerce")

print("cycle_life içinde NaN sayısı:", df["cycle_life"].isna().sum())

# ============================================
# 4) Define the feature columns
# ============================================
# Six statistics for each of the two dQ/dV quantities (peak first, then area).
candidate_features = [
    f"dqdv_{quantity}_{stat}"
    for quantity in ("peak", "area")
    for stat in ("first", "last", "delta", "mean", "std", "slope")
]

# Keep only the candidates that are really present in the file, so that the
# column selection further down does not fail on a missing name.
feature_cols = [name for name in candidate_features if name in df.columns]
print("\nKullanılacak feature kolonları:")
print(feature_cols)

# Drop rows that have NaN in cycle_life or in any selected feature.
required_cols = ["cycle_life", *feature_cols]
df = df.dropna(subset=required_cols)
print("\nTemizlik sonrası satır sayısı:", len(df))

# ============================================
# 5) Train and evaluate a separate model for each n_cycles
# ============================================
# Minimum number of rows a subset needs before it is modelled. With
# test_size=0.2 a 5-row subset leaves a single test sample, for which R² is
# undefined, so the threshold is 10 -- the same value the combined-feature
# cell of this notebook uses.
MIN_ROWS_PER_SUBSET = 10

# The forest settings are identical for every subset, so define them once.
RF_PARAMS = {
    "n_estimators": 400,
    "min_samples_leaf": 2,
    "max_features": "sqrt",
    "random_state": 42,   # fixed seed so the fit is repeatable
    "n_jobs": -1,
}

results = {}

for n_cycles in [25, 50, 100]:
    print(f"\n{'='*60}")
    print(f"n_cycles = {n_cycles} için model eğitimi")
    print(f"{'='*60}")

    df_subset = df[df["n_cycles"] == n_cycles].copy()
    print("Bu n_cycles için satır sayısı:", len(df_subset))

    # Skip this n_cycles when there is not enough data for a usable split.
    if len(df_subset) < MIN_ROWS_PER_SUBSET:
        print("⚠ Yeterli veri yok, bu n_cycles atlanıyor.")
        continue

    X = df_subset[feature_cols]
    y = df_subset["cycle_life"]

    # Train/test split (80/20, fixed seed).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Build and fit the model.
    model = RandomForestRegressor(**RF_PARAMS)
    model.fit(X_train, y_train)

    # Predict on the held-out rows.
    y_pred = model.predict(X_test)

    # Metrics on the test split.
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results[n_cycles] = {"MAE": mae, "R2": r2}

    print(f"MAE: {mae:.2f}")
    print(f"R² : {r2:.4f}")

    # Feature importances, largest first.
    print("\nÖzellik önemleri:")
    importances = pd.Series(model.feature_importances_, index=feature_cols)
    importances = importances.sort_values(ascending=False)
    for feat, imp in importances.items():
        print(f"  {feat:20s}: {imp:.4f}")

# ============================================
# 6) Summarise the results
# ============================================
print("\n\n=== ÖZET SONUÇLAR ===")
for n, res in results.items():
    print(f"n_cycles = {n:3d} → MAE = {res['MAE']:.2f}, R² = {res['R2']:.4f}")


Orijinal satır sayısı: 141
  cell_id  n_cycles  cycle_life  dqdv_peak_first  dqdv_peak_last  \
0    b1c0        25       477.0              0.0             0.0   
1    b1c0        50       477.0              0.0             0.0   
2    b1c0       100       477.0              0.0             0.0   
3    b1c1        25       491.0              0.0             0.0   
4    b1c1        50       491.0              0.0             0.0   

   dqdv_peak_delta  dqdv_peak_mean  dqdv_peak_std  dqdv_peak_slope  \
0              0.0       -0.000554       0.002055     2.424354e-05   
1              0.0       -0.000975       0.003214    -6.694987e-06   
2              0.0       -0.001317       0.004021    -1.087883e-05   
3              0.0       -0.000824       0.004037     1.378437e-20   
4              0.0       -0.000546       0.003014     1.351227e-05   

   dqdv_area_first  ...  dqdv_area_delta  dqdv_area_mean  dqdv_area_std  \
0      -772.439850  ...        -3.478434     -748.333411       8.318

In [11]:
# ============================================================
# PredictCycleLife.ipynb
# Amaç: features_early_cycles.csv -> model -> performans
# Random Forest kullanılarak pil ömrü tahmini
# ============================================================

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score


# Load the feature table and show its size and first rows.
df = pd.read_csv("../features_early_cycles.csv")
print("Toplam satır:", len(df))
print(df.head())


df = df.copy()

# Parse the target: trim whitespace, remove square brackets, then convert to
# numbers. Entries that cannot be parsed become NaN.
cycle_life_text = df["cycle_life"].astype(str).str.strip()
cycle_life_text = cycle_life_text.str.replace(r"[\[\]]", "", regex=True)
df["cycle_life"] = pd.to_numeric(cycle_life_text, errors="coerce")

print("cycle_life içindeki NaN sayısı:", df["cycle_life"].isna().sum())


# Candidate inputs: the six summary features first, then six statistics for
# each of the two dQ/dV quantities (peak, then area).
candidate_features = [
    "Qd_mean", "Qd_std", "IR_mean", "IR_std", "Tavg_mean", "dQd_slope",
] + [
    f"dqdv_{quantity}_{stat}"
    for quantity in ("peak", "area")
    for stat in ("first", "last", "delta", "mean", "std", "slope")
]

# Only candidates that are present as columns in the loaded frame are used.
feature_cols = [name for name in candidate_features if name in df.columns]
print("\nKullanılacak feature kolonları:")
print(feature_cols)

# Drop rows that have NaN in cycle_life or in any selected feature.
required_cols = ["cycle_life", *feature_cols]
df = df.dropna(subset=required_cols)
print("\nTemizlik sonrası satır sayısı:", len(df))


# Per-n_cycles evaluation: fit one random forest per early-cycle window on an
# 80/20 split and record the test-set MAE and R² in `results`.
results = {}

# Settings shared by every fit.
forest_kwargs = {
    "n_estimators": 400,
    "min_samples_leaf": 2,
    "max_features": "sqrt",
    "random_state": 42,
    "n_jobs": -1,
}
rule = "=" * 60

for n_cycles in [25, 50, 100]:
    print(f"\n{rule}")
    print(f"n_cycles = {n_cycles} için model")
    print(rule)

    df_subset = df.loc[df["n_cycles"] == n_cycles].copy()
    print("Bu n_cycles için satır sayısı:", len(df_subset))

    # Windows with fewer than 10 rows are skipped.
    if len(df_subset) < 10:
        print("⚠ Yeterli veri yok, bu n_cycles atlanıyor.")
        continue

    X, y = df_subset[feature_cols], df_subset["cycle_life"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = RandomForestRegressor(**forest_kwargs)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Score on the held-out rows and keep the numbers for the summary.
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results[n_cycles] = {"MAE": mae, "R2": r2}

    print(f"MAE: {mae:.2f}")
    print(f"R² : {r2:.4f}")

    # Feature importances, largest first.
    importances = pd.Series(
        model.feature_importances_, index=feature_cols
    ).sort_values(ascending=False)

    print("\nÖzellik önemleri:")
    for feat, imp in importances.items():
        print(f"  {feat:20s}: {imp:.4f}")


# Summary of every window that was modelled.
print("\n\n=== ÖZET SONUÇLAR ===")
for n, res in results.items():
    print(f"n_cycles = {n:3d} → MAE = {res['MAE']:.2f}, R² = {res['R2']:.4f}")


Toplam satır: 117
  cell_id  n_cycles  cycle_life   Qd_mean    Qd_std   IR_mean    IR_std  \
0    b1c0        25       477.0  1.104835  0.001730  0.017266  0.000128   
1    b1c0        50       477.0  1.105175  0.002964  0.017289  0.000118   
2    b1c0       100       477.0  1.105038  0.003119  0.017338  0.000122   
3    b1c1        25       491.0  1.107000  0.001108  0.017764  0.000211   
4    b1c1        50       491.0  1.106932  0.001209  0.017645  0.000195   

   Tavg_mean  dQd_slope  dqdv_peak_first  ...  dqdv_area_delta  \
0  31.822145   0.000161              0.0  ...        -3.478434   
1  31.804072   0.000022              0.0  ...        36.278293   
2  31.807497   0.000016              0.0  ...        48.199909   
3  33.542142   0.000030              0.0  ...         3.463564   
4  33.605174   0.000016              0.0  ...        11.476454   

   dqdv_area_mean  dqdv_area_std  dqdv_area_slope  dqdv_peakpos_first  \
0     -748.333411       8.318482         0.077890            