In [1]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import pandas as pd

In [2]:
# Load the early-cycle feature table; the path is resolved relative to the
# notebook's working directory, one level above it.
df = pd.read_csv('../features_early_cycles.csv')

In [3]:
# Take a copy to be safe before adding/overwriting columns
df = df.copy()

# 1) Convert the cycle_life column from strings to floats
#    e.g. "[904.]" -> 904.0
cycle_life_text = (
    df['cycle_life']
    .astype(str)                              # make every value a string
    .str.strip()                              # trim surrounding whitespace
    .str.replace(r'[\[\]]', '', regex=True)   # drop square brackets (raw string:
                                              # '\[' is an invalid escape otherwise)
    .str.strip()                              # trim whitespace that was inside the brackets
)
# Values that cannot be parsed become NaN (and are dropped in step 3) instead
# of aborting the whole cell; the final cast keeps the column float even when
# every value happens to be integer-like.
df['cycle_life'] = pd.to_numeric(cycle_life_text, errors='coerce').astype(float)

# 2) Make sure the feature columns are numeric as well
feature_cols = ['Qd_mean', 'IR_mean', 'IR_std', 'Tavg_mean', 'dQd_slope']

for col in feature_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# 3) Drop rows with a NaN in the target or in any feature
df = df.dropna(subset=['cycle_life'] + feature_cols)


In [4]:
# PREDICTION WITH A RANDOM FOREST MODEL
# For each early-cycle window, train a RandomForestRegressor on an 80/20 split
# and record MAE and R² in `results`, keyed by the window length.

results = {}

for n_cycles in [25, 50, 100]:
    print(f"\n{'='*50}")
    print(f"İlk {n_cycles} döngü ile tahmin")
    print(f"{'='*50}")

    # Keep only the rows computed from the first `n_cycles` cycles
    df_subset = df[df['n_cycles'] == n_cycles]

    # Skip windows with too little data: an empty subset makes train_test_split
    # raise, and with test_size=0.2 a very small subset leaves a single test
    # sample, for which R² is undefined.
    if len(df_subset) < 10:
        print("⚠ Yeterli veri yok, bu n_cycles atlanıyor.")
        continue

    # Separate the features from the target
    X = df_subset[['Qd_mean', 'IR_mean', 'IR_std', 'Tavg_mean', 'dQd_slope']]
    y = df_subset['cycle_life']

    # Train/test split (fixed seed so the split is reproducible)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = model.predict(X_test)

    # Performance metrics
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results[n_cycles] = {'MAE': mae, 'R2': r2}
    print(f"MAE: {mae:.2f}")
    print(f"R² Score: {r2:.4f}")

    # Impurity-based feature importances, in the column order of X
    print("\nÖzellik Önemi:")
    for feat, imp in zip(X.columns, model.feature_importances_):
        print(f"  {feat}: {imp:.4f}")


İlk 25 döngü ile tahmin
MAE: 116.38
R² Score: 0.5699

Özellik Önemi:
  Qd_mean: 0.1151
  IR_mean: 0.1666
  IR_std: 0.1061
  Tavg_mean: 0.3631
  dQd_slope: 0.2490

İlk 50 döngü ile tahmin


MAE: 123.66
R² Score: 0.4718

Özellik Önemi:
  Qd_mean: 0.1177
  IR_mean: 0.1313
  IR_std: 0.1848
  Tavg_mean: 0.3005
  dQd_slope: 0.2656

İlk 100 döngü ile tahmin
MAE: 111.82
R² Score: 0.5795

Özellik Önemi:
  Qd_mean: 0.1077
  IR_mean: 0.0957
  IR_std: 0.0568
  Tavg_mean: 0.4426
  dQd_slope: 0.2972


In [5]:
# Summary banner followed by one line per early-cycle window stored in `results`
print("\n" + "=" * 50)
print("ÖZET - Erken Tahmin Performansı")
print("=" * 50)
for n_cycles, metrics in results.items():
    r2_value = metrics['R2']
    mae_value = metrics['MAE']
    print(f"First {n_cycles} Cycle - R²: {r2_value:.4f}, MAE: {mae_value:.2f}")


ÖZET - Erken Tahmin Performansı
First 25 Cycle - R²: 0.5699, MAE: 116.38
First 50 Cycle - R²: 0.4718, MAE: 123.66
First 100 Cycle - R²: 0.5795, MAE: 111.82


In [6]:
# ============================================
# Random Forest model that uses only the dQ/dV features
# ============================================
# 1) Imports
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# ============================================
# 2) Load the data
# ============================================
# dqdv_features.csv = the csv produced with extract_dqdv_features
df = pd.read_csv("../dqdv_features.csv")

print("Orijinal satır sayısı:", len(df))
print(df.head())

# ============================================
# 3) Clean the cycle_life column (target variable y)
# ============================================
df = df.copy()

# cycle_life may arrive as a string, sometimes formatted like "[904.]"
df["cycle_life"] = (
    df["cycle_life"]
    .astype(str)
    .str.strip()
    .str.replace(r"[\[\]]", "", regex=True)   # drop the square brackets
)

# Anything that still cannot be parsed becomes NaN and is dropped below
df["cycle_life"] = pd.to_numeric(df["cycle_life"], errors="coerce")

print("cycle_life içinde NaN sayısı:", df["cycle_life"].isna().sum())

# ============================================
# 4) Define the feature columns
# ============================================
candidate_features = [
    "dqdv_peak_first", "dqdv_peak_last", "dqdv_peak_delta",
    "dqdv_peak_mean", "dqdv_peak_std", "dqdv_peak_slope",
    "dqdv_area_first", "dqdv_area_last", "dqdv_area_delta",
    "dqdv_area_mean", "dqdv_area_std", "dqdv_area_slope",
]

# Keep only the columns that really exist in the file (avoids a KeyError)
feature_cols = [c for c in candidate_features if c in df.columns]
print("\nKullanılacak feature kolonları:")
print(feature_cols)

# Drop rows with a NaN in cycle_life or in any feature
df = df.dropna(subset=["cycle_life"] + feature_cols)
print("\nTemizlik sonrası satır sayısı:", len(df))

# ============================================
# 5) Train and evaluate a separate model for each n_cycles
# ============================================
results = {}

for n_cycles in [25, 50, 100]:
    print(f"\n{'='*60}")
    print(f"n_cycles = {n_cycles} için model eğitimi")
    print(f"{'='*60}")
    
    df_subset = df[df["n_cycles"] == n_cycles].copy()
    print("Bu n_cycles için satır sayısı:", len(df_subset))
    
    # Skip when there is not enough data. With test_size=0.2 a 5-row subset
    # leaves a single test sample, for which R² is undefined, so the threshold
    # is 10 rows (the same limit the combined-feature cell of this notebook uses).
    if len(df_subset) < 10:
        print("⚠ Yeterli veri yok, bu n_cycles atlanıyor.")
        continue
    
    X = df_subset[feature_cols]
    y = df_subset["cycle_life"]
    
    # Train/test split (fixed seed so the split is reproducible)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    
    # Model definition
    model = RandomForestRegressor(
        n_estimators=400,
        min_samples_leaf=2,
        max_features="sqrt",
        random_state=42,
        n_jobs=-1
    )
    
    # Fit
    model.fit(X_train, y_train)
    
    # Predict on the held-out rows
    y_pred = model.predict(X_test)
    
    # Metrics
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    results[n_cycles] = {"MAE": mae, "R2": r2}
    
    print(f"MAE: {mae:.2f}")
    print(f"R² : {r2:.4f}")
    
    # Feature importances, largest first
    print("\nÖzellik önemleri:")
    importances = pd.Series(model.feature_importances_, index=feature_cols)
    importances = importances.sort_values(ascending=False)
    for feat, imp in importances.items():
        print(f"  {feat:20s}: {imp:.4f}")

# ============================================
# 6) Summarise the results
# ============================================
print("\n\n=== ÖZET SONUÇLAR ===")
for n, res in results.items():
    print(f"n_cycles = {n:3d} → MAE = {res['MAE']:.2f}, R² = {res['R2']:.4f}")


Orijinal satır sayısı: 141
  cell_id  n_cycles  cycle_life  dqdv_peak_first  dqdv_peak_last  \
0    b1c0        25       477.0              0.0             0.0   
1    b1c0        50       477.0              0.0             0.0   
2    b1c0       100       477.0              0.0             0.0   
3    b1c1        25       491.0              0.0             0.0   
4    b1c1        50       491.0              0.0             0.0   

   dqdv_peak_delta  dqdv_peak_mean  dqdv_peak_std  dqdv_peak_slope  \
0              0.0       -0.000554       0.002055     2.424354e-05   
1              0.0       -0.000975       0.003214    -6.694987e-06   
2              0.0       -0.001317       0.004021    -1.087883e-05   
3              0.0       -0.000824       0.004037     1.378437e-20   
4              0.0       -0.000546       0.003014     1.351227e-05   

   dqdv_area_first  ...  dqdv_area_delta  dqdv_area_mean  dqdv_area_std  \
0      -772.439850  ...        -3.478434     -748.333411       8.318

MAE: 177.23
R² : -0.3458

Özellik önemleri:
  dqdv_area_first     : 0.1461
  dqdv_area_delta     : 0.1349
  dqdv_peak_std       : 0.1326
  dqdv_peak_mean      : 0.1243
  dqdv_area_std       : 0.1170
  dqdv_area_mean      : 0.1073
  dqdv_area_last      : 0.0938
  dqdv_area_slope     : 0.0741
  dqdv_peak_slope     : 0.0684
  dqdv_peak_delta     : 0.0007
  dqdv_peak_last      : 0.0007
  dqdv_peak_first     : 0.0000

n_cycles = 50 için model eğitimi
Bu n_cycles için satır sayısı: 39


MAE: 155.33
R² : -0.3532

Özellik önemleri:
  dqdv_area_slope     : 0.1899
  dqdv_area_delta     : 0.1535
  dqdv_area_last      : 0.1154
  dqdv_area_first     : 0.0976
  dqdv_peak_last      : 0.0861
  dqdv_peak_delta     : 0.0734
  dqdv_peak_mean      : 0.0664
  dqdv_peak_std       : 0.0651
  dqdv_area_mean      : 0.0649
  dqdv_area_std       : 0.0506
  dqdv_peak_slope     : 0.0372
  dqdv_peak_first     : 0.0000

n_cycles = 100 için model eğitimi
Bu n_cycles için satır sayısı: 39


MAE: 155.70
R² : -0.3194

Özellik önemleri:
  dqdv_area_delta     : 0.2130
  dqdv_area_last      : 0.1713
  dqdv_area_slope     : 0.1662
  dqdv_area_std       : 0.1482
  dqdv_area_first     : 0.0853
  dqdv_peak_mean      : 0.0673
  dqdv_peak_std       : 0.0605
  dqdv_area_mean      : 0.0524
  dqdv_peak_slope     : 0.0357
  dqdv_peak_last      : 0.0001
  dqdv_peak_delta     : 0.0000
  dqdv_peak_first     : 0.0000


=== ÖZET SONUÇLAR ===
n_cycles =  25 → MAE = 177.23, R² = -0.3458
n_cycles =  50 → MAE = 155.33, R² = -0.3532
n_cycles = 100 → MAE = 155.70, R² = -0.3194


In [7]:
# ============================================================
# PredictCycleLife.ipynb
# Purpose: features_early_cycles.csv -> model -> performance
# Battery cycle-life prediction with a Random Forest
# ============================================================

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score


# --- Load -----------------------------------------------------
df = pd.read_csv("../features_early_cycles.csv")
print("Toplam satır:", len(df))
print(df.head())


# --- Clean the target -----------------------------------------
df = df.copy()

# Normalise values such as "[904.]" to plain numeric text, then parse;
# anything unparseable becomes NaN and is dropped further down.
cycle_life_text = df["cycle_life"].astype(str).str.strip()
cycle_life_text = cycle_life_text.str.replace(r"[\[\]]", "", regex=True)
df["cycle_life"] = cycle_life_text
df["cycle_life"] = pd.to_numeric(df["cycle_life"], errors="coerce")

print("cycle_life içindeki NaN sayısı:", df["cycle_life"].isna().sum())


# --- Select features ------------------------------------------
candidate_features = [
    # summary statistics of the early cycles
    "Qd_mean", "IR_mean", "IR_std", "Tavg_mean", "dQd_slope",
    # dQ/dV peak descriptors
    "dqdv_peak_first", "dqdv_peak_last", "dqdv_peak_delta",
    "dqdv_peak_mean", "dqdv_peak_std", "dqdv_peak_slope",
    # dQ/dV area descriptors
    "dqdv_area_first", "dqdv_area_last", "dqdv_area_delta",
    "dqdv_area_mean", "dqdv_area_std", "dqdv_area_slope",
]

# Only candidates that are actually present in the file are used
feature_cols = []
for candidate in candidate_features:
    if candidate in df.columns:
        feature_cols.append(candidate)
print("\nKullanılacak feature kolonları:")
print(feature_cols)

# Drop rows with a NaN in the target or in any selected feature
required_cols = ["cycle_life", *feature_cols]
df = df.dropna(subset=required_cols)
print("\nTemizlik sonrası satır sayısı:", len(df))


# --- Train / evaluate one model per early-cycle window --------
results = {}
separator = "=" * 60
rf_params = {
    "n_estimators": 400,
    "min_samples_leaf": 2,
    "max_features": "sqrt",
    "random_state": 42,
    "n_jobs": -1,
}

for n_cycles in (25, 50, 100):
    print(f"\n{separator}")
    print(f"n_cycles = {n_cycles} için model")
    print(separator)

    df_subset = df.loc[df["n_cycles"] == n_cycles].copy()
    print("Bu n_cycles için satır sayısı:", len(df_subset))

    # Too few rows for a meaningful 80/20 split: skip this window
    if len(df_subset) < 10:
        print("⚠ Yeterli veri yok, bu n_cycles atlanıyor.")
        continue

    X, y = df_subset[feature_cols], df_subset["cycle_life"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = RandomForestRegressor(**rf_params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"MAE: {mae:.2f}")
    print(f"R² : {r2:.4f}")

    # Feature importances, largest first
    importances = pd.Series(
        model.feature_importances_, index=feature_cols
    ).sort_values(ascending=False)

    print("\nÖzellik önemleri:")
    for feat, imp in importances.items():
        print(f"  {feat:20s}: {imp:.4f}")

    results[n_cycles] = dict(MAE=mae, R2=r2)


# --- Summary --------------------------------------------------
print("\n\n=== ÖZET SONUÇLAR ===")
for n, res in results.items():
    print(f"n_cycles = {n:3d} → MAE = {res['MAE']:.2f}, R² = {res['R2']:.4f}")


Toplam satır: 138
  cell_id  n_cycles  cycle_life   Qd_mean    Qd_std   IR_mean    IR_std  \
0    b1c0        25      1190.0  1.050162  0.232862  0.016045  0.003307   
1    b1c0        50      1190.0  1.063359  0.165187  0.016324  0.002355   
2    b1c0       100      1190.0  1.069791  0.116982  0.016477  0.001673   
3    b1c1        25      1179.0  1.037350  0.211759  0.015537  0.004582   
4    b1c1        50      1179.0  1.059614  0.151382  0.016225  0.003312   

   Tavg_mean  dQd_slope  dqdv_peak_first  ...  dqdv_area_delta  \
0  30.398942   0.009734              0.0  ...      -744.916918   
1  30.978520   0.002006              0.0  ...      -744.817931   
2  31.287710   0.000441              0.0  ...      -737.888695   
3  30.246174   0.010195              0.0  ...      -748.112101   
4  30.745152   0.002608              0.0  ...      -742.677784   

   dqdv_area_mean  dqdv_area_std  dqdv_area_slope  dqdv_peakpos_first  \
0     -716.620597     148.132289        -6.899972            

MAE: 155.24
R² : 0.1559

Özellik önemleri:
  Tavg_mean           : 0.1533
  dqdv_area_slope     : 0.1064
  dQd_slope           : 0.1027
  IR_mean             : 0.0798
  Qd_mean             : 0.0690
  dqdv_area_std       : 0.0638
  IR_std              : 0.0635
  dqdv_area_last      : 0.0627
  dqdv_area_delta     : 0.0621
  dqdv_area_mean      : 0.0595
  dqdv_peak_slope     : 0.0528
  dqdv_peak_std       : 0.0438
  dqdv_peak_mean      : 0.0404
  dqdv_peak_last      : 0.0209
  dqdv_peak_delta     : 0.0192
  dqdv_peak_first     : 0.0000
  dqdv_area_first     : 0.0000

n_cycles = 50 için model
Bu n_cycles için satır sayısı: 46


MAE: 132.78
R² : 0.3814

Özellik önemleri:
  dqdv_area_slope     : 0.1834
  Tavg_mean           : 0.1607
  dQd_slope           : 0.0832
  IR_std              : 0.0801
  IR_mean             : 0.0776
  dqdv_peak_mean      : 0.0651
  dqdv_peak_std       : 0.0544
  dqdv_area_delta     : 0.0541
  Qd_mean             : 0.0530
  dqdv_area_last      : 0.0499
  dqdv_peak_slope     : 0.0471
  dqdv_area_mean      : 0.0457
  dqdv_area_std       : 0.0456
  dqdv_peak_delta     : 0.0000
  dqdv_peak_last      : 0.0000
  dqdv_peak_first     : 0.0000
  dqdv_area_first     : 0.0000

n_cycles = 100 için model
Bu n_cycles için satır sayısı: 46


MAE: 129.27
R² : 0.4918

Özellik önemleri:
  dqdv_area_slope     : 0.2057
  Tavg_mean           : 0.1477
  dqdv_area_delta     : 0.1239
  dqdv_area_last      : 0.1058
  dQd_slope           : 0.0903
  dqdv_area_mean      : 0.0586
  IR_mean             : 0.0512
  Qd_mean             : 0.0439
  dqdv_area_std       : 0.0414
  dqdv_peak_std       : 0.0362
  IR_std              : 0.0349
  dqdv_peak_slope     : 0.0311
  dqdv_peak_mean      : 0.0293
  dqdv_peak_delta     : 0.0000
  dqdv_peak_last      : 0.0000
  dqdv_peak_first     : 0.0000
  dqdv_area_first     : 0.0000


=== ÖZET SONUÇLAR ===
n_cycles =  25 → MAE = 155.24, R² = 0.1559
n_cycles =  50 → MAE = 132.78, R² = 0.3814
n_cycles = 100 → MAE = 129.27, R² = 0.4918
