In [19]:
# ==========================================================
# QSVM + RL Integrated Framework (Corrected & Improved)
# ==========================================================

import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import pennylane as qml
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
import random
from collections import deque
import joblib

# ---------------------------
# 1️⃣ Load and Prepare Dataset
# ---------------------------
# Make sure this path is correct for your system
DATA_PATH = r"D:\Programming\VidyutAiHackathon\data\Full-Data.csv"
df = pd.read_csv(DATA_PATH)
print("Dataset shape:", df.shape)
print(df.head())

def sample_equal(df, label_col='Label', n=50, random_state=42):
    out = df.groupby(label_col, group_keys=False).apply(
        lambda g: g.sample(n if len(g) >= n else n, replace=(len(g) < n), random_state=random_state)
    )
    return out.reset_index(drop=True)

sampled = sample_equal(df, label_col='Label', n=150)
X = sampled[['SoC', 'Temperature', 'Voltage']].values
y = sampled['Label'].values

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale features
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

# ---------------------------
# 2️⃣ Quantum Feature Map + psi_train
# ---------------------------
n_qubits = X_train_s.shape[1]
dev = qml.device("default.qubit", wires=n_qubits)

def feature_map(x):
    for i in range(n_qubits):
        qml.RY(x[i] * np.pi, wires=i)
    for i in range(n_qubits - 1):
        qml.CNOT(wires=[i, i + 1])
    for i in range(n_qubits):
        qml.RZ(x[i] * np.pi / 2, wires=i)

@qml.qnode(dev)
def psi(x):
    feature_map(x)
    return qml.state()

print("Computing psi_train (quantum states for training samples)...")
psi_train = np.array([psi(x) for x in X_train_s])
print("psi_train shape:", psi_train.shape)

# ---------------------------
# 3️⃣ Quantum Kernel & QSVM
# ---------------------------
def quantum_kernel_matrix(X1, X2):
    m, n = len(X1), len(X2)
    K = np.zeros((m, n))
    for i in range(m):
        psi_i = psi(X1[i])
        for j in range(n):
            psi_j = psi(X2[j])
            K[i, j] = np.abs(np.vdot(psi_i, psi_j))**2
    return K

print("Computing train kernel...")
K_train = quantum_kernel_matrix(X_train_s, X_train_s)
print("Computing test kernel...")
K_test = quantum_kernel_matrix(X_test_s, X_train_s)

svc = SVC(kernel='precomputed', probability=True, decision_function_shape='ovr', random_state=42)
svc.fit(K_train, y_train)

y_pred = svc.predict(K_test)
print("QSVM accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ---------------------------
# 4️⃣ RL Environment with Improved Rewards
# ---------------------------
class SimpleBatteryEnv:
    def __init__(self):
        self.soc_min, self.soc_max = 5.0, 100.0
        self.temp_min, self.temp_max = 273.15, 350.0
        self.voltage_min, self.voltage_max = 2.5, 4.2
        self.action_space = [0, 1, 2]  # 0:fast, 1:slow, 2:pause
        self.max_steps = 20
        self.reset()

    def reset(self):
        # Start in a normal, safe state
        self.state = np.array([50.0 + np.random.randn()*5, 300.0 + np.random.randn()*3, 3.7 + np.random.randn()*0.05])
        self.t = 0
        return self.state

    def _get_obs(self):
        return self.state.copy()

    def step(self, action):
        soc, temp, volt = self.state

        if action == 0:  # fast
            soc += 6.0; temp += 3.0; volt += 0.03
        elif action == 1:  # slow
            soc += 2.5; temp += 0.8; volt += 0.01
        else:  # pause
            soc -= 0.5; temp -= 0.8; volt -= 0.005

        soc = np.clip(soc, self.soc_min, self.soc_max)
        temp = np.clip(temp, self.temp_min, self.temp_max)
        volt = np.clip(volt, self.voltage_min, self.voltage_max)
        self.state = np.array([soc, temp, volt])
        self.t += 1

        def qsvm_predict_proba(raw_X):
            raw_X = np.atleast_2d(raw_X)
            Xs = scaler.transform(raw_X)
            psi_x = psi(Xs[0])
            K = np.array([np.abs(np.vdot(psi_x, pt))**2 for pt in psi_train]).reshape(1, -1)
            return svc.predict_proba(K)[0]

        probs = qsvm_predict_proba(self.state.reshape(1, -1))
        pred_class = np.argmax(probs) # 0=Low, 1=Medium, 2=High

        # --- CORRECTED & IMPROVED Reward Function ---
        reward = 0.0
        reward += 0.1 # Small reward for surviving a step

        if 20.0 <= soc <= 80.0:
            reward += 1.0
        else:
            reward -= 1.5

        if temp < 315.0:
            reward += 1.0
        elif 315.0 <= temp <= 325.0:
            reward -= 5.0  # Medium penalty for getting warm
        else:  # temp > 325.0
            reward -= 15.0 # Severe penalty for being hot

        # Make action penalties extremely clear
        if pred_class == 2 and action == 0:
            reward -= 25.0  # HUGE PENALTY for fast charging during high risk
        elif pred_class == 1 and action == 0:
            reward -= 10.0 # Significant penalty for fast charging during medium risk

        # Reward taking the CORRECT actions
        if pred_class == 2 and action == 2: # Pause when risk is high
            reward += 10.0
        elif pred_class == 1 and action == 1: # Slow charge when risk is medium
            reward += 5.0
        
        done = self.t >= self.max_steps or temp > 330.0 or soc <= 5.0
        if done and (temp > 330.0 or soc <= 5.0):
            reward -= 10.0 # Extra penalty for ending in a bad state

        return self._get_obs(), reward, done, {"qsvm_pred": pred_class, "qsvm_probs": probs}

# ---------------------------
# 5️⃣ RL Agent (DQN) - No changes needed here
# ---------------------------
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000) # Increased memory size for better learning
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.995 # Slower decay to encourage more exploration
        self.lr = 1e-3
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_model()

    def _build_model(self):
        model = models.Sequential([
            layers.Input(shape=(self.state_size,)),
            layers.Dense(32, activation='relu'),
            layers.Dense(32, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=optimizers.Adam(learning_rate=self.lr), loss='mse')
        return model

    def update_target_model(self):
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, s, a, r, s2, done):
        self.memory.append((s, a, r, s2, done))

    def act(self, s):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_vals = self.model.predict(s.reshape(1, -1), verbose=0)[0]
        return np.argmax(q_vals)

    def replay(self, batch_size=32): # Slightly larger batch size
        if len(self.memory) < batch_size:
            return
        batch = random.sample(self.memory, batch_size)
        states = np.array([b[0] for b in batch])
        actions = np.array([b[1] for b in batch])
        rewards = np.array([b[2] for b in batch])
        next_states = np.array([b[3] for b in batch])
        dones = np.array([b[4] for b in batch])

        target = self.model.predict(states, verbose=0)
        t_next = self.target_model.predict(next_states, verbose=0)

        for i in range(len(batch)):
            target[i, actions[i]] = rewards[i] if dones[i] else rewards[i] + self.gamma * np.max(t_next[i])

        self.model.train_on_batch(states, target)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# ---------------------------
# 6️⃣ Train RL Agent (Longer Training)
# ---------------------------
env = SimpleBatteryEnv()
state_size = 3
action_size = 3
agent = DQNAgent(state_size, action_size)

# *** INCREASED EPISODES FOR BETTER TRAINING ***
n_episodes = 50
target_update_freq = 5
batch_size = 32

for ep in range(n_episodes):
    state = env.reset()
    total_reward = 0
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
        agent.replay(batch_size)
    if ep % target_update_freq == 0:
        agent.update_target_model()
    print(f"Episode {ep+1}/{n_episodes} | Total Reward={total_reward:.2f} | Epsilon={agent.epsilon:.3f}")

# ---------------------------
# 7️⃣ Save Models & Artifacts
# ---------------------------
SAVE_DIR = r"D:\Programming\VidyutAiHackathon\models"
os.makedirs(SAVE_DIR, exist_ok=True)

joblib.dump(scaler, os.path.join(SAVE_DIR, "scaler.joblib"))
joblib.dump(svc, os.path.join(SAVE_DIR, "qsvm_svc.joblib"))
np.save(os.path.join(SAVE_DIR, "X_train_s.npy"), X_train_s)
np.save(os.path.join(SAVE_DIR, "psi_train.npy"), psi_train)
agent.model.save(os.path.join(SAVE_DIR, "dqn_agent_model.h5"))

print(f"\n✅ All models saved to: {SAVE_DIR}")

Dataset shape: (1152, 4)
          SoC  Temperature   Voltage  Label
0  100.000000   298.150000  4.014300      0
1   99.173138   298.849283  3.916820      0
2   98.346276   299.665201  3.887562      0
3   97.519413   300.497825  3.877287      0
4   96.692551   301.327592  3.870545      0
Computing psi_train (quantum states for training samples)...


  out = df.groupby(label_col, group_keys=False).apply(


psi_train shape: (360, 8)
Computing train kernel...
Computing test kernel...
QSVM accuracy: 0.8
              precision    recall  f1-score   support

           0       0.66      0.83      0.74        30
           1       0.77      0.57      0.65        30
           2       1.00      1.00      1.00        30

    accuracy                           0.80        90
   macro avg       0.81      0.80      0.80        90
weighted avg       0.81      0.80      0.80        90

Episode 1/50 | Total Reward=-8.50 | Epsilon=1.000
Episode 2/50 | Total Reward=23.50 | Epsilon=0.956
Episode 3/50 | Total Reward=-131.50 | Epsilon=0.865
Episode 4/50 | Total Reward=0.50 | Epsilon=0.782
Episode 5/50 | Total Reward=42.00 | Epsilon=0.708
Episode 6/50 | Total Reward=-25.00 | Epsilon=0.640
Episode 7/50 | Total Reward=23.50 | Epsilon=0.579
Episode 8/50 | Total Reward=42.00 | Epsilon=0.524
Episode 9/50 | Total Reward=42.00 | Epsilon=0.474
Episode 10/50 | Total Reward=-13.00 | Epsilon=0.429
Episode 11/50 | Tot



Episode 50/50 | Total Reward=42.00 | Epsilon=0.050

✅ All models saved to: D:\Programming\VidyutAiHackathon\models


In [7]:
!pip install plotly

Collecting plotly
  Downloading plotly-6.3.1-py3-none-any.whl.metadata (8.5 kB)
Downloading plotly-6.3.1-py3-none-any.whl (9.8 MB)
   ---------------------------------------- 0.0/9.8 MB ? eta -:--:--
   -- ------------------------------------- 0.5/9.8 MB 5.6 MB/s eta 0:00:02
   ------ --------------------------------- 1.6/9.8 MB 4.9 MB/s eta 0:00:02
   --------- ------------------------------ 2.4/9.8 MB 5.0 MB/s eta 0:00:02
   ------------ --------------------------- 3.1/9.8 MB 4.5 MB/s eta 0:00:02
   ----------------- ---------------------- 4.2/9.8 MB 4.6 MB/s eta 0:00:02
   -------------------- ------------------- 5.0/9.8 MB 4.6 MB/s eta 0:00:02
   ------------------------- -------------- 6.3/9.8 MB 4.7 MB/s eta 0:00:01
   ------------------------------ --------- 7.6/9.8 MB 5.0 MB/s eta 0:00:01
   ----------------------------------- ---- 8.7/9.8 MB 5.0 MB/s eta 0:00:01
   ---------------------------------------  9.7/9.8 MB 5.2 MB/s eta 0:00:01
   -------------------------------------

In [None]:
import joblib, numpy as np
joblib.dump(scaler, "/mnt/data/scaler.joblib")
joblib.dump(svc, "/mnt/data/qsvm_svc.joblib")
np.save("/mnt/data/X_train_s.npy", X_train_s)   # training set after StandardScaler
np.save("/mnt/data/psi_train.npy", psi_train)  # optional: statevectors of psi(X_train_s)
agent.model.save("/mnt/data/dqn_agent_model.h5")

Collecting tensorflow
  Downloading tensorflow-2.20.0-cp311-cp311-win_amd64.whl.metadata (4.6 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf>=5.28.0 (from tensorflow)
  Downloading protobuf-6.32.1-cp310-abi3-win_amd64.whl.metadata (593 bytes)
Collecting wrapt>=1.11.0 (from tensorflow)
  Downloading wrapt-1.17.3-cp311-cp311-win_amd64.whl.metadata (6.5 kB)
Collecting grpcio<2.0,>=1.24.3 (from tensorflow)
  Downloading grpcio-1.75.1-cp

In [6]:
!pip install scikit-learn

Collecting scikit-learn
  Using cached scikit_learn-1.7.2-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.7.2-cp311-cp311-win_amd64.whl (8.9 MB)
Using cached joblib-1.5.2-py3-none-any.whl (308 kB)
Using cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn

   ------------- -------------------------- 1/3 [joblib]
   ------------- -------------------------- 1/3 [joblib]
   ------------- -------------------------- 1/3 [joblib]
   ------------- -------------------------- 1/3 [joblib]
   ------------- -------------------------- 1/3 [joblib]
   ------------- -------------------------- 1/3 [joblib]
   -------------------------- ------------- 2/3 [scikit-learn]
   -----------------

In [11]:
# ==========================================================
# QSVM + RL Integrated Framework (Corrected & Improved)
# ==========================================================

import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import pennylane as qml
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
import random
from collections import deque
import joblib

# ---------------------------
# 1️⃣ Load and Prepare Dataset
# ---------------------------
# Make sure this path is correct for your system
DATA_PATH = r"D:\Programming\VidyutAiHackathon\data\Full-Data.csv"
df = pd.read_csv(DATA_PATH)
print("Dataset shape:", df.shape)
print(df.head())

def sample_equal(df, label_col='Label', n=50, random_state=42):
    out = df.groupby(label_col, group_keys=False).apply(
        lambda g: g.sample(n if len(g) >= n else n, replace=(len(g) < n), random_state=random_state)
    )
    return out.reset_index(drop=True)

sampled = sample_equal(df, label_col='Label', n=50)
X = sampled[['SoC', 'Temperature', 'Voltage']].values
y = sampled['Label'].values

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale features
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

# ---------------------------
# 2️⃣ Quantum Feature Map + psi_train
# ---------------------------
n_qubits = X_train_s.shape[1]
dev = qml.device("default.qubit", wires=n_qubits)

def feature_map(x):
    for i in range(n_qubits):
        qml.RY(x[i] * np.pi, wires=i)
    for i in range(n_qubits - 1):
        qml.CNOT(wires=[i, i + 1])
    for i in range(n_qubits):
        qml.RZ(x[i] * np.pi / 2, wires=i)

@qml.qnode(dev)
def psi(x):
    feature_map(x)
    return qml.state()

print("Computing psi_train (quantum states for training samples)...")
psi_train = np.array([psi(x) for x in X_train_s])
print("psi_train shape:", psi_train.shape)

# ---------------------------
# 3️⃣ Quantum Kernel & QSVM
# ---------------------------
def quantum_kernel_matrix(X1, X2):
    m, n = len(X1), len(X2)
    K = np.zeros((m, n))
    for i in range(m):
        psi_i = psi(X1[i])
        for j in range(n):
            psi_j = psi(X2[j])
            K[i, j] = np.abs(np.vdot(psi_i, psi_j))**2
    return K

print("Computing train kernel...")
K_train = quantum_kernel_matrix(X_train_s, X_train_s)
print("Computing test kernel...")
K_test = quantum_kernel_matrix(X_test_s, X_train_s)

svc = SVC(kernel='precomputed', probability=True, decision_function_shape='ovr', random_state=42)
svc.fit(K_train, y_train)

y_pred = svc.predict(K_test)
print("QSVM accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ---------------------------
# 4️⃣ RL Environment with Improved Rewards
# ---------------------------
class SimpleBatteryEnv:
    def __init__(self):
        self.soc_min, self.soc_max = 5.0, 100.0
        self.temp_min, self.temp_max = 273.15, 350.0
        self.voltage_min, self.voltage_max = 2.5, 4.2
        self.action_space = [0, 1, 2]  # 0:fast, 1:slow, 2:pause
        self.max_steps = 20
        self.reset()

    def reset(self):
        # Start in a normal, safe state
        self.state = np.array([50.0 + np.random.randn()*5, 300.0 + np.random.randn()*3, 3.7 + np.random.randn()*0.05])
        self.t = 0
        return self.state

    def _get_obs(self):
        return self.state.copy()

    def step(self, action):
        soc, temp, volt = self.state

        if action == 0:  # fast
            soc += 6.0; temp += 3.0; volt += 0.03
        elif action == 1:  # slow
            soc += 2.5; temp += 0.8; volt += 0.01
        else:  # pause
            soc -= 0.5; temp -= 0.8; volt -= 0.005

        soc = np.clip(soc, self.soc_min, self.soc_max)
        temp = np.clip(temp, self.temp_min, self.temp_max)
        volt = np.clip(volt, self.voltage_min, self.voltage_max)
        self.state = np.array([soc, temp, volt])
        self.t += 1

        def qsvm_predict_proba(raw_X):
            raw_X = np.atleast_2d(raw_X)
            Xs = scaler.transform(raw_X)
            psi_x = psi(Xs[0])
            K = np.array([np.abs(np.vdot(psi_x, pt))**2 for pt in psi_train]).reshape(1, -1)
            return svc.predict_proba(K)[0]

        probs = qsvm_predict_proba(self.state.reshape(1, -1))
        pred_class = np.argmax(probs) # 0=Low, 1=Medium, 2=High

        # --- CORRECTED & IMPROVED Reward Function ---
        reward = 0.0
        reward += 0.1 # Small reward for surviving a step

        if 20.0 <= soc <= 80.0:
            reward += 1.0
        else:
            reward -= 1.5

        if temp < 315.0:
            reward += 1.0
        elif 315.0 <= temp <= 325.0:
            reward -= 5.0  # Medium penalty for getting warm
        else:  # temp > 325.0
            reward -= 15.0 # Severe penalty for being hot

        # Make action penalties extremely clear
        if pred_class == 2 and action == 0:
            reward -= 25.0  # HUGE PENALTY for fast charging during high risk
        elif pred_class == 1 and action == 0:
            reward -= 10.0 # Significant penalty for fast charging during medium risk

        # Reward taking the CORRECT actions
        if pred_class == 2 and action == 2: # Pause when risk is high
            reward += 10.0
        elif pred_class == 1 and action == 1: # Slow charge when risk is medium
            reward += 5.0
        
        done = self.t >= self.max_steps or temp > 330.0 or soc <= 5.0
        if done and (temp > 330.0 or soc <= 5.0):
            reward -= 10.0 # Extra penalty for ending in a bad state

        return self._get_obs(), reward, done, {"qsvm_pred": pred_class, "qsvm_probs": probs}

# ---------------------------
# 5️⃣ RL Agent (DQN) - No changes needed here
# ---------------------------
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000) # Increased memory size for better learning
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.995 # Slower decay to encourage more exploration
        self.lr = 1e-3
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_model()

    def _build_model(self):
        model = models.Sequential([
            layers.Input(shape=(self.state_size,)),
            layers.Dense(32, activation='relu'),
            layers.Dense(32, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=optimizers.Adam(learning_rate=self.lr), loss='mse')
        return model

    def update_target_model(self):
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, s, a, r, s2, done):
        self.memory.append((s, a, r, s2, done))

    def act(self, s):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_vals = self.model.predict(s.reshape(1, -1), verbose=0)[0]
        return np.argmax(q_vals)

    def replay(self, batch_size=32): # Slightly larger batch size
        if len(self.memory) < batch_size:
            return
        batch = random.sample(self.memory, batch_size)
        states = np.array([b[0] for b in batch])
        actions = np.array([b[1] for b in batch])
        rewards = np.array([b[2] for b in batch])
        next_states = np.array([b[3] for b in batch])
        dones = np.array([b[4] for b in batch])

        target = self.model.predict(states, verbose=0)
        t_next = self.target_model.predict(next_states, verbose=0)

        for i in range(len(batch)):
            target[i, actions[i]] = rewards[i] if dones[i] else rewards[i] + self.gamma * np.max(t_next[i])

        self.model.train_on_batch(states, target)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# ---------------------------
# 6️⃣ Train RL Agent (Longer Training)
# ---------------------------
env = SimpleBatteryEnv()
state_size = 3
action_size = 3
agent = DQNAgent(state_size, action_size)

# *** INCREASED EPISODES FOR BETTER TRAINING ***
n_episodes = 50
target_update_freq = 5
batch_size = 32

for ep in range(n_episodes):
    state = env.reset()
    total_reward = 0
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
        agent.replay(batch_size)
    if ep % target_update_freq == 0:
        agent.update_target_model()
    print(f"Episode {ep+1}/{n_episodes} | Total Reward={total_reward:.2f} | Epsilon={agent.epsilon:.3f}")

# ---------------------------
# 7️⃣ Save Models & Artifacts
# ---------------------------
SAVE_DIR = r"D:\Programming\VidyutAiHackathon\models"
os.makedirs(SAVE_DIR, exist_ok=True)

joblib.dump(scaler, os.path.join(SAVE_DIR, "scaler1.joblib"))
joblib.dump(svc, os.path.join(SAVE_DIR, "qsvm_svc1.joblib"))
np.save(os.path.join(SAVE_DIR, "X_train_s1.npy"), X_train_s)
np.save(os.path.join(SAVE_DIR, "psi_train1.npy"), psi_train)
agent.model.save(os.path.join(SAVE_DIR, "dqn_agent_model1.h5"))

print(f"\n✅ All models saved to: {SAVE_DIR}")

Dataset shape: (1152, 4)
          SoC  Temperature   Voltage  Label
0  100.000000   298.150000  4.014300      0
1   99.173138   298.849283  3.916820      0
2   98.346276   299.665201  3.887562      0
3   97.519413   300.497825  3.877287      0
4   96.692551   301.327592  3.870545      0
Computing psi_train (quantum states for training samples)...


  out = df.groupby(label_col, group_keys=False).apply(


psi_train shape: (120, 8)
Computing train kernel...
Computing test kernel...
QSVM accuracy: 0.7333333333333333
              precision    recall  f1-score   support

           0       0.56      0.90      0.69        10
           1       0.75      0.30      0.43        10
           2       1.00      1.00      1.00        10

    accuracy                           0.73        30
   macro avg       0.77      0.73      0.71        30
weighted avg       0.77      0.73      0.71        30

Episode 1/50 | Total Reward=19.50 | Epsilon=1.000
Episode 2/50 | Total Reward=-81.50 | Epsilon=0.956
Episode 3/50 | Total Reward=42.00 | Epsilon=0.865
Episode 4/50 | Total Reward=-97.50 | Epsilon=0.782
Episode 5/50 | Total Reward=32.00 | Epsilon=0.708
Episode 6/50 | Total Reward=34.50 | Epsilon=0.640
Episode 7/50 | Total Reward=42.00 | Epsilon=0.579
Episode 8/50 | Total Reward=-156.30 | Epsilon=0.532
Episode 9/50 | Total Reward=5.50 | Epsilon=0.481
Episode 10/50 | Total Reward=42.00 | Epsilon=0.435
Epis



Episode 50/50 | Total Reward=42.00 | Epsilon=0.050

✅ All models saved to: D:\Programming\VidyutAiHackathon\models


In [15]:
#BHI 
import pandas as pd

# df has: SoC, Voltage, Temperature, Label
df = pd.read_csv('D:\Programming\VidyutAiHackathon\data\Full-Data.csv')

# Example nominal voltage curve (approximate)
nominal_voltage = {
    0: 3.0, 10: 3.3, 20: 3.5, 40: 3.7, 60: 3.9, 80: 4.0, 100: 4.2
}

def estimate_nominal_voltage(soc):
    keys = sorted(nominal_voltage.keys())
    for i in range(len(keys)-1):
        if keys[i] <= soc <= keys[i+1]:
            v1, v2 = nominal_voltage[keys[i]], nominal_voltage[keys[i+1]]
            return v1 + (v2 - v1) * ((soc - keys[i]) / (keys[i+1] - keys[i]))
    return nominal_voltage[keys[-1]]

df['V_nominal'] = df['SoC'].apply(estimate_nominal_voltage)
df['BHI_point'] = (df['Voltage'] / df['V_nominal']) * 100
BHI_overall = df['BHI_point'].mean()
print("Estimated BHI: {:.2f}%".format(BHI_overall))


Estimated BHI: 60.52%


In [18]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score

# Encode label to numeric BHI-like values
encoder = LabelEncoder()
df['BHI'] = encoder.fit_transform(df['Label'])  # approximate numeric mapping

X = df[['SoC', 'Temperature', 'Voltage']]
y = df['BHI']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = RandomForestRegressor()
model.fit(X_train, y_train)
print("Model accuracy (R²):", r2_score(y_test, model.predict(X_test)))


Model accuracy (R²): 0.9355282051282051


In [20]:
!pip install dash dash-bootstrap-components

Collecting dash
  Downloading dash-3.2.0-py3-none-any.whl.metadata (10 kB)
Collecting dash-bootstrap-components
  Downloading dash_bootstrap_components-2.0.4-py3-none-any.whl.metadata (18 kB)
Collecting importlib-metadata (from dash)
  Downloading importlib_metadata-8.7.0-py3-none-any.whl.metadata (4.8 kB)
Collecting retrying (from dash)
  Downloading retrying-1.4.2-py3-none-any.whl.metadata (5.5 kB)
Collecting zipp>=3.20 (from importlib-metadata->dash)
  Downloading zipp-3.23.0-py3-none-any.whl.metadata (3.6 kB)
Downloading dash-3.2.0-py3-none-any.whl (7.9 MB)
   ---------------------------------------- 0.0/7.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/7.9 MB ? eta -:--:--
   - -------------------------------------- 0.3/7.9 MB ? eta -:--:--
   - -------------------------------------- 0.3/7.9 MB ? eta -:--:--
   -- ------------------------------------- 0.5/7.9 MB 621.2 kB/s eta 0:00:12
   -- ------------------------------------- 0.5/7.9 MB 621.2 kB/s eta 0:00:12
