In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Semiconductor Reliability Analysis\n",
    "## Exploring sensor data and building failure prediction models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "# Put ../src on the import path so data_generator and model (imported below) resolve\n",
    "sys.path.append('../src')\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from data_generator import SemiconductorDataGenerator\n",
    "from model import ReliabilityPredictor\n",
    "\n",
    "# Seed NumPy's global RNG so code that draws from np.random repeats across kernel restarts.\n",
    "# NOTE(review): the data generator and the model may use their own random sources\n",
    "# (e.g. the deep-learning backend); seed those as well if full reproducibility is needed.\n",
    "np.random.seed(42)\n",
    "\n",
    "plt.style.use('seaborn-v0_8')\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Generation and Exploration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate semiconductor sensor data (n_samples=1000, n_devices=5)\n",
    "generator = SemiconductorDataGenerator(n_samples=1000, n_devices=5)\n",
    "data = generator.generate_data()\n",
    "\n",
    "# Single print call; each argument becomes one line of the summary\n",
    "print(\n",
    "    f\"Dataset shape: {data.shape}\",\n",
    "    f\"Columns: {list(data.columns)}\",\n",
    "    f\"Devices: {data['device_id'].unique()}\",\n",
    "    f\"Failed samples: {data['failed'].sum()}\",\n",
    "    sep='\\n',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data overview: preview the first rows of the generated dataset\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Statistical summary of the dataset's columns via describe()\n",
    "data.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Exploratory Data Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sensor trends over time: one panel per sensor, one line per device\n",
    "fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
    "sensors = ['temperature', 'voltage', 'current', 'leakage']\n",
    "colors = ['red', 'blue', 'green', 'purple', 'orange']\n",
    "# Computed once instead of once per panel\n",
    "device_ids = data['device_id'].unique()\n",
    "\n",
    "# axes.flat walks the 2x2 grid row by row, i.e. in the same order as `sensors`\n",
    "for ax, sensor in zip(axes.flat, sensors):\n",
    "    for j, device_id in enumerate(device_ids):\n",
    "        device_data = data[data['device_id'] == device_id]\n",
    "        # Wrap around the palette so having more devices than colors cannot raise IndexError\n",
    "        ax.plot(device_data['time'], device_data[sensor],\n",
    "                alpha=0.7, label=f'Device {device_id}', color=colors[j % len(colors)])\n",
    "\n",
    "    ax.set_title(f'{sensor.title()} Trends Over Time', fontsize=12)\n",
    "    ax.set_xlabel('Time Steps')\n",
    "    ax.set_ylabel(sensor.title())\n",
    "    ax.legend()\n",
    "    ax.grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation matrix of the four sensor channels plus the failure flag\n",
    "corr_columns = ['temperature', 'voltage', 'current', 'leakage', 'failed']\n",
    "correlation = data[corr_columns].corr()\n",
    "\n",
    "# Draw on an explicit axes object rather than the implicit current figure\n",
    "corr_fig, corr_ax = plt.subplots(figsize=(8, 6))\n",
    "sns.heatmap(correlation, annot=True, cmap='coolwarm', center=0, square=True, ax=corr_ax)\n",
    "corr_ax.set_title('Sensor Correlation Matrix')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Model Training and Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare data for LSTM: build the (X, y) arrays with sequence_length=50\n",
    "predictor = ReliabilityPredictor(sequence_length=50)\n",
    "X, y = predictor.prepare_sequences(data)\n",
    "\n",
    "# Shapes and class balance, one line per argument\n",
    "print(\n",
    "    f\"Sequence shape: {X.shape}\",\n",
    "    f\"Target shape: {y.shape}\",\n",
    "    f\"Positive samples: {y.sum()} ({y.mean():.2%})\",\n",
    "    sep='\\n',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the model and keep the returned per-epoch history\n",
    "history = predictor.train(X, y, epochs=30)\n",
    "\n",
    "# Plot training history: loss on the left panel, accuracy on the right\n",
    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))\n",
    "\n",
    "# (axes, training key, validation key, display name) for each panel\n",
    "panels = [\n",
    "    (ax1, 'loss', 'val_loss', 'Loss'),\n",
    "    (ax2, 'accuracy', 'val_accuracy', 'Accuracy'),\n",
    "]\n",
    "\n",
    "for panel, train_key, val_key, title in panels:\n",
    "    panel.plot(history.history[train_key], label=f'Training {title}', color='blue')\n",
    "    panel.plot(history.history[val_key], label=f'Validation {title}', color='red')\n",
    "    panel.set_title(f'Model {title} Over Epochs')\n",
    "    panel.set_xlabel('Epoch')\n",
    "    panel.set_ylabel(title)\n",
    "    panel.legend()\n",
    "    panel.grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Model Predictions and Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make predictions and threshold the scores at 0.5 to get class labels\n",
    "predictions = predictor.predict(X)\n",
    "pred_binary = (predictions > 0.5).astype(int)\n",
    "\n",
    "# Calculate metrics\n",
    "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix\n",
    "\n",
    "# NOTE(review): X is the same array that was passed to predictor.train(), so these\n",
    "# scores include samples the model was fitted on and are likely optimistic.\n",
    "# Evaluate on held-out sequences for an unbiased estimate.\n",
    "accuracy = accuracy_score(y, pred_binary)\n",
    "# zero_division=0 reports 0 without a warning when a score is undefined\n",
    "# (e.g. no positive predictions, or no positive labels)\n",
    "precision = precision_score(y, pred_binary, zero_division=0)\n",
    "recall = recall_score(y, pred_binary, zero_division=0)\n",
    "f1 = f1_score(y, pred_binary, zero_division=0)\n",
    "\n",
    "print(\"Model Performance:\")\n",
    "print(f\"Accuracy:  {accuracy:.3f}\")\n",
    "print(f\"Precision: {precision:.3f}\")\n",
    "print(f\"Recall:    {recall:.3f}\")\n",
    "print(f\"F1-Score:  {f1:.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confusion matrix of the true labels against the thresholded predictions\n",
    "cm = confusion_matrix(y, pred_binary)\n",
    "\n",
    "# Draw on an explicit axes object rather than the implicit current figure\n",
    "cm_fig, cm_ax = plt.subplots(figsize=(6, 5))\n",
    "sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', square=True, ax=cm_ax)\n",
    "cm_ax.set_title('Confusion Matrix')\n",
    "cm_ax.set_xlabel('Predicted')\n",
    "cm_ax.set_ylabel('Actual')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prediction visualization\n",
    "plt.figure(figsize=(12, 6))\n",
    "\n",
    "# Sample subset for visualization (at most 1000 points)\n",
    "sample_size = min(1000, len(y))\n",
    "# Dedicated seeded generator so the same subset, and therefore the same figure,\n",
    "# is produced on every run\n",
    "rng = np.random.default_rng(42)\n",
    "# Sorted so the x-axis keeps the original sample order instead of a random shuffle\n",
    "indices = np.sort(rng.choice(len(y), sample_size, replace=False))\n",
    "sample_y = y[indices]\n",
    "sample_pred = predictions[indices].flatten()\n",
    "\n",
    "plt.scatter(range(len(sample_y)), sample_y, alpha=0.6, s=20, label='Actual Failures', color='red')\n",
    "plt.scatter(range(len(sample_pred)), sample_pred, alpha=0.6, s=20, label='Predicted Probability', color='blue')\n",
    "# Same 0.5 cut-off that is used to binarize the predictions\n",
    "plt.axhline(y=0.5, color='green', linestyle='--', alpha=0.7, label='Decision Threshold')\n",
    "plt.xlabel('Sample Index')\n",
    "plt.ylabel('Failure Probability')\n",
    "plt.title('Actual vs Predicted Failures')\n",
    "plt.legend()\n",
    "plt.grid(True, alpha=0.3)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Reliability Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time to failure analysis: one summary record per device\n",
    "RELIABILITY_HORIZON = 500  # time threshold behind the 'reliability_500' column\n",
    "device_reliability = []\n",
    "\n",
    "for device_id in data['device_id'].unique():\n",
    "    device_data = data[data['device_id'] == device_id]\n",
    "    failed_rows = device_data[device_data['failed'] == 1]\n",
    "\n",
    "    if len(failed_rows) > 0:\n",
    "        # Earliest failing time stamp; min() does not rely on the rows being time-sorted\n",
    "        ttf = failed_rows['time'].min()\n",
    "        # Share of this device's samples up to the horizon that are NOT flagged as failed.\n",
    "        # A plain row-count ratio would ignore the 'failed' flag altogether.\n",
    "        early = device_data[device_data['time'] <= RELIABILITY_HORIZON]\n",
    "        if len(early) > 0:\n",
    "            reliability_at_500 = 1.0 - (early['failed'] == 1).mean()\n",
    "        else:\n",
    "            # No observations inside the horizon: the value is undefined\n",
    "            reliability_at_500 = float('nan')\n",
    "    else:\n",
    "        ttf = data['time'].max()  # Did not fail\n",
    "        reliability_at_500 = 1.0\n",
    "\n",
    "    device_reliability.append({\n",
    "        'device_id': device_id,\n",
    "        'time_to_failure': ttf,\n",
    "        'reliability_500': reliability_at_500\n",
    "    })\n",
    "\n",
    "reliability_df = pd.DataFrame(device_reliability)\n",
    "print(\"Device Reliability Summary:\")\n",
    "print(reliability_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reliability visualization\n",
    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n",
    "\n",
    "# Time to failure\n",
    "ax1.bar(reliability_df['device_id'], reliability_df['time_to_failure'], color='skyblue')\n",
    "ax1.set_xlabel('Device ID')\n",
    "ax1.set_ylabel('Time to Failure')\n",
    "ax1.set_title('Time to Failure by Device')\n",
    "ax1.grid(True, alpha=0.3)\n",
    "\n",
    "# Failure rate per fixed-width time window\n",
    "bin_width = 50\n",
    "# Bin count comes from floor(max / width) + 1 so the window holding the largest\n",
    "# time stamp always exists, even when that maximum is an exact multiple of the\n",
    "# width (np.arange(0, max, width) would leave that last window out).\n",
    "n_bins = int(data['time'].max() // bin_width) + 1\n",
    "time_bins = np.arange(n_bins) * bin_width\n",
    "failure_rates = []\n",
    "\n",
    "for t in time_bins:\n",
    "    # Half-open window [t, t + bin_width)\n",
    "    window_data = data[(data['time'] >= t) & (data['time'] < t + bin_width)]\n",
    "    # Windows without samples are plotted as 0\n",
    "    failure_rates.append(window_data['failed'].mean() if len(window_data) > 0 else 0)\n",
    "\n",
    "ax2.plot(time_bins, failure_rates, marker='o', color='red')\n",
    "ax2.set_xlabel('Time')\n",
    "ax2.set_ylabel('Failure Rate')\n",
    "ax2.set_title('Failure Rate Over Time')\n",
    "ax2.grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Save Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the output folder first; to_csv does not create missing directories\n",
    "os.makedirs('../data', exist_ok=True)\n",
    "\n",
    "# Save generated data\n",
    "data.to_csv('../data/sample_data.csv', index=False)\n",
    "print(\"Data saved to ../data/sample_data.csv\")\n",
    "\n",
    "# Save model predictions next to the labels they were scored against\n",
    "results_df = pd.DataFrame({\n",
    "    'actual': y,\n",
    "    'predicted_prob': predictions.flatten(),\n",
    "    'predicted_binary': pred_binary.flatten()\n",
    "})\n",
    "results_df.to_csv('../data/predictions.csv', index=False)\n",
    "print(\"Predictions saved to ../data/predictions.csv\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}