In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# AADS_UNESCO_Risk_Classification_Baseline

**Author:** Javed Yasin  
**Project:** JAVED_UNESCO_AADS_2025 (BLUE_ROBOT)  
**Goal:** Build a beginner baseline for industrial risk classification to practice AADS research skills.

Notebook structure:
1. Dataset creation (synthetic)
2. Exploratory Data Analysis (EDA)
3. Train Decision Tree & Random Forest
4. Evaluate and visualize results
5. Save models and results
6. Short Abstract/Introduction template for paper practice

**How to use:** Run code cells top-to-bottom. When finished, copy the printed `Classification Report` and the one-paragraph summary you write (below) and paste them into the chat for review.


In [None]:
# === Imports ===
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# For reproducibility: seed NumPy's global RNG, which the np.random.* calls in the
# dataset-creation cell draw from. RANDOM_SEED is also passed as `random_state`
# to train_test_split and to the scikit-learn models further down.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)


In [None]:
# === Imports ===
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# For reproducibility: (re-)seed NumPy's global RNG with the same value, so the
# np.random.* draws in the next cell start from a known state.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)


In [None]:
# === 1) Create synthetic sensor dataset ===
N = 2000

# Independent draws from NumPy's global RNG. The Gaussian draws are not clipped,
# so vibration, water_level and gas_leak can come out negative.
temperature = np.random.normal(70, 12, N)   # degrees C
vibration   = np.random.normal(5, 3, N)     # vibration units
water_level = np.random.normal(10, 6, N)    # cm
gas_leak    = np.random.normal(20, 12, N)   # ppm
intrusion   = np.random.choice([0,1], N, p=[0.92,0.08])  # rare intrusion

# Rule-based labels, vectorised. np.select uses the FIRST condition that holds,
# which preserves the priority flood > equipment failure > intrusion > normal.
# gas_leak takes no part in the rules, so it is a pure noise feature.
labels = np.select(
    [
        water_level > 25,                          # 1 = flood risk
        (vibration > 12) | (temperature > 110),    # 2 = equipment failure
        intrusion == 1,                            # 3 = intrusion
    ],
    [1, 2, 3],
    default=0,                                     # 0 = normal
)

df = pd.DataFrame({
    "temperature": temperature,
    "vibration": vibration,
    "water_level": water_level,
    "gas_leak": gas_leak,
    "intrusion": intrusion,
    "label": labels
})

print("Dataset created. Shape:", df.shape)
df.head()


In [None]:
# === 2) EDA ===
display(df.describe().round(2))

# Class distribution
label_names = {0: "Normal", 1: "FloodRisk", 2: "EquipFail", 3: "Intrusion"}
class_ids = sorted(label_names)

# Reindex so that a class with no samples is reported as 0 instead of being skipped.
counts = df['label'].value_counts().reindex(class_ids, fill_value=0)
print("\nClass distribution:")
for k,v in counts.items():
    print(f"  {k} ({label_names[k]}): {v} samples")

plt.figure(figsize=(8,4))
# `order` pins one bar slot per class, so tick position i always belongs to
# class_ids[i] even if some class is absent from the data.
sns.countplot(x='label', data=df, order=class_ids)
plt.xticks(ticks=range(len(class_ids)), labels=[label_names[i] for i in class_ids])
plt.title("Class distribution")
plt.show()


In [None]:
# === 3) Prepare data ===
X = df.drop("label", axis=1)
y = df["label"]

# Stratified 80/20 split keeps the rare classes represented in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=RANDOM_SEED
)

# Fit the scaler on the training partition only, then apply it to both.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# Save dataset to CSV for record
os.makedirs("artifacts", exist_ok=True)
df.to_csv("artifacts/synthetic_irm_dataset.csv", index=False)
# The models below are trained on scaled features, so the fitted scaler has to be
# persisted with them; without it the saved models cannot be applied to raw data.
joblib.dump(scaler, "artifacts/scaler.pkl")
print("Train/test shapes:", X_train.shape, X_test.shape)


In [None]:
# === 4) Decision Tree (baseline) ===
dt = DecisionTreeClassifier(random_state=RANDOM_SEED, max_depth=6)
dt.fit(X_train_scaled, y_train)

y_pred_dt = dt.predict(X_test_scaled)

class_ids = sorted(label_names)
class_names = [label_names[i] for i in class_ids]

print("Decision Tree - Classification Report:")
# Passing `labels` ties each target name to its class id; without it the report
# raises if a class happens to be missing from both y_test and the predictions.
print(classification_report(y_test, y_pred_dt, labels=class_ids,
                            target_names=class_names, zero_division=0))

# Simple visualization of tree (optional)
plt.figure(figsize=(12,6))
# feature_names is passed as a plain list: some scikit-learn releases reject a pandas Index here.
plot_tree(dt, feature_names=list(X.columns), class_names=class_names, filled=True, fontsize=8)
plt.title("Decision Tree (max_depth=6)")
plt.show()

# Save model
joblib.dump(dt, "artifacts/decision_tree_model.pkl")


In [None]:
# === 5) Random Forest ===
rf = RandomForestClassifier(n_estimators=150, random_state=RANDOM_SEED)
rf.fit(X_train_scaled, y_train)

y_pred_rf = rf.predict(X_test_scaled)

class_ids = sorted(label_names)
class_names = [label_names[i] for i in class_ids]

print("Random Forest - Classification Report:")
print(classification_report(y_test, y_pred_rf, labels=class_ids,
                            target_names=class_names, zero_division=0))

# Confusion matrix
# `labels` forces one row/column per class, so the matrix always matches the
# tick labels below even when a class is missing from the test predictions.
cm = confusion_matrix(y_test, y_pred_rf, labels=class_ids)
plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Random Forest - Confusion Matrix")
plt.show()

# Feature importance, most important first
importances = rf.feature_importances_
indices = np.argsort(importances)[::-1]
plt.figure(figsize=(8,4))
sns.barplot(x=importances[indices], y=X.columns[indices])
plt.title("Random Forest Feature Importance")
plt.tight_layout()
plt.show()

# Save RF model
joblib.dump(rf, "artifacts/random_forest_model.pkl")


In [None]:
# === 6) Save results summary ===
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def save_model_results(model_name, y_true, y_pred):
    """Summarise one model's predictions as a flat metrics record.

    Despite the name, nothing is written to disk here; the caller collects the
    returned records and saves them.

    Args:
        model_name: Label stored under the "model" key.
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.

    Returns:
        dict with keys "model", "accuracy", "precision", "recall" and "f1";
        precision, recall and f1 are support-weighted averages over the classes.
    """
    summary = {"model": model_name, "accuracy": accuracy_score(y_true, y_pred)}
    weighted_scores = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    # The 4th element of the tuple (support) is not reported; zip stops after three names.
    summary.update(zip(("precision", "recall", "f1"), weighted_scores))
    return summary

# One metrics record per model, in a fixed order.
results = [
    save_model_results(model_label, y_test, predictions)
    for model_label, predictions in (
        ("DecisionTree", y_pred_dt),
        ("RandomForest", y_pred_rf),
    )
]

results_df = pd.DataFrame(results)
results_df.to_csv("artifacts/model_comparison_results.csv", index=False)
display(results_df.round(4))
print("Saved model artifacts in ./artifacts/")


## Abstract (write 2–4 sentences in English)
Type your short abstract below (I will correct it). Example starter sentence:
> This notebook presents a baseline implementation of an industrial risk classification module as part of my AADS research. I simulate sensor data, train baseline classifiers (Decision Tree and Random Forest), and evaluate their performance.

## Introduction (write 3–6 sentences in English)
Type your introduction below (I will correct and polish it). Example start:
> Intelligent Autonomous Decision Systems (AADS) support emergency management by analyzing sensor streams and recommending actions. In this study, I build a simple prototype to classify risk levels using simulated data as a first step toward more realistic IDSS experiments.


In [None]:
import joblib

# Load models from artifacts.
# The file names must match the ones the training cells wrote with joblib.dump
# ("decision_tree_model.pkl" / "random_forest_model.pkl").
dt_model = joblib.load("./artifacts/decision_tree_model.pkl")
rf_model = joblib.load("./artifacts/random_forest_model.pkl")


In [None]:
import joblib
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pandas as pd

# Dataset: the synthetic CSV that the data-preparation cell writes to ./artifacts/.
# Point this at any other CSV that has a `label` column to use different data.
DATASET_PATH = "artifacts/synthetic_irm_dataset.csv"
df = pd.read_csv(DATASET_PATH)

X = df.drop("label", axis=1)
y = df["label"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train models
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Save models
joblib.dump(dt_model, "DecisionTree_model.pkl")
joblib.dump(rf_model, "RandomForest_model.pkl")

print("✅ Models trained and saved successfully!")


In [None]:
import joblib
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris  # example dataset

# === Load dataset (Iris dataset) ===
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit both classifiers on the training rows.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Persist the fitted models next to the notebook.
for fitted_model, model_file in (
    (dt_model, "DecisionTree_model.pkl"),
    (rf_model, "RandomForest_model.pkl"),
):
    joblib.dump(fitted_model, model_file)

print("✅ Models trained and saved successfully!")


In [None]:
import joblib
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load models
dt_model = joblib.load("DecisionTree_model.pkl")
rf_model = joblib.load("RandomForest_model.pkl")

# Predictions
dt_pred = dt_model.predict(X_test)
rf_pred = rf_model.predict(X_test)

# Reports
print("=== Decision Tree Report ===")
print(classification_report(y_test, dt_pred))

print("=== Random Forest Report ===")
print(classification_report(y_test, rf_pred))

# Confusion Matrices, side by side.
# confusion_matrix puts true classes on rows and predicted classes on columns;
# the axes are labelled so each panel can be read without consulting the code.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
panels = (
    ("Decision Tree Confusion Matrix", dt_pred, "Blues"),
    ("Random Forest Confusion Matrix", rf_pred, "Greens"),
)
for ax, (panel_title, panel_pred, panel_cmap) in zip(axes, panels):
    sns.heatmap(confusion_matrix(y_test, panel_pred), annot=True, fmt="d", cmap=panel_cmap, ax=ax)
    ax.set_title(panel_title)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
fig.tight_layout()
plt.show()


In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predictions in the same order as the "Model" column.
model_predictions = (dt_pred, rf_pred)

# Collect metrics: one list per column, one entry per model.
results = {
    "Model": ["DecisionTree", "RandomForest"],
    "Accuracy": [accuracy_score(y_test, pred) for pred in model_predictions],
    "Precision": [precision_score(y_test, pred, average="weighted") for pred in model_predictions],
    "Recall": [recall_score(y_test, pred, average="weighted") for pred in model_predictions],
    "F1 Score": [f1_score(y_test, pred, average="weighted") for pred in model_predictions],
}

df_results = pd.DataFrame(results)
df_results.to_csv("model_comparison_results.csv", index=False)
print("✅ Results saved to model_comparison_results.csv")


In [None]:
# NOTE: needs the third-party `reportlab` package; the cell that installs it comes
# after this one, so on a kernel without reportlab these imports fail.
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table
from reportlab.lib.styles import getSampleStyleSheet

# Create PDF
doc = SimpleDocTemplate("Model_Comparison_Report.pdf", pagesize=letter)
styles = getSampleStyleSheet()
flow = []

flow.append(Paragraph("Model Comparison Report", styles['Title']))
flow.append(Spacer(1, 12))

# Add table: header row plus one row per model. Metric values are formatted to
# four decimals so the table does not show full float precision.
table_data = [df_results.columns.to_list()] + [
    [f"{value:.4f}" if isinstance(value, float) else str(value) for value in row]
    for row in df_results.values.tolist()
]
table = Table(table_data)
flow.append(table)

flow.append(Spacer(1, 24))
flow.append(Paragraph("This report compares Decision Tree and Random Forest models on the Iris dataset.", styles['Normal']))

doc.build(flow)
print("✅ PDF report created: Model_Comparison_Report.pdf")


In [None]:
!pip install reportlab


In [None]:
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table
from reportlab.lib.styles import getSampleStyleSheet

# Create PDF
doc = SimpleDocTemplate("Model_Comparison_Report.pdf", pagesize=letter)
styles = getSampleStyleSheet()
flow = []

flow.append(Paragraph("Model Comparison Report", styles['Title']))
flow.append(Spacer(1, 12))

# Add table: header row plus one row per model. Metric values are formatted to
# four decimals so the table does not show full float precision.
table_data = [df_results.columns.to_list()] + [
    [f"{value:.4f}" if isinstance(value, float) else str(value) for value in row]
    for row in df_results.values.tolist()
]
table = Table(table_data)
flow.append(table)

flow.append(Spacer(1, 24))
flow.append(Paragraph("This report compares Decision Tree and Random Forest models on the Iris dataset.", styles['Normal']))

doc.build(flow)
print("✅ PDF report created: Model_Comparison_Report.pdf")


In [None]:
!pip install python-docx


In [None]:
from docx import Document

# Create a new Word document
doc = Document()

# Title
doc.add_heading("Model Comparison Report", level=1)

# Intro
doc.add_paragraph(
    "This report compares the performance of Decision Tree and Random Forest models "
    "on the Iris dataset. Metrics such as Accuracy, Precision, Recall, and F1 Score are included."
)

# Add table: start with the header row only, then append one row per model.
table = doc.add_table(rows=1, cols=len(df_results.columns))
hdr_cells = table.rows[0].cells
for i, col_name in enumerate(df_results.columns):
    hdr_cells[i].text = col_name

# Add rows
for row in df_results.values:
    row_cells = table.add_row().cells
    for i, val in enumerate(row):
        # The first column holds the model name (a string); round() raises
        # TypeError on it, so only numeric cells are rounded.
        if isinstance(val, (float, int)):
            row_cells[i].text = str(round(val, 4))
        else:
            row_cells[i].text = str(val)

# Save file
doc.save("Model_Comparison_Report.docx")
print("✅ Word report created: Model_Comparison_Report.docx")


In [None]:
from docx import Document

# Start from an empty Word document.
doc = Document()

# Title
doc.add_heading("Model Comparison Report", level=1)

# Introductory paragraph
doc.add_paragraph(
    "This report compares the performance of Decision Tree and Random Forest models "
    "on the Iris dataset. Metrics such as Accuracy, Precision, Recall, and F1 Score are included."
)

# Table with one column per results column; the first row is the header.
table = doc.add_table(rows=1, cols=len(df_results.columns))
for header_cell, col_name in zip(table.rows[0].cells, df_results.columns):
    header_cell.text = col_name

# One table row per results row: numbers are rounded to 4 decimals, anything
# else (the model name) is written unchanged.
for record in df_results.values:
    for body_cell, val in zip(table.add_row().cells, record):
        body_cell.text = str(round(val, 4)) if isinstance(val, (float, int)) else str(val)

# Save file
doc.save("Model_Comparison_Report.docx")
print("✅ Word report created: Model_Comparison_Report.docx")


In [None]:
# Write the current `doc` to Kaggle's working directory using an absolute path.
# NOTE(review): assumes the notebook runs on Kaggle, where /kaggle/working exists.
doc.save("/kaggle/working/Model_Comparison_Report.docx")


In [None]:
from IPython.display import FileLink, display

# A notebook cell only renders its LAST bare expression, so each link is passed
# to display() explicitly; otherwise the Word link would never be shown.

# For Word
display(FileLink("/kaggle/working/Model_Comparison_Report.docx"))

# For PDF
display(FileLink("/kaggle/working/Model_Comparison_Report.pdf"))


In [None]:
from IPython.display import FileLink, FileLinks, display

# ✅ Option 1: single file link
# display() is required here: a bare expression that is not the last statement
# of the cell produces no output.
display(FileLink("Model_Comparison_Report.docx"))

# ✅ Option 2: show all downloadable files in /kaggle/working
FileLinks("/kaggle/working")


In [None]:
# Write the current `doc` to Kaggle's working directory using an absolute path.
# NOTE(review): assumes the notebook runs on Kaggle, where /kaggle/working exists.
doc.save("/kaggle/working/Model_Comparison_Report.docx")


In [None]:
from IPython.display import FileLink, FileLinks, display

# ✅ Direct link to your Word report
# display() is required here: a bare expression that is not the last statement
# of the cell produces no output.
display(FileLink("/kaggle/working/Model_Comparison_Report.docx"))

# ✅ (Optional) show all files in working directory
FileLinks("/kaggle/working")


In [None]:
from IPython.display import FileLink, FileLinks

# Download links for the Word report and the PDF report, in that order.
for report_path in (
    "/kaggle/working/Model_Comparison_Report.docx",
    "/kaggle/working/Model_Comparison_Report.pdf",
):
    display(FileLink(report_path))

# Listing of every file in the working directory (rendered as the cell output).
FileLinks("/kaggle/working")


In [None]:
import joblib
import pandas as pd
import matplotlib.pyplot as plt

# Load the synthetic dataset that the data-preparation cell wrote to ./artifacts/.
df = pd.read_csv("artifacts/synthetic_irm_dataset.csv")

# Load the Random Forest that was trained on this dataset's feature columns, so
# its feature_importances_ line up one-to-one with df.drop("label", axis=1).columns.
rf_model = joblib.load("artifacts/random_forest_model.pkl")


In [None]:
# Importance score per feature; names are every column of `df` except the label.
importances = rf_model.feature_importances_
features = df.drop("label", axis=1).columns

# Horizontal bar chart, one bar per feature.
fig, ax = plt.subplots(figsize=(8,5))
ax.barh(features, importances, color="skyblue")
ax.set_xlabel("Importance")
ax.set_title("Feature Importance - Random Forest")
plt.show()


In [None]:
import pandas as pd
import numpy as np

# Dummy dataset with 3 integer features and a binary label, 100 rows.
# Each entry is (column name, low, high) for np.random.randint (high exclusive);
# the columns are drawn in this order from the freshly seeded global RNG.
np.random.seed(42)
column_ranges = (
    ("temperature", 20, 40),
    ("pressure", 80, 120),
    ("vibration", 1, 10),
    ("label", 0, 2),  # target
)
df = pd.DataFrame({
    col: np.random.randint(low, high, 100) for col, low, high in column_ranges
})

df.head()


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import joblib

# Features are every column except the target.
X = df.drop(columns="label")
y = df["label"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# fit() returns the estimator itself, so construction and training can be chained.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Save model
joblib.dump(rf_model, "RandomForest_model.pkl")
print("✅ Random Forest trained and saved")


In [None]:
import matplotlib.pyplot as plt

# Importance score per training column of the Random Forest.
importances = rf_model.feature_importances_
features = X.columns

fig, ax = plt.subplots(figsize=(8,5))
ax.barh(features, importances, color="skyblue")
ax.set_xlabel("Importance")
ax.set_title("Feature Importance - Random Forest")
plt.show()


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Train an unconstrained decision tree on the same split as the forest;
# fit() returns the estimator, so the fitted model is bound directly.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

joblib.dump(dt_model, "DecisionTree_model.pkl")
print("✅ Decision Tree trained and saved")


In [None]:
# Per-feature importance scores of both fitted models.
dt_importances = dt_model.feature_importances_
rf_importances = rf_model.feature_importances_

# Side-by-side table: one row per feature, one column per model.
importance_df = pd.DataFrame(
    {"DecisionTree": dt_importances, "RandomForest": rf_importances}
)
importance_df.insert(0, "Feature", X.columns)

print(importance_df)


In [None]:
# Grouped bar chart: one group per feature, one bar per model.
ax = importance_df.set_index("Feature").plot(kind="bar", figsize=(8,5))
ax.set_title("Feature Importance: Decision Tree vs Random Forest")
ax.set_ylabel("Importance")
plt.show()


In [None]:
!pip install python-docx


In [None]:
import os

import pandas as pd
from docx import Document
from docx.shared import Inches
import matplotlib.pyplot as plt

# Create Word document
doc = Document()
doc.add_heading("Model Comparison Report", 0)

# Section 1: Dataset
doc.add_heading("1. Dataset", level=1)
doc.add_paragraph("The dataset was used for classification with input features (X) and target label (y). "
                  "Data was split into training and test sets using 80/20 ratio.")

# Section 2: Model Performance
doc.add_heading("2. Model Performance", level=1)
doc.add_paragraph("The following table shows the performance metrics for Decision Tree and Random Forest models:")

# Table contents. Prefer the metrics file written by the results-summary cell
# (columns: model, accuracy, precision, recall, f1) so the report always shows
# the numbers of the current run; fall back to the previously recorded values
# when that file is not available.
RESULTS_CSV = "artifacts/model_comparison_results.csv"
if os.path.exists(RESULTS_CSV):
    metric_rows = [
        [str(rec["model"])] + [f"{rec[col]:.4f}" for col in ("accuracy", "precision", "recall", "f1")]
        for rec in pd.read_csv(RESULTS_CSV).to_dict("records")
    ]
else:
    metric_rows = [
        ["DecisionTree", "1.0000", "1.0000", "1.0000", "1.0000"],
        ["RandomForest", "0.9975", "0.9976", "0.9975", "0.9973"],
    ]
results = [["Model", "Accuracy", "Precision", "Recall", "F1-Score"]] + metric_rows

table = doc.add_table(rows=1, cols=len(results[0]))
# Styles are looked up by their UI name; lookup by style id
# ('LightShading-Accent1') is deprecated in python-docx and emits a warning.
table.style = 'Light Shading Accent 1'

# Header row
hdr_cells = table.rows[0].cells
for i, val in enumerate(results[0]):
    hdr_cells[i].text = val

# Data rows
for row in results[1:]:
    row_cells = table.add_row().cells
    for i, val in enumerate(row):
        row_cells[i].text = val

# Section 3: Feature Importance
doc.add_heading("3. Feature Importance Comparison", level=1)
doc.add_paragraph("We compared the feature importance scores of Decision Tree and Random Forest. "
                  "The chart below shows their differences:")

# Plot and save feature importance chart; the figure is closed after saving so
# it goes into the document only and is not rendered inline as well.
importance_df.set_index("Feature").plot(kind="bar", figsize=(8,5))
plt.title("Feature Importance: Decision Tree vs Random Forest")
plt.ylabel("Importance")
plt.tight_layout()
plt.savefig("feature_importance.png")
plt.close()

# Add chart image to Word
doc.add_picture("feature_importance.png", width=Inches(5))

# Section 4: Conclusion
# NOTE(review): the wording below is fixed text. It speaks of "training performance",
# while the table presumably holds test-set metrics - confirm the wording against
# the numbers actually shown.
doc.add_heading("4. Conclusion", level=1)
doc.add_paragraph(
    "The Decision Tree achieved perfect training performance, but Random Forest provided more stable feature importance values. "
    "Random Forest is generally preferred for research as it reduces overfitting and provides more reliable feature ranking."
)

# Save Word report
doc.save("/kaggle/working/Model_Comparison_Report.docx")
print("✅ Word report created: Model_Comparison_Report.docx")


In [None]:
from IPython.display import FileLink

# Build the download link first, then leave it as the cell's last expression so it renders.
word_report_link = FileLink("/kaggle/working/Model_Comparison_Report.docx")
word_report_link


In [None]:
# Write the current `doc` to Kaggle's working directory using an absolute path.
# NOTE(review): assumes the notebook runs on Kaggle, where /kaggle/working exists.
doc.save("/kaggle/working/Model_Comparison_Report.docx")


In [None]:
from IPython.display import FileLink

# A bare file name is evaluated as a Python expression and raises NameError.
# Render a download link to the saved report instead.
FileLink("/kaggle/working/Model_Comparison_Report.docx")


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 1) Define Neural Net
class SimpleNN(nn.Module):
    """Fully connected classifier with two hidden layers of equal width.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of output logits (one per class).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return raw class logits for ``x``; no softmax is applied."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

# 2) Prepare Data
from sklearn.model_selection import train_test_split

X = df.drop("label", axis=1).values
y = df["label"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.long)
X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.long)

# 3) Train Neural Net
# Seed torch before the model is built: the layer weights are initialised
# randomly, so without a seed every run reports different metrics.
torch.manual_seed(42)
model = SimpleNN(input_dim=X_train.shape[1], hidden_dim=64, output_dim=len(set(y)))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Full-batch training: each epoch is a single gradient step on the whole training set.
for epoch in range(20):  # small epochs for practice
    optimizer.zero_grad()
    outputs = model(X_train_torch)
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()

# 4) Evaluate
with torch.no_grad():
    preds = model(X_test_torch).argmax(dim=1).numpy()

# zero_division=0 keeps the scores defined (and silences the warning) when the
# network never predicts one of the classes.
acc = accuracy_score(y_test, preds)
prec = precision_score(y_test, preds, average="weighted", zero_division=0)
rec = recall_score(y_test, preds, average="weighted", zero_division=0)
f1 = f1_score(y_test, preds, average="weighted", zero_division=0)

print("Neural Network Results:", acc, prec, rec, f1)


In [None]:
# === 1) Imports ===
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import torch
import torch.nn as nn
import torch.optim as optim

# === 2) Load Dataset ===
# Reads the synthetic dataset that the data-preparation cell writes to ./artifacts/;
# point this at any other CSV that has a `label` column to use different data.
df = pd.read_csv("artifacts/synthetic_irm_dataset.csv")

X = df.drop("label", axis=1).values
y = df["label"].values

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# === 3) Feature Scaling ===
# The scaled copies feed the neural network; the tree models below use the raw features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === 4) Baseline Models ===
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(random_state=42)

dt.fit(X_train, y_train)
rf.fit(X_train, y_train)

# Evaluate baseline
dt_preds = dt.predict(X_test)
rf_preds = rf.predict(X_test)

# Prints accuracy, macro precision, macro recall and macro F1 for each baseline.
# zero_division=0 keeps the macro averages defined when a class is never predicted.
for heading, baseline_preds in (("Decision Tree:", dt_preds), ("Random Forest:", rf_preds)):
    print(heading,
          accuracy_score(y_test, baseline_preds),
          precision_score(y_test, baseline_preds, average='macro', zero_division=0),
          recall_score(y_test, baseline_preds, average='macro', zero_division=0),
          f1_score(y_test, baseline_preds, average='macro', zero_division=0))

# === 5) Neural Network ===
class SimpleNN(nn.Module):
    """Feed-forward classifier: input_dim -> 64 -> 32 -> num_classes, with ReLU in between.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits. When omitted, it is taken from the
            number of distinct values in the notebook-level label array ``y``,
            which must then already exist.
    """

    def __init__(self, input_dim, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: infer the class count from the global labels.
            num_classes = len(np.unique(y))
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, num_classes)  # output = num classes
        )

    def forward(self, x):
        """Return raw class logits for ``x``; no softmax is applied."""
        return self.layers(x)

# Model, Loss, Optimizer
# Seed torch before the model is built so the random weight initialisation, and
# therefore the reported metrics, are the same on every run.
torch.manual_seed(42)
input_dim = X_train_scaled.shape[1]
model = SimpleNN(input_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Convert to torch tensors
X_train_torch = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.long)
X_test_torch = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.long)

# Training loop (full batch: one gradient step per epoch)
for epoch in range(100):  # 100 epochs
    optimizer.zero_grad()
    outputs = model(X_train_torch)
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 20 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Evaluation
with torch.no_grad():
    outputs = model(X_test_torch)
    _, preds = torch.max(outputs, 1)  # index of the largest logit = predicted class

nn_acc = accuracy_score(y_test, preds)
nn_prec = precision_score(y_test, preds, average='macro', zero_division=0)
nn_rec = recall_score(y_test, preds, average='macro', zero_division=0)
nn_f1 = f1_score(y_test, preds, average='macro', zero_division=0)

# This statement was cut off mid-string (a SyntaxError that stopped the whole cell).
print("Neural Network:", nn_acc, nn_prec, nn_rec, nn_f1)


In [None]:
# Evaluation: predicted class = index of the largest logit in each row.
with torch.no_grad():
    outputs = model(X_test_torch)
    preds = outputs.argmax(dim=1)

# Accuracy plus macro-averaged precision / recall / F1 (undefined scores count as 0).
nn_acc = accuracy_score(y_test, preds)
nn_prec, nn_rec, nn_f1 = (
    metric(y_test, preds, average='macro', zero_division=0)
    for metric in (precision_score, recall_score, f1_score)
)

print("Neural Network:", nn_acc, nn_prec, nn_rec, nn_f1)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim


In [None]:
import torch


In [None]:
# === 1) Imports ===
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import torch
import torch.nn as nn
import torch.optim as optim

# === 2) Load Dataset ===
# Reads the synthetic dataset that the data-preparation cell writes to ./artifacts/;
# point this at any other CSV that has a `label` column to use different data.
df = pd.read_csv("artifacts/synthetic_irm_dataset.csv")

X = df.drop("label", axis=1).values
y = df["label"].values

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# === 3) Feature Scaling ===
# The scaled copies feed the neural network; the tree models below use the raw features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === 4) Baseline Models ===
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(random_state=42)

dt.fit(X_train, y_train)
rf.fit(X_train, y_train)

# Evaluate baseline
dt_preds = dt.predict(X_test)
rf_preds = rf.predict(X_test)

# Prints accuracy, macro precision, macro recall and macro F1 for each baseline.
# zero_division=0 keeps the macro averages defined when a class is never predicted.
for heading, baseline_preds in (("Decision Tree:", dt_preds), ("Random Forest:", rf_preds)):
    print(heading,
          accuracy_score(y_test, baseline_preds),
          precision_score(y_test, baseline_preds, average='macro', zero_division=0),
          recall_score(y_test, baseline_preds, average='macro', zero_division=0),
          f1_score(y_test, baseline_preds, average='macro', zero_division=0))

# === 5) Neural Network ===
class SimpleNN(nn.Module):
    """Feed-forward classifier: input_dim -> 64 -> 32 -> num_classes, with ReLU in between.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits. When omitted, it is taken from the
            number of distinct values in the notebook-level label array ``y``,
            which must then already exist.
    """

    def __init__(self, input_dim, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: infer the class count from the global labels.
            num_classes = len(np.unique(y))
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, num_classes)  # output = num classes
        )

    def forward(self, x):
        """Return raw class logits for ``x``; no softmax is applied."""
        return self.layers(x)

# Model, Loss, Optimizer
# Seed torch before the model is built so the random weight initialisation, and
# therefore the reported metrics, are the same on every run.
torch.manual_seed(42)
input_dim = X_train_scaled.shape[1]
model = SimpleNN(input_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Convert to torch tensors
X_train_torch = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.long)
X_test_torch = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.long)

# Training loop (full batch: one gradient step per epoch)
for epoch in range(100):  # 100 epochs
    optimizer.zero_grad()
    outputs = model(X_train_torch)
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 20 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Evaluation
with torch.no_grad():
    outputs = model(X_test_torch)
    _, preds = torch.max(outputs, 1)  # index of the largest logit = predicted class

nn_acc = accuracy_score(y_test, preds)
nn_prec = precision_score(y_test, preds, average='macro', zero_division=0)
nn_rec = recall_score(y_test, preds, average='macro', zero_division=0)
nn_f1 = f1_score(y_test, preds, average='macro', zero_division=0)

print("Neural Network:", nn_acc, nn_prec, nn_rec, nn_f1)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the Iris CSV from the Kaggle input directory.
df = pd.read_csv("/kaggle/input/iris/Iris.csv")

# Features: every column except the row id and the species name.
X = df.drop(columns=["Id", "Species"]).values
# Target: species names encoded as integers 0, 1, 2.
species_encoder = LabelEncoder()
y = species_encoder.fit_transform(df["Species"])

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise using statistics of the training rows only, then apply to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Convert to tensors: features as float32, class labels as int64 (long).
X_train_torch, X_test_torch = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_torch, y_test_torch = (
    torch.tensor(targets, dtype=torch.long) for targets in (y_train, y_test)
)

# Define NN
class SimpleNN(nn.Module):
    """Small feed-forward classifier with two hidden ReLU layers.

    The defaults reproduce the Iris setup (4 features -> 16 -> 8 -> 3 classes), so
    ``SimpleNN()`` builds the same architecture as before; other datasets can pass
    their own sizes.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim1: Width of the first hidden layer.
        hidden_dim2: Width of the second hidden layer.
        num_classes: Number of output logits (one per class).
    """

    def __init__(self, input_dim=4, hidden_dim1=16, hidden_dim2=8, num_classes=3):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.fc3 = nn.Linear(hidden_dim2, num_classes)

    def forward(self, x):
        """Return raw class logits for ``x``; no softmax is applied."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize
# Seed torch first: layer weights are drawn randomly at construction, so without
# a seed the training curve and final accuracy change from run to run.
torch.manual_seed(42)
model = SimpleNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)


In [None]:
# Training loop: full-batch gradient descent, one optimizer step per epoch.
epochs = 50
for epoch_number in range(1, epochs + 1):
    optimizer.zero_grad()
    outputs = model(X_train_torch)
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()

    # Report the loss every 10th epoch.
    if epoch_number % 10 == 0:
        print(f"Epoch {epoch_number}/{epochs}, Loss: {loss.item():.4f}")


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predictions: class index with the largest logit for each test row.
with torch.no_grad():
    y_pred = model(X_test_torch)
    _, preds = torch.max(y_pred, 1)

# LabelEncoder numbers the classes in sorted order, whereas Series.unique() returns
# them in order of first appearance. Sorting the names guarantees that name i
# belongs to encoded class i regardless of the row order in the CSV.
class_names = sorted(df["Species"].unique())

# Evaluation
print("Accuracy:", accuracy_score(y_test, preds))
print("\nClassification Report:\n", classification_report(
    y_test, preds, labels=list(range(len(class_names))), target_names=class_names))
