In [22]:
import heapq
import math

def a_star(start, goal, graph, heuristic):
    """Find a least-cost path from ``start`` to ``goal`` using A* search.

    Args:
        start: Node the search begins at.
        goal: Node the search is trying to reach.
        graph: Mapping ``node -> {neighbor: edge_cost}``. A node that only
            appears as a neighbor (and has no entry of its own) is treated as
            having no outgoing edges.
        heuristic: Mapping ``node -> estimated remaining cost``. Nodes missing
            from the mapping get an estimate of 0.

    Returns:
        ``(path, cost)`` where ``path`` is the list of nodes from ``start`` to
        ``goal`` inclusive, or ``(None, float('inf'))`` when ``goal`` cannot be
        reached.
    """
    unreachable = float('inf')
    g_score = {start: 0}
    came_from = {}

    # Heap entries are (f, insertion_index, g, node). The insertion index
    # breaks f-score ties so node objects are never compared with each other.
    pushed = 0
    open_list = [(heuristic.get(start, 0), pushed, 0, start)]

    while open_list:
        _, _, entry_g, current = heapq.heappop(open_list)

        # A cheaper route to this node was found after this entry was pushed.
        if entry_g > g_score.get(current, unreachable):
            continue

        if current == goal:
            path = [current]
            while current != start:
                current = came_from[current]
                path.append(current)
            path.reverse()
            return path, g_score[goal]

        for neighbor, cost in graph.get(current, {}).items():
            tentative_g = g_score[current] + cost
            if tentative_g < g_score.get(neighbor, unreachable):
                came_from[neighbor] = current
                g_score[neighbor] = tentative_g
                pushed += 1
                f_score = tentative_g + heuristic.get(neighbor, 0)
                heapq.heappush(open_list, (f_score, pushed, tentative_g, neighbor))
    return None, unreachable

# Undirected weighted graph: graph[u][v] is the cost of travelling from u to v.
graph = {
    'A': {'B': 4, 'C': 3},
    'B': {'A': 4, 'D': 2, 'E': 5},
    'C': {'A': 3, 'D': 6},
    'D': {'B': 2, 'C': 6, 'E': 1, 'F': 7},
    'E': {'B': 5, 'D': 1, 'F': 3},
    'F': {'D': 7, 'E': 3}
}

# Estimated remaining cost from each node to 'F'. Every estimate is at most
# the true cheapest cost in `graph` (B->F is 6 via D and E, D->F is 4 via E),
# so the heuristic is admissible and A* is guaranteed to return an optimal path.
heuristic = {
    'A': 10,
    'B': 6,
    'C': 9,
    'D': 4,
    'E': 3,
    'F': 0
}

start = 'A'
goal = 'F'
path, cost = a_star(start, goal, graph, heuristic)

print("Shortest Path found by A*:", path)
print("Total Cost:", cost)

Shortest Path found by A*: ['A', 'B', 'D', 'E', 'F']
Total Cost: 10


In [2]:
from collections import deque

def bfs_shortest_path(maze, start, goal):
    """Return a shortest 4-connected path through a grid maze using BFS.

    Args:
        maze: Rectangular list of rows where ``0`` marks an open cell and any
            other value marks a blocked cell.
        start: ``(row, col)`` of the starting cell.
        goal: ``(row, col)`` of the target cell.

    Returns:
        List of ``(row, col)`` tuples from ``start`` to ``goal`` inclusive, or
        ``None`` when the maze is empty, either endpoint is outside the grid or
        blocked, or no path exists.
    """
    if not maze or not maze[0]:
        return None
    rows, cols = len(maze), len(maze[0])

    # Normalise so list-style coordinates compare equal to the tuples used below.
    start, goal = tuple(start), tuple(goal)

    def is_open(cell):
        r, c = cell
        return 0 <= r < rows and 0 <= c < cols and maze[r][c] == 0

    if not (is_open(start) and is_open(goal)):
        return None

    # `parent` doubles as the visited set: a cell is a key once it is queued.
    parent = {start: None}
    queue = deque([start])

    while queue:
        cell = queue.popleft()

        if cell == goal:
            path = []
            while cell is not None:
                path.append(cell)
                cell = parent[cell]
            path.reverse()
            return path

        x, y = cell
        for dx, dy in ((-1, 0), (1, 0), (0, -1), (0, 1)):
            nxt = (x + dx, y + dy)
            if nxt not in parent and is_open(nxt):
                parent[nxt] = cell
                queue.append(nxt)

    return None  # No path found

# Demo grid for bfs_shortest_path: the search only steps onto cells equal to 0.
maze = [
    [0, 0, 1, 0, 0],
    [1, 0, 1, 0, 1],
    [0, 0, 0, 0, 0],
    [0, 1, 1, 1, 0],
    [0, 0, 0, 1, 0]
]

start = (0, 0)   # Starting cell
goal = (4, 4)    # Goal cell

path = bfs_shortest_path(maze, start, goal)

# The path lists both endpoints, so the number of moves is len(path) - 1.
if path:
    print("Shortest path found using BFS:")
    print(path)
    print("Total steps:", len(path)-1)
else:
    print("No path found!")

Shortest path found using BFS:
[(0, 0), (0, 1), (1, 1), (2, 1), (2, 2), (2, 3), (2, 4), (3, 4), (4, 4)]
Total steps: 8


In [3]:
# Directed adjacency list for the game world: each location maps to the
# locations reachable from it in one move; an empty list marks a dead end.
game_map = {
    'Start': ['Forest', 'Village'],
    'Forest': ['Cave', 'River'],
    'Village': ['Castle', 'Market'],
    'Cave': ['Treasure'],
    'River': [],
    'Castle': ['Dungeon'],
    'Market': [],
    'Dungeon': [],
    'Treasure': []
}

def dfs(graph, node, goal, visited, path, all_paths, order):
    """Backtracking depth-first search that records every path to ``goal``.

    ``visited`` and ``path`` describe the branch currently being explored; the
    node is added on entry and taken out again before returning, so both are
    left as they were found. ``order`` gets the node appended on every entry
    (a node reached along several branches appears several times), and a copy
    of ``path`` is appended to ``all_paths`` each time ``goal`` is reached.
    The search does not continue past ``goal``.
    """
    order.append(node)
    path.append(node)
    visited.add(node)

    if node != goal:
        for successor in graph[node]:
            if successor in visited:
                continue
            dfs(graph, successor, goal, visited, path, all_paths, order)
    else:
        all_paths.append(path[:])

    # Undo this node's marks so sibling branches may pass through it again.
    visited.remove(node)
    path.pop()

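# dfs() above already treats `visited` on a per-path basis: it removes each node
# again after exploring it, so other branches can pass through that node.
# The function below does the same job without shared mutable state: every call
# builds its own `path` list and uses membership in that list to avoid cycles.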
def find_all_paths_dfs(graph, node, goal, path, all_paths, order_visited):
    """Collect every cycle-free path from ``node`` to ``goal``.

    ``path`` is the list of nodes leading up to (not including) ``node``; it is
    never mutated because each call works on its own extended copy. Completed
    paths are appended to ``all_paths``. ``order_visited`` receives each node
    the first time it is reached. Neighbours are still examined after ``goal``
    has been recorded.
    """
    trail = path + [node]

    first_time_seen = node not in order_visited
    if first_time_seen:
        order_visited.append(node)

    if node == goal:
        all_paths.append(trail)

    for successor in graph[node]:
        if successor in trail:
            # Already on the current path: following it would create a cycle.
            continue
        find_all_paths_dfs(graph, successor, goal, trail, all_paths, order_visited)

start = 'Start'
goal = 'Treasure'

# Accumulators filled in by the search below.
all_paths = []
order = []
path = []

# find_all_paths_dfs appends each node to `order` the first time it is reached
# and appends every completed Start -> Treasure path to `all_paths`.
find_all_paths_dfs(game_map, start, goal, path, all_paths, order)

print("DFS Traversal Order (First exploration):")
print(" → ".join(order))

print("\nAll possible paths from Start to Treasure:")
for i, p in enumerate(all_paths, 1):
    print(f"Path {i}: {' → '.join(p)}")

DFS Traversal Order (First exploration):
Start → Forest → Cave → Treasure → River → Village → Castle → Dungeon → Market

All possible paths from Start to Treasure:
Path 1: Start → Forest → Cave → Treasure


In [4]:
import heapq

def a_star(maze, start, goal):
    """Find a shortest 4-connected path through a grid maze with A* search.

    Every move costs 1 and the Manhattan distance to ``goal`` is used as the
    heuristic.

    Args:
        maze: Rectangular list of rows where ``0`` marks an open cell and any
            other value marks a blocked cell.
        start: ``(row, col)`` of the starting cell.
        goal: ``(row, col)`` of the target cell.

    Returns:
        List of ``(row, col)`` tuples from ``start`` to ``goal`` inclusive, or
        ``None`` when the maze is empty, either endpoint is outside the grid or
        blocked, or no path exists.
    """
    if not maze or not maze[0]:
        return None
    rows, cols = len(maze), len(maze[0])

    # Coordinates are used as dict keys, so they must be hashable tuples.
    start, goal = tuple(start), tuple(goal)

    def heuristic(a, b):
        return abs(a[0]-b[0]) + abs(a[1]-b[1])

    def walkable(cell):
        r, c = cell
        return 0 <= r < rows and 0 <= c < cols and maze[r][c] == 0

    if not (walkable(start) and walkable(goal)):
        return None

    open_list = [(heuristic(start, goal), start)]
    g_cost = {start: 0}
    came_from = {start: None}

    while open_list:
        f_cost, current = heapq.heappop(open_list)
        if current == goal:
            path = []
            while current is not None:
                path.append(current)
                current = came_from[current]
            return path[::-1]

        # Stale entry: this cell was re-queued later with a lower cost.
        if f_cost > g_cost[current] + heuristic(current, goal):
            continue

        x, y = current
        new_cost = g_cost[current] + 1
        for dx, dy in [(-1,0),(1,0),(0,-1),(0,1)]:
            neighbor = (x+dx, y+dy)
            if not walkable(neighbor):
                continue
            if neighbor not in g_cost or new_cost < g_cost[neighbor]:
                g_cost[neighbor] = new_cost
                came_from[neighbor] = current
                heapq.heappush(open_list, (new_cost + heuristic(neighbor, goal), neighbor))
    return None

# Demo grid for the maze A*: the search only steps onto cells equal to 0.
maze = [
    [0, 1, 0, 0, 0],
    [0, 1, 0, 1, 0],
    [0, 0, 0, 1, 0],
    [0, 1, 1, 1, 0],
    [0, 0, 0, 0, 0]
]

start = (0, 0)
goal = (4, 4)
path = a_star(maze, start, goal)

# Each move costs 1 and the path lists both endpoints, so cost = len(path) - 1.
if path:
    print("Path found:", path)
    print("Total cost:", len(path)-1)
else:
    print("No path found")

Path found: [(0, 0), (1, 0), (2, 0), (3, 0), (4, 0), (4, 1), (4, 2), (4, 3), (4, 4)]
Total cost: 8


In [5]:
import heapq

def heuristic(state, goal):
    """Total Manhattan distance of tiles 1-8 between ``state`` and ``goal``.

    Both arguments are flat, row-major sequences describing a 3x3 board. The
    blank (0) is not counted.
    """
    total = 0
    for tile in range(1, 9):
        row_now, col_now = divmod(state.index(tile), 3)
        row_goal, col_goal = divmod(goal.index(tile), 3)
        total += abs(row_now - row_goal) + abs(col_now - col_goal)
    return total

def get_neighbors(state):
    """Return the boards reachable from ``state`` by sliding one tile.

    ``state`` is a flat, row-major 3x3 board whose blank is 0. Results are
    tuples, ordered by the direction the blank moves: up, down, left, right
    (directions that would leave the board are omitted).
    """
    blank = state.index(0)
    blank_row, blank_col = divmod(blank, 3)

    successors = []
    for row_step, col_step in ((-1, 0), (1, 0), (0, -1), (0, 1)):
        row, col = blank_row + row_step, blank_col + col_step
        if not (0 <= row < 3 and 0 <= col < 3):
            continue
        target = 3 * row + col
        # Exchange the values at `blank` and `target`, leaving the rest alone.
        swapped = tuple(
            state[target] if pos == blank
            else state[blank] if pos == target
            else value
            for pos, value in enumerate(state)
        )
        successors.append(swapped)
    return successors

def a_star_8_puzzle(start, goal):
    """Solve the 8-puzzle with A* using the Manhattan-distance heuristic.

    Args:
        start: Flat, row-major 3x3 board (blank is 0) to start from.
        goal: Flat, row-major 3x3 board to reach.

    Returns:
        List of board tuples from ``start`` to ``goal`` inclusive, or ``None``
        when ``goal`` cannot be reached from ``start``.
    """
    # States are used as dict keys and set members, so they must be tuples.
    start, goal = tuple(start), tuple(goal)

    def inversion_parity(state):
        tiles = [t for t in state if t != 0]
        inversions = sum(
            1
            for i in range(len(tiles))
            for j in range(i + 1, len(tiles))
            if tiles[i] > tiles[j]
        )
        return inversions % 2

    # On a 3x3 board a horizontal slide leaves the row-major tile order intact
    # and a vertical slide moves one tile past exactly two others, so the
    # parity of the inversion count never changes. Boards with different tiles
    # or different parity are unreachable; answer without searching.
    if sorted(start) != sorted(goal) or inversion_parity(start) != inversion_parity(goal):
        return None

    open_list = []
    heapq.heappush(open_list, (heuristic(start, goal), 0, start))
    came_from = {start: None}
    g_cost = {start: 0}

    closed_set = set()

    while open_list:
        f, cost, current = heapq.heappop(open_list)

        # A state can be queued several times; only its first pop is expanded.
        if current in closed_set:
            continue
        closed_set.add(current)

        if current == goal:
            path = []
            while current is not None:
                path.append(current)
                current = came_from[current]
            return path[::-1]

        for neighbor in get_neighbors(current):
            if neighbor in closed_set:
                continue

            new_cost = g_cost[current] + 1
            if neighbor not in g_cost or new_cost < g_cost[neighbor]:
                g_cost[neighbor] = new_cost
                f_cost = new_cost + heuristic(neighbor, goal)
                heapq.heappush(open_list, (f_cost, new_cost, neighbor))
                came_from[neighbor] = current
    return None

# Boards are flat, row-major tuples; 0 is the blank.
start = (1,2,3,4,0,6,7,5,8)
goal = (1,2,3,4,5,6,7,8,0)
path = a_star_8_puzzle(start, goal)

if path:
    print("Solution found:")
    for step in path:
        # Print each board as three rows of three.
        print(step[:3])
        print(step[3:6])
        print(step[6:])
        print("-" * 5)
    # The path includes the starting board, so moves = len(path) - 1.
    print("Total Steps:", len(path)-1)
else:
    print("No solution found.")

Solution found:
(1, 2, 3)
(4, 0, 6)
(7, 5, 8)
-----
(1, 2, 3)
(4, 5, 6)
(7, 0, 8)
-----
(1, 2, 3)
(4, 5, 6)
(7, 8, 0)
-----
Total Steps: 2


In [6]:
def print_board(b):
    """Print each row of the board with ' | ' between cells, followed by a
    '-----' separator line (the separator is also printed after the last row)."""
    for row in b:
        print(" | ".join(row), "-----", sep="\n")

def check_winner(b):
    """Return the mark that fills a complete line on the 3x3 board, else None.

    Lines are examined in a fixed order: rows top to bottom, columns left to
    right, the main diagonal, then the anti-diagonal. Cells holding " " are
    empty and never count as a win.
    """
    rows = [(r[0], r[1], r[2]) for r in b]
    columns = [(b[0][c], b[1][c], b[2][c]) for c in range(3)]
    diagonals = [
        (b[0][0], b[1][1], b[2][2]),
        (b[0][2], b[1][1], b[2][0]),
    ]

    for first, second, third in rows + columns + diagonals:
        if third != " " and first == second == third:
            return first
    return None

# Two-player console game: " " marks an empty cell, "X" moves first.
board = [[" "]*3 for _ in range(3)]
player = "X"
turns = 0

# `turns` counts accepted moves only; rejected input re-prompts the same player.
while turns < 9:
    print_board(board)
    try:
        r = int(input(f"Enter row (0-2) for {player}: "))
        c = int(input(f"Enter col (0-2) for {player}: "))
        
        if not (0 <= r <= 2 and 0 <= c <= 2):
            print("Coordinates out of bounds. Try again.")
            continue
            
        if board[r][c] == " ":
            board[r][c] = player
            turns += 1
            winner = check_winner(board)
            if winner:
                print_board(board)
                print(f"Winner is {winner}")
                break
            # Hand the turn to the other player.
            player = "O" if player == "X" else "X"
        else:
            print("Cell taken, try again")
    except ValueError:
        # int() could not parse what was typed.
        print("Invalid input. Please enter a number (0-2).")

# The loop's else branch runs only when all nine moves were made without a
# `break`, i.e. nobody won.
else:
    print_board(board)
    print("It's a Draw!")

  |   |  
-----
  |   |  
-----
  |   |  
-----


Enter row (0-2) for X:  0
Enter col (0-2) for X:  0


X |   |  
-----
  |   |  
-----
  |   |  
-----


Enter row (0-2) for O:  0
Enter col (0-2) for O:  0


Cell taken, try again
X |   |  
-----
  |   |  
-----
  |   |  
-----


Enter row (0-2) for O:  0
Enter col (0-2) for O:  0


Cell taken, try again
X |   |  
-----
  |   |  
-----
  |   |  
-----


Enter row (0-2) for O:  7
Enter col (0-2) for O:  7


Coordinates out of bounds. Try again.
X |   |  
-----
  |   |  
-----
  |   |  
-----


Enter row (0-2) for O:  7
Enter col (0-2) for O:  7


Coordinates out of bounds. Try again.
X |   |  
-----
  |   |  
-----
  |   |  
-----


Enter row (0-2) for O:  7
Enter col (0-2) for O:  


Invalid input. Please enter a number (0-2).
X |   |  
-----
  |   |  
-----
  |   |  
-----


Enter row (0-2) for O:  2
Enter col (0-2) for O:  1


X |   |  
-----
  |   |  
-----
  | O |  
-----


KeyboardInterrupt: Interrupted by user

In [7]:
def tower_of_hanoi(n, source, target, auxiliary):
    """Print the moves that transfer ``n`` disks from ``source`` to ``target``.

    Args:
        n: Number of disks to move. Zero or a negative number prints nothing.
        source: Label of the peg the disks start on.
        target: Label of the peg the disks must end on.
        auxiliary: Label of the spare peg.
    """
    # Nothing to move; this also stops the recursion from running away when
    # the function is called with n <= 0.
    if n <= 0:
        return
    # Park the n-1 smaller disks on the spare peg, move the largest, then
    # bring the smaller ones over on top of it.
    tower_of_hanoi(n-1, source, auxiliary, target)
    print(f"Move disk {n} from {source} to {target}")
    tower_of_hanoi(n-1, auxiliary, target, source)

# Ask for the disk count and solve from peg 'A' to peg 'C' using 'B' as the spare.
try:
    n = int(input("Enter number of disks: "))
    if n <= 0:
        print("Please enter a positive number of disks.")
    else:
        tower_of_hanoi(n, 'A', 'C', 'B')
except ValueError:
    # int() could not parse what was typed.
    print("Invalid input. Please enter a number.")

Enter number of disks:  2


Move disk 1 from A to B
Move disk 2 from A to C
Move disk 1 from B to C


In [8]:
from collections import deque

def water_jug(jug1_cap, jug2_cap, target):
    """Solve the two-jug measuring puzzle with breadth-first search.

    Starting with both jugs empty, searches over fill / empty / pour moves for
    a state in which either jug holds exactly ``target``. The result is printed
    as well as returned.

    Args:
        jug1_cap: Capacity of the first jug.
        jug2_cap: Capacity of the second jug.
        target: Amount that must end up in one of the jugs.

    Returns:
        List of ``(jug1, jug2)`` states from ``(0, 0)`` to the first goal state
        found, or ``None`` if no reachable state contains ``target``.
    """
    origin = (0, 0)
    # States are marked when queued, so each one is queued (and its path
    # copied) at most once.
    visited = {origin}
    q = deque([(origin, [origin])])  # Store (state, path)

    while q:
        (a, b), path = q.popleft()

        if a == target or b == target:
            print("Goal reached!")
            print("Path of states (jug1, jug2):")
            for state in path:
                print(state)
            return path

        pour_to_2 = min(a, jug2_cap - b)
        pour_to_1 = min(b, jug1_cap - a)
        successors = (
            (jug1_cap, b),                   # 1. Fill jug1
            (a, jug2_cap),                   # 2. Fill jug2
            (0, b),                          # 3. Empty jug1
            (a, 0),                          # 4. Empty jug2
            (a - pour_to_2, b + pour_to_2),  # 5. Pour jug1 to jug2
            (a + pour_to_1, b - pour_to_1),  # 6. Pour jug2 to jug1
        )
        for state in successors:
            if state not in visited:
                visited.add(state)
                q.append((state, path + [state]))

    print("Goal cannot be reached.")
    return None

# Classic instance: measure exactly 2 using a jug of capacity 4 and a jug of
# capacity 3. The returned path is also the cell's displayed value.
water_jug(4, 3, 2)

Goal reached!
Path of states (jug1, jug2):
(0, 0)
(0, 3)
(3, 0)
(3, 3)
(4, 2)


[(0, 0), (0, 3), (3, 0), (3, 3), (4, 2)]

In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

# Load the dataset. If the file is missing, print guidance and re-raise so this
# cell fails on its own; exit() is an interactive-shell helper that ends the
# whole interpreter session rather than just stopping the cell.
try:
    df = pd.read_csv("uber.csv")
except FileNotFoundError:
    print("Error: 'uber.csv' not found.")
    print("Please download the dataset and place it in the same directory.")
    raise

df = df.drop_duplicates().dropna()
numeric_df = df.select_dtypes(include=[np.number])

# Pick the regression target: the first matching well-known name, otherwise
# fall back to the first numeric column.
target_col = None
for col in ['fare_amount', 'price', 'amount', 'fare']:
    if col in numeric_df.columns:
        target_col = col
        break
if target_col is None:
    # Fallback if no obvious target column
    if numeric_df.shape[1] > 0:
        target_col = numeric_df.columns[0]
    else:
        raise ValueError("No numeric columns found in dataset.")

X = numeric_df.drop(columns=[target_col])
y = numeric_df[target_col]

print("Dataset Shape:", df.shape)
print("Target Variable:", target_col)
print("Feature Columns:", list(X.columns))

plt.figure(figsize=(10, 6))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', fmt='.2f')
plt.title("Correlation Heatmap")
plt.show()

plt.figure(figsize=(7, 4))
sns.histplot(y, kde=True, bins=30, color='skyblue')
plt.title(f"Distribution of {target_col}")
plt.xlabel(target_col)
plt.show()

# The scaler is fitted on the training split only and then applied to the
# test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline: linear regression on all scaled features.
model1 = LinearRegression()
model1.fit(X_train_scaled, y_train)
y_pred1 = model1.predict(X_test_scaled)

r2_no_pca = r2_score(y_test, y_pred1)
rmse_no_pca = np.sqrt(mean_squared_error(y_test, y_pred1))

# Full PCA, used only to inspect how variance accumulates across components.
pca = PCA()
pca.fit(X_train_scaled)
explained = np.cumsum(pca.explained_variance_ratio_)
plt.figure(figsize=(7, 4))
plt.plot(range(1, len(explained) + 1), explained, marker='o')
plt.axhline(0.95, color='r', linestyle='--')
plt.title("Cumulative Explained Variance by PCA")
plt.xlabel("No. of Components")
plt.ylabel("Cumulative Variance")
plt.grid(True)
plt.show()

# searchsorted returns the 0-based index of the first cumulative value that
# reaches 0.95; +1 converts it to a component count.
n_components_95 = int(np.searchsorted(explained, 0.95) + 1)
print(f"Components needed for 95% variance: {n_components_95}")

# Refit PCA with the reduced component count and train the second model on it.
pca = PCA(n_components=n_components_95)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

model2 = LinearRegression()
model2.fit(X_train_pca, y_train)
y_pred2 = model2.predict(X_test_pca)

r2_pca = r2_score(y_test, y_pred2)
rmse_pca = np.sqrt(mean_squared_error(y_test, y_pred2))

comparison = pd.DataFrame({
    "Model": ["Without PCA", "With PCA"],
    "R² Score": [r2_no_pca, r2_pca],
    "RMSE": [rmse_no_pca, rmse_pca]
})
print("\nModel Performance Comparison:\n", comparison)

plt.figure(figsize=(6, 4))
sns.barplot(data=comparison.melt(id_vars="Model", var_name="Metric", value_name="Value"),
            x="Metric", y="Value", hue="Model", palette="viridis")
plt.title("Model Comparison (With vs Without PCA)")
plt.show()

ModuleNotFoundError: No module named 'pandas'

In [10]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Load the dataset. If the file is missing, print guidance and re-raise so this
# cell fails on its own; exit() is an interactive-shell helper that ends the
# whole interpreter session rather than just stopping the cell.
try:
    df = pd.read_csv("uber.csv")
except FileNotFoundError:
    print("Error: 'uber.csv' not found.")
    print("Please download the dataset and place it in the same directory.")
    raise

print("Dataset Loaded Successfully!")
print("Shape:", df.shape)
print(df.head())

df = df.drop_duplicates().dropna()
numeric_df = df.select_dtypes(include=[np.number])

# Pick the regression target: the first matching well-known name, otherwise
# fall back to the first numeric column.
target_col = None
for col in ['fare_amount', 'price', 'amount', 'fare']:
    if col in numeric_df.columns:
        target_col = col
        break
if target_col is None:
    if numeric_df.shape[1] > 0:
        target_col = numeric_df.columns[0]
    else:
        raise ValueError("No numeric columns found in dataset.")

X = numeric_df.drop(columns=[target_col])
y = numeric_df[target_col]

print(f"Target Variable: {target_col}")

print("\nSummary Statistics:\n", df.describe())

plt.figure(figsize=(10, 6))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', fmt='.2f')
plt.title("Feature Correlation Heatmap")
plt.show()

plt.figure(figsize=(7, 4))
sns.histplot(y, bins=30, kde=True, color='skyblue')
plt.title(f"Distribution of {target_col}")
plt.xlabel(target_col)
plt.ylabel("Frequency")
plt.show()

# The scaler is fitted on the training split only and then applied to the
# test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline: linear regression on all scaled features.
model_no_pca = LinearRegression()
model_no_pca.fit(X_train_scaled, y_train)
y_pred_no_pca = model_no_pca.predict(X_test_scaled)

r2_no_pca = r2_score(y_test, y_pred_no_pca)
rmse_no_pca = np.sqrt(mean_squared_error(y_test, y_pred_no_pca))
mae_no_pca = mean_absolute_error(y_test, y_pred_no_pca)

print("\n=== Model Without PCA ===")
print(f"R² Score: {r2_no_pca:.4f}")
print(f"RMSE: {rmse_no_pca:.4f}")
print(f"MAE: {mae_no_pca:.4f}")

# Full PCA, used only to inspect how variance accumulates across components.
pca_full = PCA()
pca_full.fit(X_train_scaled)
explained_cumsum = np.cumsum(pca_full.explained_variance_ratio_)

plt.figure(figsize=(7, 4))
plt.plot(range(1, len(explained_cumsum) + 1), explained_cumsum, marker='o')
plt.axhline(0.95, color='red', linestyle='--', label='95% Variance')
plt.title("Cumulative Explained Variance by PCA Components")
plt.xlabel("Number of Components")
plt.ylabel("Cumulative Variance Explained")
plt.legend()
plt.grid(True)
plt.show()

# searchsorted returns the 0-based index of the first cumulative value that
# reaches 0.95; +1 converts it to a component count.
n_components_95 = int(np.searchsorted(explained_cumsum, 0.95) + 1)
print(f"\nNumber of components to retain 95% variance: {n_components_95}")

# Refit PCA with the reduced component count and train the second model on it.
pca = PCA(n_components=n_components_95)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

model_pca = LinearRegression()
model_pca.fit(X_train_pca, y_train)
y_pred_pca = model_pca.predict(X_test_pca)

r2_pca = r2_score(y_test, y_pred_pca)
rmse_pca = np.sqrt(mean_squared_error(y_test, y_pred_pca))
mae_pca = mean_absolute_error(y_test, y_pred_pca)

print("\n=== Model With PCA ===")
print(f"R² Score: {r2_pca:.4f}")
print(f"RMSE: {rmse_pca:.4f}")
print(f"MAE: {mae_pca:.4f}")

comparison = pd.DataFrame({
    "Model": ["Without PCA", "With PCA"],
    "R² Score": [r2_no_pca, r2_pca],
    "RMSE": [rmse_no_pca, rmse_pca],
    "MAE": [mae_no_pca, mae_pca]
})

print("\nModel Performance Comparison:\n", comparison)

plt.figure(figsize=(8, 5))
sns.barplot(data=comparison.melt(id_vars="Model", var_name="Metric", value_name="Score"),
            x="Metric", y="Score", hue="Model", palette="viridis")
plt.title("Model Comparison (With vs Without PCA)")
plt.ylabel("Score Value")
plt.grid(True)
plt.show()

ModuleNotFoundError: No module named 'pandas'

In [11]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset. If the file is missing, print guidance and re-raise so this
# cell fails on its own; exit() is an interactive-shell helper that ends the
# whole interpreter session rather than just stopping the cell.
try:
    df = pd.read_csv("house.csv")
except FileNotFoundError:
    print("Error: 'house.csv' not found.")
    print("Please download a suitable house price dataset and ensure columns match.")
    raise

# Simple check for required columns
required_cols = ["Id", "Price", "Location", "Condition", "Garage"]
if not all(col in df.columns for col in required_cols):
    print(f"Error: Dataset must contain {required_cols}")
    # The file was read but lacks the expected columns: replace it with 100
    # rows of random placeholder data (unseeded, so it differs between runs).
    data = {
        "Id": range(100),
        "Price": np.random.randint(100000, 500000, 100),
        "Location": np.random.choice(["Urban", "Suburban", "Rural"], 100),
        "Condition": np.random.choice(["Good", "Fair", "Poor"], 100),
        "Garage": np.random.choice(["Yes", "No"], 100),
        "Area": np.random.randint(1000, 3000, 100),
        "Bedrooms": np.random.randint(2, 6, 100)
    }
    df = pd.DataFrame(data)
    print("Using dummy data as 'house.csv' was not found or valid.")


X = df.drop(columns=["Id", "Price"])
y = df["Price"].values

# Identify categorical columns
categorical_features = ["Location", "Condition", "Garage"]
# Identify numerical columns
numerical_features = [col for col in X.columns if col not in categorical_features]

# Numeric columns pass through unchanged; categorical columns are one-hot
# encoded with the first level of each dropped.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_features),
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_features)
    ])

# Preprocessing lives inside the pipeline, so it is refitted on the training
# portion of every fold.
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('regressor', LinearRegression())])

kf = KFold(n_splits=5, shuffle=True, random_state=1)

# The scorer returns negated MSE (higher is better); flip the sign back.
mses = -cross_val_score(model, X, y, cv=kf, scoring='neg_mean_squared_error')
rmses = np.sqrt(mses)
r2_scores = cross_val_score(model, X, y, cv=kf, scoring='r2')

print("5-Fold MSEs:", np.round(mses, 2))
print("Mean MSE:", np.round(mses.mean(), 2))
print("5-Fold RMSEs:", np.round(rmses, 2))
print("Mean RMSE:", np.round(rmses.mean(), 2))
print("5-Fold R² Scores:", np.round(r2_scores, 4))
print("Mean R² Score:", np.round(r2_scores.mean(), 4))

ModuleNotFoundError: No module named 'pandas'

In [12]:
import pandas as pd
import numpy as np

# Load the student data, or synthesise 20 rows when the file is missing.
try:
    df = pd.read_csv("student.csv")
except FileNotFoundError:
    print("Warning: 'student.csv' not found. Using dummy data.")
    data = {
        "student_id": range(1, 21),
        "hours_studied": np.random.rand(20) * 10 + 1,
        "exam_score": np.zeros(20) # will be filled
    }
    # Create a linear relationship with noise
    data["exam_score"] = data["hours_studied"] * 8.5 + 15 + np.random.randn(20) * 5
    df = pd.DataFrame(data)

if "hours_studied" not in df.columns or "exam_score" not in df.columns:
     raise ValueError("CSV must contain 'hours_studied' and 'exam_score' columns.")

X = df["hours_studied"].values
y = df["exam_score"].values

mean_x = np.mean(X)
mean_y = np.mean(y)

# Closed-form least-squares fit for one feature:
#   slope = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x)^2)
# NOTE(review): `den` is 0 when every hours_studied value is identical.
num = np.sum((X - mean_x) * (y - mean_y))
den = np.sum((X - mean_x) ** 2)

m = num / den
c = mean_y - m * mean_x

def predict(x):
    # Evaluate the fitted line using the module-level slope `m` and intercept `c`.
    return m * x + c

# Metrics below are computed on the same data the line was fitted to.
y_pred = predict(X)

mse = np.mean((y - y_pred) ** 2)
ss_res = np.sum((y - y_pred) ** 2)
ss_tot = np.sum((y - np.mean(y)) ** 2)
r2 = 1 - (ss_res / ss_tot)

print("Linear Regression from Scratch")
print(f"Formula: score = {m:.4f} * hours + {c:.4f}")
print("-" * 30)
print("Slope (m):", round(m, 4))
print("Intercept (c):", round(c, 4))
print("Mean Squared Error (MSE):", round(mse, 4))
print("R² Score:", round(r2, 4))

ModuleNotFoundError: No module named 'pandas'

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression

# Load the student data, or synthesise 50 rows when the file is missing.
try:
    df = pd.read_csv("student.csv")
except FileNotFoundError:
    print("Warning: 'student.csv' not found. Using dummy data.")
    data = {
        "hours_studied": np.random.rand(50) * 10 + 1,
        "sleep_hours": np.random.rand(50) * 3 + 5,
        "attendance_percent": np.random.rand(50) * 30 + 70,
        "Internal_marks": np.random.rand(50) * 20 + 10,
        "exam_score": np.zeros(50)
    }
    # The synthetic score is a noisy linear mix of three of the features;
    # sleep_hours does not contribute to it.
    data["exam_score"] = (data["hours_studied"]*3 + 
                          data["attendance_percent"]*0.5 + 
                          data["Internal_marks"]*1.5 + 
                          np.random.randn(50)*3)
    df = pd.DataFrame(data)

features = ["hours_studied", "sleep_hours", "attendance_percent", "Internal_marks"]
target = "exam_score"

# A CSV that exists but lacks any of these columns is rejected rather than
# replaced with dummy data.
if not all(col in df.columns for col in features + [target]):
    raise ValueError("CSV missing required columns.")

X = df[features]
y = df[target]

model = LinearRegression()
# Five shuffled folds; random_state fixes the fold assignment.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

r2_scores = cross_val_score(model, X, y, cv=kf, scoring="r2")

print("K-Fold Cross-Validation for Student Scores")
print("R² Scores for each fold:", np.round(r2_scores, 4))
print("Average R² Score:", round(np.mean(r2_scores), 4))

ModuleNotFoundError: No module named 'pandas'

In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the salary data, or synthesise 100 rows when the file is missing.
try:
    df = pd.read_csv("salary.csv")
except FileNotFoundError:
    print("Warning: 'salary.csv' not found. Using dummy data.")
    data = {
        'Age': np.random.randint(22, 60, 100),
        'Gender': np.random.choice(['Male', 'Female'], 100),
        'Education Level': np.random.choice(["Bachelor's", "Master's", "PhD"], 100),
        'Job Title': np.random.choice(['Developer', 'Manager', 'Analyst'], 100),
        'Years of Experience': np.random.randint(0, 30, 100),
        'Salary': np.zeros(100)
    }
    # The synthetic salary depends only on years of experience plus noise.
    data['Salary'] = 50000 + data['Years of Experience']*2000 + np.random.randn(100)*5000
    df = pd.DataFrame(data)

df = df.dropna()

X = df.drop(columns=["Salary"])
y = df["Salary"]

# Identify categorical columns
categorical_features = X.select_dtypes(include=['object', 'category']).columns
# Identify numerical columns
numerical_features = X.select_dtypes(include=np.number).columns

# Numeric columns pass through unchanged; categorical columns are one-hot
# encoded with the first level of each dropped.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_features),
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_features)
    ])

# Preprocessing lives inside the pipeline, so it is refitted on the training
# portion of every fold.
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('regressor', LinearRegression())])

kf = KFold(n_splits=5, shuffle=True, random_state=42)

r2_scores = cross_val_score(model, X, y, cv=kf, scoring="r2")

print("K-Fold Cross-Validation for IT Salaries")
print("R² Scores for each fold:", np.round(r2_scores, 4))
print("Average R² Score:", round(np.mean(r2_scores), 4))

ModuleNotFoundError: No module named 'pandas'

In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LinearRegression

# Data provided in the prompt
data = {
    "ad_spend": [1000,1500,2000,2500,3000,3500,4000,4500,5000,5500,
                 1200,1700,2200,2700,3200,3700,4200,4700,5200,5700],
    "discount": [5,10,0,15,20,5,10,25,30,15,
                 8,12,3,18,22,7,14,28,33,17],
    "customer_footfall": [200,250,300,350,400,450,500,550,600,650,
                          210,260,310,360,410,460,510,560,610,660],
    "sales": [10000,15000,13000,20000,25000,22000,27000,30000,35000,40000,
              11000,16000,14000,21000,26000,23000,28000,31000,36000,41000]
}
# Increased N to 20 for KFold=5 to work
df = pd.DataFrame(data)

X = df[["ad_spend", "discount", "customer_footfall"]]
y = df["sales"]

model = LinearRegression()
# n_splits=5 requires at least 5 samples. 
# We set n_splits=4 because the data (N=10) is small
# Updated N=20, so n_splits=5 is fine.
kf = KFold(n_splits=5, shuffle=True, random_state=1)
scores = cross_val_score(model, X, y, cv=kf, scoring="r2")

print("K-Fold Cross-Validation for Sales Forecast")
print("R² Scores for each fold:", np.round(scores, 4))
print("Average R²:", np.round(np.mean(scores), 4))

ModuleNotFoundError: No module named 'pandas'

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns
import matplotlib.pyplot as plt

# This cell's own imports do not include numpy, yet the fallback branch below
# needs it; import it here so the cell also runs on a fresh kernel.
import numpy as np

try:
    df = pd.read_csv("emails.csv")
except FileNotFoundError:
    print("Warning: 'emails.csv' not found. Using dummy data.")
    # Create dummy word frequency data
    N = 1000
    data = {f"word_{i}": np.random.randint(0, 5, N) for i in range(50)}
    data["Prediction"] = np.random.choice([0, 1], N, p=[0.7, 0.3])
    df = pd.DataFrame(data)

# Drop any non-numeric columns (like "Email No." or text columns)
df = df.select_dtypes(include=["number"])
df = df.dropna()

if "Prediction" not in df.columns:
    raise ValueError("The dataset must have a 'Prediction' column as the target variable.")

X = df.drop(columns=["Prediction"])
y = df["Prediction"]

# stratify=y keeps the class proportions the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

model = MultinomialNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("=== Naïve Bayes Email Spam Detection ===")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print("Confusion Matrix:\n", cm)

sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['Not Spam', 'Spam'], 
            yticklabels=['Not Spam', 'Spam'])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

ModuleNotFoundError: No module named 'pandas'

In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load the e-mail word-count data, or synthesise 1000 rows of 50 random
# counts with a 70/30 label split when the file is missing.
try:
    df = pd.read_csv("emails.csv")
except FileNotFoundError:
    print("Warning: 'emails.csv' not found. Using dummy data.")
    N = 1000
    data = {f"word_{i}": np.random.randint(0, 5, N) for i in range(50)}
    data["Prediction"] = np.random.choice([0, 1], N, p=[0.7, 0.3])
    df = pd.DataFrame(data)

# Keep numeric columns only and drop rows with missing values.
df = df.select_dtypes(include=["number"]).dropna()

if "Prediction" not in df.columns:
    raise ValueError("The dataset must have a 'Prediction' column.")

X = df.drop(columns=["Prediction"])
y = df["Prediction"].values
# stratify=y keeps the class proportions the same in both splits.
X_train_df, X_test_df, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
# The from-scratch functions below index with boolean masks, so hand them
# plain NumPy arrays.
X_train = X_train_df.values
X_test = X_test_df.values

def fit_naive_bayes(X, y):
    """Estimate multinomial Naive Bayes parameters from count data.

    Args:
        X: 2-D array of feature counts, one row per sample.
        y: 1-D array of class labels aligned with the rows of ``X``.

    Returns:
        ``(classes, class_prob, feature_prob)`` where ``classes`` is the sorted
        array of distinct labels, ``class_prob[c]`` is the share of samples
        labelled ``c``, and ``feature_prob[c]`` is the add-one smoothed share
        of each feature within class ``c``.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    classes = np.unique(y)

    class_prob, feature_prob = {}, {}
    for label in classes:
        rows = X[y == label]
        class_prob[label] = rows.shape[0] / n_samples
        # Laplace (add-1) smoothing: +1 per feature on top, +n_features below.
        per_feature_total = rows.sum(axis=0)
        feature_prob[label] = (per_feature_total + 1) / (rows.sum() + n_features)
    return classes, class_prob, feature_prob

def predict_naive_bayes(X, classes, class_prob, feature_prob):
    """Predict the most probable class for every row of ``X``.

    Args:
        X: 2-D array-like of feature counts, one row per sample.
        classes: Array of class labels, as returned by ``fit_naive_bayes``.
        class_prob: Mapping from class label to prior probability.
        feature_prob: Mapping from class label to a 1-D array of per-feature
            probabilities.

    Returns:
        1-D NumPy array with the predicted label for each row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    classes = np.asarray(classes)

    # Work in log space to avoid underflow. The tables depend only on the
    # fitted parameters, so they are built once instead of once per sample.
    log_prior = np.array([np.log(class_prob[c]) for c in classes])
    # The 1e-9 offset keeps the log finite if a caller passes unsmoothed
    # probabilities that contain zeros.
    log_feature = np.vstack(
        [np.log(np.asarray(feature_prob[c], dtype=float) + 1e-9) for c in classes]
    )

    # log_posterior[i, k] = log P(class k) + sum_j X[i, j] * log P(feature j | class k)
    log_posterior = X @ log_feature.T + log_prior
    return classes[np.argmax(log_posterior, axis=1)]


# Fit on the training split, then label the held-out split.
nb_params = fit_naive_bayes(X_train, y_train)
classes, class_prob, feature_prob = nb_params
y_pred = predict_naive_bayes(X_test, classes, class_prob, feature_prob)

# Accuracy is the fraction of held-out rows whose predicted label matches.
acc = (y_pred == y_test).mean()
print("=== Naïve Bayes (From Scratch) ===")
print("Email Spam Detection Accuracy:", round(acc, 4))

ModuleNotFoundError: No module named 'pandas'

In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
import seaborn as sns
import matplotlib.pyplot as plt

# Load the e-mail word-count table; fall back to synthetic, deliberately
# imbalanced data so the cell still runs when the CSV is not present.
try:
    df = pd.read_csv("emails.csv")
except FileNotFoundError:
    print("Warning: 'emails.csv' not found. Using dummy data.")
    N = 1000
    # Seeded generator: every other random step in this cell is pinned to 42,
    # so the fallback data must be reproducible too.
    rng = np.random.default_rng(42)
    data = {f"word_{i}": rng.integers(0, 5, N) for i in range(50)}
    data["Prediction"] = rng.choice([0, 1], N, p=[0.85, 0.15]) # Imbalanced
    df = pd.DataFrame(data)

# Keep numeric columns only and drop rows with missing values.
df = df.select_dtypes(include=["number"]).dropna()

if "Prediction" not in df.columns:
    raise ValueError("The dataset must have a 'Prediction' column.")
    
X = df.drop(columns=["Prediction"])
y = df["Prediction"]

print("Class Distribution Before Balancing:\n", y.value_counts())

# Stratified 70/30 split so both splits keep the original class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Apply SMOTE (oversampling minority class)
# Note: SMOTE works best on non-sparse data.
# Only the training split is resampled; the test split keeps its natural ratio.
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

print("\nClass Distribution After SMOTE:\n", pd.Series(y_train_res).value_counts())

# Scaling statistics come from the (resampled) training data and are then
# applied unchanged to the test data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_res)
X_test_scaled = scaler.transform(X_test)

# Train SVM Model
svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(X_train_scaled, y_train_res)

y_pred = svm_model.predict(X_test_scaled)

# Evaluate on the untouched test split.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("\n=== SVM Email Spam Detection Results (with SMOTE) ===")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:\n", cm)

# Heatmap rows are actual classes, columns are predicted classes.
plt.figure(figsize=(5,4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", 
            xticklabels=["Normal", "Spam"], 
            yticklabels=["Normal", "Spam"])
plt.title("SVM Confusion Matrix (SMOTE)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

ModuleNotFoundError: No module named 'pandas'

In [19]:
import pandas as pd
import numpy as np

# Load the e-mail word-count table; fall back to synthetic data so the cell
# still runs when the CSV is not present.
try:
    df = pd.read_csv("emails.csv")
except FileNotFoundError:
    print("Warning: 'emails.csv' not found. Using dummy data.")
    N = 1000
    # Seeded generator so the fallback data is identical on every run.
    rng = np.random.default_rng(42)
    data = {f"word_{i}": rng.integers(0, 5, N) for i in range(50)}
    data["Prediction"] = rng.choice([0, 1], N, p=[0.7, 0.3])
    df = pd.DataFrame(data)

# Keep numeric columns only and drop rows with missing values.
df = df.select_dtypes(include=["number"]).dropna()

if "Prediction" not in df.columns:
    raise ValueError("The dataset must have a 'Prediction' column.")

X = df.drop(columns=["Prediction"]).values.astype(float)
y = df["Prediction"].values

# SVM requires labels -1 and 1
y = np.where(y == 1, 1, -1)

# Shuffle before the 70/30 split: a file sorted by label (or by time) would
# otherwise put a different class mix into the test rows than into training.
shuffled = np.random.default_rng(42).permutation(len(X))
split = int(0.7 * len(X))
train_idx, test_idx = shuffled[:split], shuffled[split:]
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Standardize with statistics of the training rows only, so nothing about the
# test rows leaks into the features the model is trained on.
feature_mean = X_train.mean(axis=0)
feature_std = X_train.std(axis=0) + 1e-6  # epsilon guards constant columns
X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

class FastSVM:
    """Linear soft-margin SVM trained with full-batch subgradient descent.

    The decision function is ``f(x) = w . x - b`` and labels must be encoded
    as -1 / +1. Each epoch takes one step on
    ``(lambda_param / 2) * ||w||^2 + sum_i max(0, 1 - y_i * f(x_i))``.

    Args:
        lr: Step size of each gradient update.
        lambda_param: L2 regularisation strength applied to ``w``.
        epochs: Number of full passes over the training data.
    """

    def __init__(self, lr=0.0001, lambda_param=0.01, epochs=300):
        self.lr = lr
        self.lambda_param = lambda_param
        self.epochs = epochs
        self.w = None  # weight vector, set by fit()
        self.b = None  # bias term, set by fit()

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from samples ``X`` (n, d) and labels ``y`` in {-1, +1}."""
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        self.w = np.zeros(X.shape[1])
        self.b = 0.0

        for _ in range(self.epochs):
            margin = y * (np.dot(X, self.w) - self.b)
            violating = margin < 1  # inside the margin or misclassified

            # Subgradient of the hinge loss for a violating sample:
            #   d/dw [1 - y (w.x - b)] = -y * x
            #   d/db [1 - y (w.x - b)] = +y   (b enters f with a minus sign)
            dw = (self.lambda_param * self.w) - np.dot(X[violating].T, y[violating])
            db = np.sum(y[violating])

            self.w -= self.lr * dw
            self.b -= self.lr * db

    def predict(self, X):
        """Return -1.0 / +1.0 for each row of ``X``; the boundary itself maps to +1.0."""
        scores = np.dot(np.asarray(X, dtype=float), self.w) - self.b
        # np.sign would emit 0 exactly on the boundary, which is not a valid label.
        return np.where(scores >= 0, 1.0, -1.0)

# Train the from-scratch linear SVM and label the held-out rows.
model = FastSVM(lr=0.00001, lambda_param=0.01, epochs=500)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Boolean masks shared by the four confusion-matrix cells (+1 = spam, -1 = not spam).
actual_pos, actual_neg = (y_test == 1), (y_test == -1)
pred_pos, pred_neg = (y_pred == 1), (y_pred == -1)

tp = np.sum(actual_pos & pred_pos)
tn = np.sum(actual_neg & pred_neg)
fp = np.sum(actual_neg & pred_pos)
fn = np.sum(actual_pos & pred_neg)

# The 1e-6 terms keep each ratio defined when its denominator would be zero.
n_counted = tp + tn + fp + fn
accuracy = (tp + tn) / (n_counted + 1e-6)
precision = tp / (tp + fp + 1e-6)
recall = tp / (tp + fn + 1e-6)
f1 = 2 * precision * recall / (precision + recall + 1e-6)

print("=== SVM (From Scratch) Email Spam Detection ===")
print(f"Confusion Matrix:\n[[TP={tp}, FP={fp}], [FN={fn}, TN={tn}]]")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")

ModuleNotFoundError: No module named 'pandas'

In [20]:
import pandas as pd
import numpy as np

# Load the student-performance table; fall back to synthetic data so the cell
# still runs when the CSV is not present.
try:
    df = pd.read_csv("performance.csv").dropna()
except FileNotFoundError:
    print("Warning: 'performance.csv' not found. Using dummy data.")
    N = 100
    # Seeded generator so the fallback data is identical on every run.
    rng = np.random.default_rng(42)
    data = {
        "Study_Hours_per_Week": rng.random(N) * 15 + 1,
        "Attendance_Rate": rng.random(N) * 30 + 70,
        "Internal_Scores": rng.random(N) * 40 + 60,
        "Pass_Fail": rng.choice(["Pass", "Fail"], N)
    }
    df = pd.DataFrame(data)

# Ensure required columns exist
required_cols = ["Study_Hours_per_Week", "Attendance_Rate", "Internal_Scores", "Pass_Fail"]
if not all(col in df.columns for col in required_cols):
    raise ValueError("CSV missing required columns.")

X = df[["Study_Hours_per_Week", "Attendance_Rate", "Internal_Scores"]].values.astype(float)
# Encode labels as +1 ("Pass") / -1 (anything else) for the SVM.
y = np.where(df["Pass_Fail"] == "Pass", 1, -1)

# Shuffle before the 70/30 split: a file sorted by outcome would otherwise put
# a different class mix into the test rows than into training.
shuffled = np.random.default_rng(42).permutation(len(X))
split = int(0.7 * len(X))
train_idx, test_idx = shuffled[:split], shuffled[split:]
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Standardize with statistics of the training rows only, so nothing about the
# test rows leaks into the features the model is trained on.
feature_mean = X_train.mean(axis=0)
feature_std = X_train.std(axis=0) + 1e-6  # epsilon guards constant columns
X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

def polynomial_kernel(X1, X2, degree=2, coef0=1):
    """Return the polynomial kernel matrix ``K[i, j] = (X1[i] . X2[j] + coef0) ** degree``."""
    return np.power(np.dot(X1, X2.T) + coef0, degree)

class PolynomialSVM:
    """Kernel SVM with a polynomial kernel, trained by a simple iterative heuristic.

    The decision function is ``f(x) = sum_i alpha_i * y_i * K(x_i, x) + b``
    with labels encoded as -1 / +1. Training is not an exact dual solver
    (no SMO): every sweep raises ``alpha_i`` for samples that violate the
    margin, decays all ``alpha_i`` by ``C * alpha_i``, clips to ``[0, C]`` and
    nudges the bias in the direction that helps the violating samples.

    Args:
        C: Upper bound of the box constraint on ``alpha``; also the decay rate.
        lr: Step size of each update.
        n_iters: Number of sweeps over the training set.
        degree: Degree of the polynomial kernel.
        coef0: Constant term added inside the kernel.
    """

    def __init__(self, C=1.0, lr=0.001, n_iters=300, degree=2, coef0=1):
        self.C = C
        self.lr = lr
        self.n_iters = n_iters
        self.degree = degree
        self.coef0 = coef0
        self.alpha = None  # per-sample coefficients, set by fit()
        self.b = None      # bias term, set by fit()
        self.X = None      # training samples kept for kernel evaluation
        self.y = None      # training labels kept for kernel evaluation

    def fit(self, X, y):
        """Learn ``alpha`` and ``b`` from samples ``X`` (n, d) and labels ``y`` in {-1, +1}."""
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n = X.shape[0]
        self.alpha = np.zeros(n)
        self.b = 0.0
        K = polynomial_kernel(X, X, self.degree, self.coef0)

        for _ in range(self.n_iters):
            # Decision values are computed once per sweep, so the per-sample
            # updates below are independent and can be applied as one vector op.
            decision = np.dot((self.alpha * y), K) + self.b
            violating = y * decision < 1

            # alpha_i += lr * (1[violating_i] - C * alpha_i), then the box constraint.
            self.alpha += self.lr * (violating - self.C * self.alpha)
            np.clip(self.alpha, 0, self.C, out=self.alpha)

            # Hinge-loss subgradient w.r.t. b is -y for a violating sample, so
            # the descent step ADDS lr * mean(y) over the violating samples.
            self.b += self.lr * np.mean(y * violating)

        self.X, self.y = X, y

    def project(self, X):
        """Return the raw decision value ``f(x)`` for each row of ``X``."""
        K = polynomial_kernel(X, self.X, self.degree, self.coef0)
        return np.dot(self.alpha * self.y, K.T) + self.b

    def predict(self, X):
        """Return -1.0 / +1.0 for each row of ``X``; the boundary itself maps to +1.0."""
        # np.sign would emit 0 exactly on the boundary, which is not a valid label.
        return np.where(self.project(X) >= 0, 1.0, -1.0)

# Train the degree-2 polynomial SVM and label the held-out rows.
model = PolynomialSVM(C=1.0, lr=0.0001, n_iters=500, degree=2, coef0=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Boolean masks shared by the four confusion-matrix cells (+1 = Pass, -1 = Fail).
actual_pos, actual_neg = (y_test == 1), (y_test == -1)
pred_pos, pred_neg = (y_pred == 1), (y_pred == -1)

tp = np.sum(actual_pos & pred_pos)
tn = np.sum(actual_neg & pred_neg)
fp = np.sum(actual_neg & pred_pos)
fn = np.sum(actual_pos & pred_neg)

# The 1e-6 terms keep each ratio defined when its denominator would be zero.
precision = tp / (tp + fp + 1e-6)
recall = tp / (tp + fn + 1e-6)
f1 = 2 * precision * recall / (precision + recall + 1e-6)

print("=== Polynomial SVM (From Scratch) - Student Performance ===")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1-Score : {f1:.4f}")
print(f"Confusion Matrix:\n[[TP={tp}, FP={fp}], [FN={fn}, TN={tn}]]")

ModuleNotFoundError: No module named 'pandas'

In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the breast-cancer table; fall back to synthetic data so the cell still
# runs when the CSV is not present.
try:
    df = pd.read_csv("cancer.csv")
except FileNotFoundError:
    print("Warning: 'cancer.csv' not found. Using dummy data.")
    N = 500
    # Seeded generator so the fallback data is identical on every run.
    rng = np.random.default_rng(42)
    data = {f"feature_{i}": rng.random(N) for i in range(30)}
    data['id'] = range(N)
    data['diagnosis'] = rng.choice(['M', 'B'], N)
    df = pd.DataFrame(data)

# Drop the identifier, then any column that is entirely empty (a stray
# all-NaN column would otherwise make the row-wise dropna remove every row),
# then rows with missing values.
df = df.drop(columns=["id"], errors='ignore').dropna(axis=1, how="all").dropna()

if "diagnosis" not in df.columns:
    raise ValueError("CSV must contain a 'diagnosis' column (M/B).")

# Encode labels as +1 ("M") / -1 (anything else) for the SVM.
y = np.where(df["diagnosis"] == "M", 1, -1)
X = df.drop(columns=["diagnosis"]).values.astype(float)

# Shuffle before the 70/30 split: a file sorted by diagnosis would otherwise
# put a different class mix into the test rows than into training.
shuffled = np.random.default_rng(42).permutation(len(X))
split_idx = int(0.7 * len(X))
train_idx, test_idx = shuffled[:split_idx], shuffled[split_idx:]
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Standardize with statistics of the training rows only, so nothing about the
# test rows leaks into the features the model is trained on.
feature_mean = X_train.mean(axis=0)
feature_std = X_train.std(axis=0) + 1e-6  # epsilon guards constant columns
X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

def polynomial_kernel(X1, X2, degree=3, coef0=1):
    """Return the polynomial kernel matrix ``K[i, j] = (X1[i] . X2[j] + coef0) ** degree``."""
    return (np.dot(X1, X2.T) + coef0) ** degree

class PolynomialSVM:
    """Kernel SVM with a polynomial kernel, trained by a simple iterative heuristic.

    The decision function is ``f(x) = sum_i alpha_i * y_i * K(x_i, x) + b``
    with labels encoded as -1 / +1. Training is not an exact dual solver
    (no SMO): every sweep raises ``alpha_i`` for samples that violate the
    margin, decays all ``alpha_i`` by ``C * alpha_i``, clips to ``[0, C]`` and
    nudges the bias in the direction that helps the violating samples.

    Args:
        C: Upper bound of the box constraint on ``alpha``; also the decay rate.
        lr: Step size of each update.
        n_iters: Number of sweeps over the training set.
        degree: Degree of the polynomial kernel.
        coef0: Constant term added inside the kernel.
    """

    def __init__(self, C=1.0, lr=0.001, n_iters=300, degree=3, coef0=1):
        self.C = C
        self.lr = lr
        self.n_iters = n_iters
        self.degree = degree
        self.coef0 = coef0
        self.alpha = None  # per-sample coefficients, set by fit()
        self.b = None      # bias term, set by fit()
        self.X = None      # training samples kept for kernel evaluation
        self.y = None      # training labels kept for kernel evaluation

    def fit(self, X, y):
        """Learn ``alpha`` and ``b`` from samples ``X`` (n, d) and labels ``y`` in {-1, +1}."""
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n = X.shape[0]
        self.alpha = np.zeros(n)
        self.b = 0.0
        K = polynomial_kernel(X, X, self.degree, self.coef0)

        for _ in range(self.n_iters):
            # Decision values are computed once per sweep, so the per-sample
            # updates below are independent and can be applied as one vector op.
            decision = np.dot((self.alpha * y), K) + self.b
            violating = y * decision < 1

            # alpha_i += lr * (1[violating_i] - C * alpha_i), then the box constraint.
            self.alpha += self.lr * (violating - self.C * self.alpha)
            np.clip(self.alpha, 0, self.C, out=self.alpha)

            # Hinge-loss subgradient w.r.t. b is -y for a violating sample, so
            # the descent step ADDS lr * mean(y) over the violating samples.
            self.b += self.lr * np.mean(y * violating)

        self.X, self.y = X, y

    def project(self, X):
        """Return the raw decision value ``f(x)`` for each row of ``X``."""
        K = polynomial_kernel(X, self.X, self.degree, self.coef0)
        return np.dot(self.alpha * self.y, K.T) + self.b

    def predict(self, X):
        """Return -1.0 / +1.0 for each row of ``X``; the boundary itself maps to +1.0."""
        # np.sign would emit 0 exactly on the boundary, which is not a valid label.
        return np.where(self.project(X) >= 0, 1.0, -1.0)

# Train the degree-3 polynomial SVM, then keep both hard labels and raw scores.
model = PolynomialSVM(C=1.0, lr=0.0001, n_iters=500, degree=3, coef0=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_scores = model.project(X_test)  # continuous decision values, consumed by the ROC curve

# Boolean masks shared by the four confusion-matrix cells (+1 = "M", -1 = otherwise).
actual_pos, actual_neg = (y_test == 1), (y_test == -1)
pred_pos, pred_neg = (y_pred == 1), (y_pred == -1)

tp = np.sum(actual_pos & pred_pos)
tn = np.sum(actual_neg & pred_neg)
fp = np.sum(actual_neg & pred_pos)
fn = np.sum(actual_pos & pred_neg)

# The 1e-6 terms keep each ratio defined when its denominator would be zero.
n_counted = tp + tn + fp + fn
accuracy = (tp + tn) / (n_counted + 1e-6)
precision = tp / (tp + fp + 1e-6)
recall = tp / (tp + fn + 1e-6)
f1 = 2 * precision * recall / (precision + recall + 1e-6)

print("=== Polynomial SVM (From Scratch) – Breast Cancer Classification ===")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1-Score : {f1:.4f}")
print(f"\nConfusion Matrix:\n[[TP={tp}, FP={fp}], [FN={fn}, TN={tn}]]")

def compute_roc(y_true, y_score):
    """Compute the points of a ROC curve.

    Samples are ranked by descending score and one curve point is emitted per
    distinct score value, so samples with tied scores move the curve together
    instead of producing a staircase that depends on their arbitrary order.

    Args:
        y_true: 1-D array-like of labels; the value 1 marks the positive
            class, every other value is treated as negative.
        y_score: 1-D array-like of decision scores (higher = more positive).

    Returns:
        Tuple ``(fpr, tpr)`` of 1-D arrays that start at (0, 0) and end at
        (1, 1).
    """
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score, dtype=float)

    if y_score.size == 0:
        # Nothing to rank: only the two fixed end points remain.
        return np.array([0.0, 1.0]), np.array([0.0, 1.0])

    is_positive = y_true == 1
    n_pos = int(np.sum(is_positive))
    n_neg = int(is_positive.size - n_pos)

    # Stable sort keeps the ranking deterministic.
    order = np.argsort(-y_score, kind="stable")
    ranked_scores = y_score[order]
    ranked_positive = is_positive[order]

    # Running true/false positive counts as the threshold is lowered.
    tp_running = np.cumsum(ranked_positive)
    fp_running = np.cumsum(~ranked_positive)

    # Index of the last sample in each run of equal scores: the only places
    # where a threshold can actually be placed.
    run_ends = np.append(np.flatnonzero(np.diff(ranked_scores)), ranked_scores.size - 1)

    # max(..., 1) keeps the rate at 0 (instead of dividing by zero) when a
    # class is absent from y_true.
    tpr = np.concatenate(([0.0], tp_running[run_ends] / max(n_pos, 1)))
    fpr = np.concatenate(([0.0], fp_running[run_ends] / max(n_neg, 1)))

    # With both classes present the last point is already (1, 1); close the
    # curve explicitly only when a class is missing.
    if fpr[-1] != 1.0 or tpr[-1] != 1.0:
        fpr = np.append(fpr, 1.0)
        tpr = np.append(tpr, 1.0)
    return fpr, tpr

# ROC points for the held-out split, from the raw decision scores.
fpr, tpr = compute_roc(y_test, y_scores)

# Draw the classifier's curve against the chance diagonal on one set of axes.
fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(fpr, tpr, color='blue', label='Polynomial SVM (Scratch)')
ax.plot([0, 1], [0, 1], 'r--', label='Random Chance')
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate (Recall)")
ax.set_title("ROC Curve – SVM with Polynomial Kernel")
ax.legend()
ax.grid(True)
plt.show()

ModuleNotFoundError: No module named 'pandas'