# In [None]:
# Step 1: Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Step 2: Load the Social Network Ads dataset (local path)
# Download file from here:  https://github.com/shivang98/Social-Network-ads-Boost/blob/master/Social_Network_Ads.csv
DATA_PATH = 'Social_Network_Ads.csv'  # Update the file path if necessary
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as err:
    # Re-raise the same exception type, but tell the reader how to fix it:
    # the bare pandas message only repeats the missing path.
    raise FileNotFoundError(
        f"Dataset '{DATA_PATH}' was not found. Download Social_Network_Ads.csv from "
        "https://github.com/shivang98/Social-Network-ads-Boost/blob/master/Social_Network_Ads.csv "
        "and place it in the working directory, or update DATA_PATH."
    ) from err

# Step 3: Inspect the dataset
print(df.head())  # Check the first few rows of the dataset

# Step 4: Feature selection
# Select columns by name rather than by position, so a CSV with a different
# column layout raises a KeyError instead of silently training on the wrong
# columns.
FEATURE_COLUMNS = ["Age", "EstimatedSalary"]
TARGET_COLUMN = "Purchased"
X = df[FEATURE_COLUMNS].values  # Features: Age, EstimatedSalary
y = df[TARGET_COLUMN].values  # Target: Purchased

# Step 5: Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: Feature scaling
# The scaler learns its statistics from the training rows only and then
# applies that same transformation to both partitions.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Step 7: Build the logistic regression classifier and fit it on the scaled training data
classifier = LogisticRegression(random_state=42).fit(X_train, y_train)

# Step 8: Predict labels for the held-out test rows
y_pred = classifier.predict(X_test)

# Step 9: Confusion Matrix and Evaluation Metrics
# Pin the label order so the matrix is always 2x2 (rows = actual, columns =
# predicted), even if one class is absent from both y_test and y_pred.
# Without this, the matrix can shrink to 1x1 and the indexing below fails.
# NOTE(review): assumes Purchased is encoded as 0/1 -- confirm against the CSV.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
print("Confusion Matrix:")
print(cm)

# Step 10: Extract TP, FP, TN, FN from the confusion matrix
# Row-major flattening of [[TN, FP], [FN, TP]] gives the four counts in this order.
TN, FP, FN, TP = cm.ravel()

print(f"True Positive (TP): {TP}")
print(f"False Positive (FP): {FP}")
print(f"True Negative (TN): {TN}")
print(f"False Negative (FN): {FN}")

# Step 11: Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
error_rate = 1 - accuracy  # share of misclassified test rows

# Report every metric with four decimal places, one per line
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Error Rate", error_rate),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_name}: {metric_value:.4f}")

# Step 12: Draw the confusion matrix as an annotated heatmap
class_names = ["Not Purchased", "Purchased"]  # shared by both axes' tick labels

plt.figure(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,  # write the count inside each cell
    fmt="d",  # integer formatting for the annotations
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()



# Output (cell was run without the dataset in the working directory):
# FileNotFoundError: [Errno 2] No such file or directory: 'Social_Network_Ads.csv'