In [3]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the dataset.
# The CSV location can be supplied through the FRAMINGHAM_CSV environment
# variable so the notebook runs on other machines without editing this cell;
# when the variable is unset the original local path is used unchanged.
file_path = os.environ.get('FRAMINGHAM_CSV', r'C:/Users/asokk/Downloads/framingham.csv')
df = pd.read_csv(file_path)

In [4]:
# Keep only complete records: a row is discarded as soon as any one of its
# columns is missing. The result is a new frame; `df` itself is left untouched.
df_clean = df.dropna(axis=0, how='any')

In [5]:
# Sanity check: the cleaned frame must not contain a single missing cell
assert df_clean.isna().sum().sum() == 0, "There are still missing values in the dataset."

In [7]:
# Separate the cleaned frame into predictors and label:
#   X - every column except 'TenYearCHD', as a 2-D NumPy array
#   y - the 'TenYearCHD' column alone, as a 1-D NumPy array
X = df_clean.drop('TenYearCHD', axis=1).values
y = df_clean.loc[:, 'TenYearCHD'].values

In [8]:
# Standardise the features: learn the per-column statistics from X first,
# then apply them to the same matrix.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [9]:
# Add intercept term (a leading column of ones) to the scaled feature matrix.
# The matrix is rebuilt from scaler.transform(X) instead of from X_scaled
# itself, so re-running this cell always yields exactly one intercept column
# rather than prepending another one on every execution.
X_scaled = np.insert(scaler.transform(X), 0, 1, axis=1)

class LogisticRegressionNumpy:
    """Binary logistic regression trained with full-batch gradient descent.

    No separate bias term is learned: the model is ``sigmoid(X @ theta)``, so
    include a constant column in ``X`` if an intercept is wanted.

    Attributes set by ``fit``:
        m, n: number of rows and columns of the training matrix.
        theta: 1-D weight vector of length ``n``.
    """

    def __init__(self, learning_rate=0.001, num_iterations=10000):
        """Store the gradient-descent step size and the number of update steps."""
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Only ``exp(-|z|)`` is ever evaluated, which lies in (0, 1], so large
        negative inputs no longer overflow inside ``np.exp``.
        """
        z = np.asarray(z)
        decay = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
        result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # [()] turns a 0-d result back into a scalar and is a no-op otherwise.
        return result[()]

    def fit(self, X, y):
        """Estimate ``theta`` by gradient descent, starting from zeros.

        Args:
            X: 2-D array-like of shape (m, n).
            y: array-like with m labels; flattened to 1-D so a column vector
                of shape (m, 1) is accepted as well.

        Raises:
            ValueError: if ``X`` is not 2-D, has no rows, or if ``y`` does not
                contain one label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D, got {X.ndim} dimension(s)")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {y.shape[0]} labels"
            )

        self.m, self.n = X.shape
        self.theta = np.zeros(self.n)

        for _ in range(self.num_iterations):
            h = self.sigmoid(np.dot(X, self.theta))
            # Gradient of the mean log-loss with respect to theta.
            gradient = np.dot(X.T, (h - y)) / self.m
            self.theta -= self.learning_rate * gradient

    def predict_proba(self, X):
        """Return ``sigmoid(X @ theta)`` for each row of ``X``."""
        return self.sigmoid(np.dot(np.asarray(X, dtype=float), self.theta))

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where the predicted probability >= ``threshold``.

        The default ``threshold`` of 0.5 reproduces the previous fixed cutoff.
        """
        return (self.predict_proba(X) >= threshold).astype(int)

In [11]:
# Instantiate the model
model = LogisticRegressionNumpy(learning_rate=0.001, num_iterations=10000)

# Train the model
model.fit(X_scaled, y)

# Make predictions
# NOTE(review): predictions are made on X_scaled, the same matrix passed to
# fit() above, so every metric below is an in-sample (training) figure.
predictions = model.predict(X_scaled)

# Calculate accuracy
accuracy = np.mean(predictions == y)
print("Model Accuracy:", accuracy)

# Confusion-matrix cells: each count combines the prediction with the label
true_positives = np.sum((predictions == 1) & (y == 1))
true_negatives = np.sum((predictions == 0) & (y == 0))
false_negatives = np.sum((predictions == 0) & (y == 1))
false_positives = np.sum((predictions == 1) & (y == 0))

# Class totals, used as denominators for the error rates below
actual_positives = np.sum(y == 1)
actual_negatives = np.sum(y == 0)

# Calculate percentages:
#   false negatives as a share of all actual positives (miss rate)
#   false positives as a share of all actual negatives (false-alarm rate)
# An empty class yields NaN instead of a divide-by-zero warning.
percentage_false_negatives = (
    false_negatives / actual_positives * 100 if actual_positives else float('nan')
)
percentage_false_positives = (
    false_positives / actual_negatives * 100 if actual_negatives else float('nan')
)

# Print results
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print(f"Percentage of False Negatives: {percentage_false_negatives:.2f}%")
print(f"Percentage of False Positives: {percentage_false_positives:.2f}%")

Model Accuracy: 0.8533916849015317
Model Accuracy: 85.34%
Percentage of False Negatives: 93.36%
Percentage of False Positives: 0.52%
