# Logistic Regression using scikit-learn in Python

#### Necessary libraries import

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

#### Load and preprocess the data

In [2]:
# Set random seed for reproducibility
np.random.seed(42)

# Generate random data for 100 rows
n_rows = 1000
X1 = np.random.normal(loc=5, scale=2, size=n_rows)
X2 = np.random.normal(loc=3, scale=1, size=n_rows)
X3 = np.random.normal(loc=7, scale=3, size=n_rows)

# Generate binary outcome variable (0 or 1)
Outcome = np.random.randint(0, 2, size=n_rows)

# Create DataFrame
data = pd.DataFrame({'X1': X1, 'X2': X2, 'X3': X3, 'Outcome': Outcome})

# Save to CSV
data.to_csv('example_dataset.csv', index=False)

print("Dataset with 100 rows created and saved as 'example_dataset.csv'.")

Dataset with 100 rows created and saved as 'example_dataset.csv'.


In [3]:
data.head()

Unnamed: 0,X1,X2,X3,Outcome
0,5.993428,4.399355,4.974465,1
1,4.723471,3.924634,6.566444,1
2,6.295377,3.05963,4.62274,1
3,8.04606,2.353063,6.076115,1
4,4.531693,3.698223,1.319156,0


#### Load the dataset

In [4]:
data = pd.read_csv("example_dataset.csv")

# Separate predictors (X) and outcome variable (y)
X = data.drop(columns=["Outcome"])
y = data["Outcome"]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#### Train a Logistic Regression model

In [5]:
logistic_model = LogisticRegression()

logistic_model.fit(X_train, y_train) 

#### Make predictions

In [6]:
y_pred = logistic_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Classification report
class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", class_report)

Accuracy: 0.595
Confusion Matrix:
 [[49 48]
 [33 70]]
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.51      0.55        97
           1       0.59      0.68      0.63       103

    accuracy                           0.59       200
   macro avg       0.60      0.59      0.59       200
weighted avg       0.60      0.59      0.59       200



In [7]:
# Example: Predicting on new data
new_data = np.array([[6.0, 4.5, 8.2]])  # Insert your new data values
scaled_new_data = scaler.transform(new_data)
prediction = logistic_model.predict(scaled_new_data)
print("Prediction:", prediction)

Prediction: [1]


