# Principal Engineer Data Science Interview

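This notebook trains and evaluates a basic logistic regression classifier on a small synthetic dataset, using only NumPy, pandas, and scikit-learn. Note that the synthetic labels are sampled independently of the features, so the model is not expected to outperform a majority-class baseline; the goal is to demonstrate the workflow, not predictive power.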

## 1. Environment Setup

In [None]:
# Import only essential libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

: 

## 2. Data Generation & Exploration

In [None]:
# Build a reproducible toy dataset: two Gaussian features plus a binary label.
# The global NumPy RNG is seeded first and the columns are drawn in a fixed
# order (feature_1, feature_2, target), so re-running this cell always yields
# the same frame.
np.random.seed(42)
n_samples = 500
df = pd.DataFrame({
    'feature_1': np.random.normal(loc=0, scale=1, size=n_samples),
    'feature_2': np.random.normal(loc=2, scale=1.5, size=n_samples),
    # Labels are sampled on their own (P(target = 1) = 0.3); they are not
    # derived from either feature.
    'target': np.random.binomial(n=1, p=0.3, size=n_samples),
})

# Preview the top rows; the bare expression below is the cell's displayed output
print("Sample data:")
df.head()

In [None]:
# Report the frame's dimensions, then show per-column descriptive statistics.
# A single print call with a newline separator emits the same two lines (and
# the blank line between them); the trailing expression is the cell's output.
print(f"Dataset shape: {df.shape}", "\nData summary:", sep="\n")
df.describe()

## 3. Model Development

In [None]:
# Separate the two predictor columns from the label column
X = df.loc[:, ['feature_1', 'feature_2']]
y = df.loc[:, 'target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both partitions so the test rows never influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

In [None]:
# Fit a logistic regression classifier on the standardized training features
model = LogisticRegression(random_state=42)
model.fit(X_train_scaled, y_train)

# Predict hard class labels for the held-out test rows
y_pred = model.predict(X_test_scaled)

# Report the fitted parameters. The coefficients apply to the *scaled*
# features, since the model was trained on X_train_scaled.
# Feature names are read from the training frame's columns instead of a second
# hard-coded list, so the labels cannot drift out of sync with the columns
# that were actually used for fitting.
print("Model coefficients:")
for feature, coef in zip(X_train.columns, model.coef_[0]):
    print(f"  {feature}: {coef:.4f}")
print(f"  Intercept: {model.intercept_[0]:.4f}")

## 4. Model Evaluation

In [None]:
# Evaluate the test-set predictions: overall accuracy, the confusion matrix
# (rows are true classes, columns are predicted classes), and per-class
# precision / recall / F1.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# zero_division=0 makes the report show 0.0 for a class the model never
# predicts instead of emitting UndefinedMetricWarning. That situation is
# expected here because the synthetic labels carry no signal from the
# features, so the classifier may predict only the majority class.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

## 5. Making Predictions with the Model

In [None]:
# Score a few hand-picked points given in the original (unscaled) feature
# space, one row per point in (feature_1, feature_2) order.
new_data = np.array([[0.5, 1.0], [-1.0, 2.5], [0.0, 0.0]])

# The scaler was fitted on a DataFrame, so it remembers the column names.
# Passing a bare ndarray would trigger scikit-learn's
# "X does not have valid feature names" warning; wrapping the points in a
# DataFrame with the training columns avoids that and makes the column
# order explicit.
new_data_df = pd.DataFrame(new_data, columns=X_train.columns)
new_data_scaled = scaler.transform(new_data_df)

# Predict class probabilities (column 1 holds the probability of class 1)
probabilities = model.predict_proba(new_data_scaled)

# Combine the raw inputs with the model outputs for a readable table
results = new_data_df.assign(
    probability_class_1=probabilities[:, 1],
    predicted_class=model.predict(new_data_scaled),
)

print("Predictions for new data points:")
results