Title: Regression Models

Linear Regression (Simple & Multiple)

Task 1: Differentiate between a labeled dataset of image classifications and an unlabeled dataset for clustering customer segments.

In [1]:
# Task 1: contrast supervised learning (labeled images) with
# unsupervised learning (unlabeled points grouped into clusters).
from sklearn.datasets import load_digits, make_blobs
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans

# --- Supervised Learning: Image Classification ---
# Each digit image comes with a known target label, so the model is trained
# on (features, label) pairs and evaluated on held-out labeled data.
digits = load_digits()
X_img, y_img = digits.data, digits.target

# random_state pins the shuffle so the split is identical on every run.
X_img_train, X_img_test, y_img_train, y_img_test = train_test_split(
    X_img, y_img, test_size=0.2, random_state=42
)

# random_state pins the bootstrap/feature sampling inside the forest so the
# printed predictions are reproducible after a kernel restart.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_img_train, y_img_train)
img_predictions = clf.predict(X_img_test)

print("Image Classification Predictions (first 5):")
print(img_predictions[:5])

# --- Unsupervised Learning: Clustering Customer Segments ---
# The labels returned by make_blobs are deliberately discarded (`_`): the
# algorithm only sees the feature matrix and has to discover groups itself.
X_cluster, _ = make_blobs(n_samples=100, centers=3, n_features=2, random_state=42)

kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_cluster)
# Cluster ids are arbitrary group indices assigned by KMeans, not class labels.
cluster_labels = kmeans.labels_

print("\nCustomer Segment Cluster Labels (first 5):")
print(cluster_labels[:5])


Image Classification Predictions (first 5):
[5 4 0 8 2]

Customer Segment Cluster Labels (first 5):
[1 2 0 2 1]


Task 2: Examine a problem statement and determine which learning type is applicable.

In [2]:
# Task 2: problem statement -> "predict whether a loan application is approved".
# Every applicant row has a known outcome (0/1) to learn from, so the applicable
# learning type is supervised learning (binary classification).
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Local seeded generator: reproducible data without touching NumPy's global RNG state.
rng = np.random.default_rng(42)

# Simulated data: [income, credit_score, employment_status_encoded]
# NOTE: the labels are drawn independently of the features, so the model cannot
# learn a real relationship here; the cell only demonstrates the workflow.
X = rng.random((100, 3))  # 100 applicants, 3 features, values in [0, 1)
y = rng.choice([0, 1], size=100)  # 0 = Not Approved, 1 = Approved

# Train-test split (seeded so the same applicants land in the test set each run)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model (seeded so the fitted forest, and therefore the output, is reproducible)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict
predictions = model.predict(X_test)

print("Loan Approval Predictions (0=No, 1=Yes):")
print(predictions[:5])


Loan Approval Predictions (0=No, 1=Yes):
[1 1 0 1 0]


Task 3: Identify whether facial recognition systems use supervised or unsupervised learning and justify.

In [3]:
# Task 3: facial recognition as set up here is supervised learning.
# Justification: every face image in the dataset is paired with an identity
# label (faces.target), and the classifier is trained to map pixels -> identity.
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load labeled face data (people with at least 50 images).
# NOTE: fetch_lfw_people downloads the dataset on first use if it is not
# already cached locally, so this cell needs network access the first time.
faces = fetch_lfw_people(min_faces_per_person=50, resize=0.4)
X, y = faces.data, faces.target

# Train-test split (seeded so the held-out faces are the same on every run)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model (seeded so the printed predictions are reproducible)
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict: each value is an index into faces.target_names
predictions = clf.predict(X_test)

print("Facial Recognition Predictions (first 5):")
print(predictions[:5])

Facial Recognition Predictions (first 5):
[ 3  3 11 11  3]
