# Machine Learning Models using scikit-learn

## 1. Classification

In [6]:
# Example 1: Logistic Regression

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Fetch the Iris feature matrix and class labels as a (data, target) pair
X, y = load_iris(return_X_y=True)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the classifier with an iteration cap of 200 and train it in one step
# (fit() returns the fitted estimator itself)
model = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Predict class labels for the held-out samples
y_pred = model.predict(X_test)

# Fraction of held-out samples whose predicted label matches the true one
accuracy = accuracy_score(y_test, y_pred)
print(f'Logistic Regression Accuracy: {accuracy:.2f}')

# Example 2: Random Forest Classifier

from sklearn.ensemble import RandomForestClassifier

# Train the model
# A fixed random_state seeds the forest's internal randomness so the accuracy
# printed below is identical on every Restart & Run All, consistent with the
# seeded train/test split used earlier in this cell.
# Note: `model`, `y_pred` and `accuracy` are rebound here, replacing the
# logistic-regression values from Example 1.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the held-out samples
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Random Forest Classifier Accuracy: {accuracy:.2f}')


Logistic Regression Accuracy: 1.00
Random Forest Classifier Accuracy: 1.00


## 2. Regression

In [18]:
# Example 1: Linear Regression
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the California housing data and separate features from the target
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Reserve 20% of the rows for testing, with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Construct and train the linear model in one step (fit() returns the fitted estimator)
model = LinearRegression().fit(X_train, y_train)

# Predict target values for the held-out rows
y_pred = model.predict(X_test)

# Report mean squared error and R-squared on the held-out rows
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f'Linear Regression MSE: {mse:.2f}, R-squared: {r2:.2f}')

Linear Regression MSE: 0.56, R-squared: 0.58


## 3. Clustering

In [23]:
# Example 1: K-Means Clustering
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Load dataset (the class labels are discarded; only the features are clustered)
X, _ = load_iris(return_X_y=True)

# Train the model
# n_init is set explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4 (and versions 1.2-1.3 emit a FutureWarning when it is
# omitted), so pinning it keeps the result the same across library versions.
model = KMeans(n_clusters=3, n_init=10, random_state=42)
labels = model.fit_predict(X)

# Evaluate the model with the silhouette score of the resulting cluster labels
score = silhouette_score(X, labels)
print(f'K-Means Silhouette Score: {score:.2f}')

# Example 2: Gaussian Mixture Model (GMM)

from sklearn.mixture import GaussianMixture

# Fit a three-component Gaussian mixture on the same features and
# assign every sample a component label (seeded for repeatability)
model = GaussianMixture(
    n_components=3,
    random_state=42,
)
labels = model.fit_predict(X)

# Score the component labels with the same silhouette metric used for K-Means
score = silhouette_score(X, labels)
print(f'GMM Silhouette Score: {score:.2f}')




K-Means Silhouette Score: 0.55
GMM Silhouette Score: 0.50




## 4. Dimensionality Reduction

In [26]:
# Example 1: Principal Component Analysis (PCA)
from sklearn.decomposition import PCA

# Reload the Iris features; the labels are not needed for the projection
X, _ = load_iris(return_X_y=True)

# Fit PCA and project the features onto the first two principal components
pca = PCA(
    n_components=2,
)
X_reduced = pca.fit_transform(X)

# Show the share of variance captured by each retained component
print(f'Explained variance ratio: {pca.explained_variance_ratio_}')

# Example 2: t-Distributed Stochastic Neighbor Embedding (t-SNE)

from sklearn.manifold import TSNE

# Embed the same features into two dimensions with t-SNE (seeded for repeatability).
# Note: this rebinds X_reduced, replacing the PCA projection computed above.
tsne = TSNE(
    n_components=2,
    random_state=42,
)
X_reduced = tsne.fit_transform(X)

# The embedding is not inspected here; just confirm the step finished
print('t-SNE completed successfully.')


Explained variance ratio: [0.92461872 0.05306648]
t-SNE completed successfully.
