# classification

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

data=load_iris()
X=data.data # the features
y=data.target # the labels

# split data into training and testing
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

# create and train the model
model=LogisticRegression(max_iter=200)
model.fit(X_train,y_train)

# make predictions
y_pred=model.predict(X_test)

# check how accurate the model is 
accuracy=accuracy_score(y_test,y_pred)
print('accuracy:',accuracy)

accuracy: 1.0


In [4]:
from sklearn.metrics import f1_score

# calculate recall and F1-score
# recall=recall_score(y_test,y_pred)
f1=f1_score(y_test,y_pred)

# print(f'recall:{recall:.2f}')
print(f'F1 score:{f1:.2f}')

F1 score:0.97


# regression

In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

data=fetch_california_housing()
X=data.data
y=data.target

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

model=LinearRegression()
model.fit(X_train,y_train)

y_pred=model.predict(X_test)


# mean squared error
mse=mean_squared_error(y_test,y_pred)
print('mean squared error:',mse)

mean squared error: 0.555891598695242


# clustering

In [1]:
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score

# Generate synthetic data
X, y = make_blobs(n_samples=10, centers=3, random_state=42)

# Create and fit the KMeans model
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X)

# Get the cluster labels
labels = kmeans.labels_

# Display the cluster assignments
for i, label in enumerate(labels):
    print(f'Data point {i+1}: Cluster {label}')

print("\n--- Clustering Metrics ---")

# 1. Silhouette Score (higher is better, range: [-1, 1])
silhouette_avg = silhouette_score(X, labels)
print(f'Silhouette Score: {silhouette_avg:.2f}')

# 2. Davies-Bouldin Index (lower is better)
davies_bouldin = davies_bouldin_score(X, labels)
print(f'Davies-Bouldin Index: {davies_bouldin:.2f}')

# 3. Calinski-Harabasz Index (higher is better)
calinski_harabasz = calinski_harabasz_score(X, labels)
print(f'Calinski-Harabasz Index: {calinski_harabasz:.2f}')


Data point 1: Cluster 0
Data point 2: Cluster 0
Data point 3: Cluster 2
Data point 4: Cluster 0
Data point 5: Cluster 1
Data point 6: Cluster 1
Data point 7: Cluster 1
Data point 8: Cluster 2
Data point 9: Cluster 2
Data point 10: Cluster 1

--- Clustering Metrics ---
Silhouette Score: 0.82
Davies-Bouldin Index: 0.19
Calinski-Harabasz Index: 166.25
