<h1>scikit-learn practice</h1>

In [6]:
# 1. Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score, learning_curve
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline


In [7]:
# Load the bundled Iris dataset and separate it into inputs and labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target  # X: flower measurements, y: flower species

# Report the array dimensions as a quick sanity check on what was loaded.
print("Feature shape:", X.shape)
print("Target shape:", y.shape)


Feature shape: (150, 4)
Target shape: (150,)


<h3>Train-Test Split</h3>

In [9]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


<h3>Train a Logistic Regression Model</h3>

In [4]:
# LogisticRegression is already imported in the notebook's import cell, so the
# cell-local re-import has been dropped.
# max_iter is raised above the library default so the solver has room to
# converge on the unscaled features; 200 matches the pipeline cell's setting.
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

<h3>Make Predictions</h3>

In [10]:
# Predict a class label for every held-out sample with the fitted model.
# y_pred is reused by the metric cells that follow.
y_pred = model.predict(X_test)
print("Predictions:", y_pred)


Predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]


<h3>Model Accuracy</h3>

In [11]:
# Share of held-out samples whose predicted label equals the true label.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)


Accuracy: 1.0


<h3>Confusion Matrix</h3>

In [12]:
# Rows are true classes, columns are predicted classes; off-diagonal entries
# count misclassified samples.
conf_mat = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_mat)


Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


<h3>Classification Report</h3>

In [13]:
# Per-class precision, recall, F1 and support, rendered as a text table.
report_text = classification_report(y_test, y_pred)
print("Classification Report:\n", report_text)


Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



<h3>Standardize Features (Scaler)</h3>

In [14]:
# Standardize each feature column (subtract its mean, divide by its standard
# deviation) and show the first five transformed rows.
# NOTE: the scaler is fitted on the full dataset here purely as a demonstration;
# when scaled features feed a model, fit the scaler on the training split only
# so that test-set statistics do not leak into training.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)
print("Scaled Feature Sample:", X_scaled[:5])


Scaled Feature Sample: [[-0.90068117  1.01900435 -1.34022653 -1.3154443 ]
 [-1.14301691 -0.13197948 -1.34022653 -1.3154443 ]
 [-1.38535265  0.32841405 -1.39706395 -1.3154443 ]
 [-1.50652052  0.09821729 -1.2833891  -1.3154443 ]
 [-1.02184904  1.24920112 -1.34022653 -1.3154443 ]]


<h3>Label Encoding (convert labels to numbers)</h3>

In [15]:
# LabelEncoder maps each distinct label to an integer code. The Iris target is
# already stored as integers, so this cell only demonstrates the API.
encoder = LabelEncoder()
y_encoded = encoder.fit(y).transform(y)
print("Encoded Labels:", y_encoded[:10])


Encoded Labels: [0 0 0 0 0 0 0 0 0 0]


<h3>Decision Tree Classifier</h3>

In [16]:
# Fit a single decision tree on the training split and report its accuracy on
# the held-out split.
# random_state is pinned (same seed as the train/test split) because tree
# construction breaks ties between equally good splits at random; without a
# seed the fitted tree, and potentially its score, can differ between runs.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train, y_train)
print("Decision Tree Accuracy:", tree.score(X_test, y_test))


Decision Tree Accuracy: 1.0


<h3>Random Forest Classifier</h3>

In [17]:
# Fit a random forest on the training split and report its accuracy on the
# held-out split.
# random_state is pinned (same seed as the train/test split) because the forest
# relies on random bootstrap samples and random feature subsets; without a seed
# the ensemble, and potentially its score, changes on every run.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
print("Random Forest Accuracy:", rf.score(X_test, y_test))


Random Forest Accuracy: 1.0


<h3>Cross-Validation</h3>

In [None]:
# 5-fold cross-validation over the full dataset: the estimator is re-fitted on
# each training fold and scored on the corresponding held-out fold.
scores = cross_val_score(estimator=model, X=X, y=y, cv=5)
mean_accuracy = scores.mean()

print("Cross-Validation Scores:", scores)
print("Mean Accuracy:", mean_accuracy)


<h3>Pipeline (Scaler + Model)</h3>

In [20]:
# Chain scaling and classification into one estimator so the scaler is fitted
# only on whatever data the pipeline itself is fitted on.
pipeline_steps = [
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression(max_iter=200)),
]
pipe = Pipeline(pipeline_steps)
pipe.fit(X_train, y_train)

# Evaluate the fitted pipeline on the held-out split.
pipeline_accuracy = pipe.score(X_test, y_test)
print("Pipeline Accuracy:", pipeline_accuracy)


Pipeline Accuracy: 1.0


<h3>Learning Curve</h3>

In [None]:
# Compute train and cross-validation scores for increasing training-set sizes.
# The Iris samples are stored grouped by class and learning_curve takes the
# first rows of each training fold as the reduced training sets. Without
# shuffling, the smallest subsets therefore contain a single class, the
# classifier cannot be fitted on them, and their scores come back as NaN.
# shuffle=True mixes the classes; random_state keeps the curve reproducible.
train_sizes, train_scores, test_scores = learning_curve(
    model,
    X,
    y,
    cv=5,
    train_sizes=np.linspace(0.1, 1.0, 5),
    shuffle=True,
    random_state=42,
)

# Average over the CV folds (axis=1) to get one score per training-set size.
plt.plot(train_sizes, train_scores.mean(axis=1), label='Train')
plt.plot(train_sizes, test_scores.mean(axis=1), label='Test')
plt.xlabel("Training set size")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Learning Curve")
plt.show()
