In [32]:
# Step 1: Import necessary libraries
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from ucimlrepo import fetch_ucirepo

In [33]:
# Step 2: Download dataset id=158 through the ucimlrepo client
poker_hand = fetch_ucirepo(id=158)

# Unpack the feature table (X) and the target table (y) in a single assignment
X, y = poker_hand.data.features, poker_hand.data.targets

In [34]:
# Preview the leading rows of the features (X) and then of the target (y)
for table in (X, y):
    print(table.head())

# Report the dimensions of both tables
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")


   S1  C1  S2  C2  S3  C3  S4  C4  S5  C5
0   1  10   1  11   1  13   1  12   1   1
1   2  11   2  13   2  10   2  12   2   1
2   3  12   3  11   3  13   3  10   3   1
3   4  10   4  11   4   1   4  13   4  12
4   4   1   4  13   4  12   4  11   4  10
   CLASS
0      9
1      9
2      9
3      9
4      9
Features shape: (1025010, 10)
Target shape: (1025010, 1)


In [35]:
# Step 3: Hold out 20% of the rows for testing; the remaining 80% are used for training
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the same split is produced on every run
)

In [36]:
# Step 4: Min-Max scaling. The per-feature range is learned from the training
# split only, and that same fitted transformation is then applied to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [37]:
# Step 5: Flatten the targets to 1-D NumPy arrays and train the k-NN model.
#
# np.ravel accepts both the original single-column DataFrame and an array that
# has already been flattened, so this cell can be re-run (for example to try a
# different k) without the AttributeError that `.values.ravel()` raises once
# y_train / y_test have been turned into ndarrays by a previous run.
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

# Number of neighbours (k); change this value and re-run the cell to compare settings
N_NEIGHBORS = 11

# n_jobs=-1 lets the neighbour search use every CPU core; it affects speed only,
# not the fitted model or its predictions.
knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS, n_jobs=-1)
knn.fit(X_train_scaled, y_train)


In [None]:
# Step 6: Run the fitted classifier on the scaled test features
y_pred = knn.predict(X=X_test_scaled)

In [None]:
# Step 7: Compare the predictions with the true test labels and report the
# accuracy as a percentage with two decimals
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy after scaling: {accuracy * 100:.2f}%")

In [None]:
# Compare accuracy on the training data with accuracy on the test data.
#
# Scoring k-NN runs a neighbour search for every row that is scored, so scoring
# the whole training split is very slow. Training accuracy is therefore measured
# on the leading rows only; train_test_split shuffles by default, so those rows
# are a random sample of the training split.
TRAIN_EVAL_ROWS = 20_000  # set to None to score the entire training split

X_train_eval = X_train_scaled[:TRAIN_EVAL_ROWS]
y_train_eval = y_train[:TRAIN_EVAL_ROWS]
print(f"Training accuracy (first {len(y_train_eval)} rows):", knn.score(X_train_eval, y_train_eval))

# knn.score(X_test_scaled, y_test) would repeat the prediction already stored in
# y_pred (Step 6). Scoring those stored predictions yields the same number
# without a second pass over the test set.
print("Test accuracy:", accuracy_score(y_test, y_pred))


KeyboardInterrupt: 

In [None]:
# Per-class precision / recall / F1 for the test-set predictions from Step 6.
# y_pred is reused rather than recomputed, because predicting is the expensive step.

# Display names for the classes, listed in label order.
# NOTE(review): assumes the class labels are the integers 0-9 (ten names are
# given and CLASS value 9 appears in the data preview) — confirm against the
# dataset description.
HAND_NAMES = [
    "Nothing in hand", "One pair", "Two pairs", "Three of a kind",
    "Straight", "Flush", "Full house", "Four of a kind", "Straight flush", "Royal flush"
]

report = classification_report(
    y_test,
    y_pred,
    # Listing the labels explicitly keeps every name attached to its own class
    # even if a rare class is missing from both y_test and y_pred; without it
    # the number of names would no longer match the number of classes found.
    labels=list(range(len(HAND_NAMES))),
    target_names=HAND_NAMES,
    # Classes that are never predicted get precision 0 without emitting a warning.
    zero_division=0,
)

print(report)


Accuracy recorded for different values of k:
k=5: 53.29% accuracy
k=9: 54.46% accuracy

In [None]:
# Plot accuracy against the number of neighbours, using the values noted from
# earlier runs of this notebook.
k_values = [1, 5, 7, 9, 11]
# The accuracy for k=11 has not been recorded yet. NaN keeps this list aligned
# with k_values and matplotlib leaves that point out of the line; replace it
# with the measured value once it is known.
accuracies = [50.00, 53.29, 53.81, 54.46, float("nan")]

# Plot
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(k_values, accuracies, marker='o', color='blue')
ax.set_title("k-NN Accuracy vs. Number of Neighbors")
ax.set_xlabel("Number of Neighbors (k)")
ax.set_ylabel("Accuracy (%)")
ax.set_xticks(k_values)  # tick only at the k values that were actually tried
ax.grid()
plt.show()
