In [11]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import accuracy_score
from datasets import load_dataset
from sklearn.feature_extraction.text import CountVectorizer

### Iris data (KNN Classifier)

In [6]:
# Fetch the iris feature matrix and class labels as plain arrays
iris_X, iris_y = datasets.load_iris(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    iris_X,
    iris_y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply
# that same transformation to both splits so no test information leaks in
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Fit a 5-nearest-neighbours classifier on the scaled training data
knn_classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Classify the held-out samples and report the fraction predicted correctly
y_pred = knn_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00


### Diabetes data (KNN Regressor)

In [9]:
# Fetch the diabetes feature matrix and regression targets as plain arrays
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    diabetes_X,
    diabetes_y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply
# that same transformation to both splits so no test information leaks in
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Fit a 5-nearest-neighbours regressor on the scaled training data
knn_regressor = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Predict the held-out targets and report the mean of the squared residuals
y_pred = knn_regressor.predict(X_test)
residuals = y_test - y_pred
mse = np.mean(residuals ** 2)
print(f'Mean Squared Error: {mse:.2f}')

Mean Squared Error: 3047.45


### IMDB data (KNN Classifier)

In [12]:
# Load the IMDB dataset and pick out its 'train' and 'test' splits
imdb = load_dataset('imdb')
imdb_train, imdb_test = imdb['train'], imdb['test']

# Convert text to vectors using Bag of Words.
# The vocabulary is learned from the training reviews only and capped at
# 1000 terms; the test reviews are encoded with that same vocabulary.
vectorizer = CountVectorizer(max_features=1000)
X_train = vectorizer.fit_transform(imdb_train['text']).toarray()
X_test = vectorizer.transform(imdb_test['text']).toarray()
y_train = imdb_train['label']
y_test = imdb_test['label']

# Scale the features using StandardScaler (statistics come from the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# KNN Classifier
# Use brute-force neighbour search: the vectors have 1000 dimensions, and
# tree-based indexes such as ball_tree lose their pruning advantage in
# high-dimensional spaces, making them slower than a direct distance
# computation. Brute force is still an exact search, so the nearest
# neighbours found (and therefore the predictions) are the same.
knn_classifier = KNeighborsClassifier(n_neighbors=1, algorithm='brute')
knn_classifier.fit(X_train, y_train)

# Predict and evaluate test data
y_pred = knn_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Downloading readme: 100%|██████████| 7.81k/7.81k [00:00<00:00, 33.2kB/s]
Downloading data: 100%|██████████| 21.0M/21.0M [00:01<00:00, 10.8MB/s]
Downloading data: 100%|██████████| 20.5M/20.5M [00:01<00:00, 11.3MB/s]
Downloading data: 100%|██████████| 42.0M/42.0M [00:03<00:00, 12.4MB/s]
Generating train split: 100%|██████████| 25000/25000 [00:00<00:00, 251260.65 examples/s]
Generating test split: 100%|██████████| 25000/25000 [00:00<00:00, 278572.83 examples/s]
Generating unsupervised split: 100%|██████████| 50000/50000 [00:00<00:00, 314789.85 examples/s]


Accuracy: 0.60
