In [3]:
import numpy as np
from sklearn import datasets # to load the Iris dataset
from sklearn.model_selection import train_test_split # to split the dataset into training and testing sets
from sklearn.preprocessing import StandardScaler # standardize features by removing the mean and scaling to unit variance
from sklearn.neighbors import KNeighborsClassifier # the K-Nearest Neighbors (KNN) algorithm for classification
from sklearn.metrics import accuracy_score

# Load the Iris dataset, a well-known benchmark dataset in machine learning.
# The dataset consists of 150 samples of iris flowers with 4 features each (sepal length, sepal width, petal length, and petal width)
# and 3 target classes (Iris-setosa, Iris-versicolor, and Iris-virginica).
iris_X, iris_y = datasets.load_iris(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    iris_X, iris_y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only,
# then apply the same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a 5-nearest-neighbours classifier on the scaled training data
knn_classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict class labels for the held-out samples
y_pred = knn_classifier.predict(X_test)

# Accuracy = fraction of test samples whose predicted label matches the true label.
# Left as the last expression so the notebook displays the value.
accuracy_score(y_test, y_pred)

1.0

In [5]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

# Load the diabetes dataset as a (features, target) pair
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    diabetes_X, diabetes_y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only,
# then apply the same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a 5-nearest-neighbours regressor on the scaled training data
knn_regressor = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Predict target values for the held-out samples
y_pred = knn_regressor.predict(X_test)

# Regression is scored with mean squared error rather than accuracy
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 3047.449887640449


In [1]:
# Import library
!pip install -q datasets
import numpy as np
from datasets import load_dataset
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score

# Load the IMDB dataset and pick out its train and test splits
imdb = load_dataset("imdb")
imdb_train, imdb_test = imdb['train'], imdb['test']

# Convert text to vectors using bag-of-words (BoW):
# each document becomes a row of token counts, capped at 1000 vocabulary terms
vectorizer = CountVectorizer(max_features=1000)

# Learn the vocabulary on the training text only, then reuse it for the test text.
# .toarray() densifies the sparse count matrices so that StandardScaler can centre them below.
X_train = vectorizer.fit_transform(imdb_train['text']).toarray()
X_test = vectorizer.transform(imdb_test['text']).toarray()
y_train = np.array(imdb_train['label'])
y_test = np.array(imdb_test['label'])

# Scale the features using StandardScaler (statistics come from the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build KNN Classifier.
# With 1000 features, tree indexes (ball_tree / kd_tree) prune almost nothing and add
# construction and traversal overhead; brute force is still an exact search and is the
# method scikit-learn's own 'auto' heuristic selects for high-dimensional input.
knn_classifier = KNeighborsClassifier(n_neighbors=1, algorithm='brute')
knn_classifier.fit(X_train, y_train)

# Predict the test set and evaluate.
# Left as the last expression so the notebook displays the accuracy.
y_pred = knn_classifier.predict(X_test)
accuracy_score(y_test, y_pred)