# CUDA and [RAPIDS](https://developer.nvidia.com/rapids) Tests

## Iris Dataset

In [2]:
from cuml import SVC
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Pull the iris feature matrix out of scikit-learn's bundled datasets.
iris = datasets.load_iris()
X = iris.data

# iris.target already stores the classes as integers. To mimic real-world data,
# where labels often arrive as strings, map every integer back to its string name.
target_names = iris.target_names
y = target_names[iris.target]

# Learn the string -> integer mapping, then apply it to the labels.
encoder = LabelEncoder()
encoder.fit(y)
y_encoded = encoder.transform(y)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Build a linear-kernel cuML SVC and fit it on the training split.
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)

# Predict the encoded class of every held-out row and show the result.
predictions = svm.predict(X_test)
print(predictions)


[W] [12:37:44.333186] SVC with the linear kernel can be much faster using the specialized solver provided by LinearSVC. Consider switching to LinearSVC if tranining takes too long.
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]


## Test Performance

### Using CPU/GPU

In [4]:
import time
from sklearn.datasets import make_blobs
from sklearn.manifold import TSNE
from cuml.manifold import UMAP

# Create a large dataset.
# The seed is fixed so every run benchmarks the same points
# (consistent with the random_state=42 used for the iris split).
n_samples = 10_000
n_features = 20
X, _ = make_blobs(n_samples=n_samples, n_features=n_features, random_state=42)

print(f'CPU VS GPU: Testing with [{n_samples:,.0f}] samples and [{n_features:,.0f}] features !')

# NOTE: the two runs below use different algorithms (t-SNE on the CPU, UMAP on
# the GPU), so the timings are an indication only, not a like-for-like speedup.

# Run t-SNE with scikit-learn (CPU)
# time.perf_counter() is used for the measurement because it is a monotonic,
# high-resolution clock; time.time() can jump if the system clock is adjusted.
print('BEGIN [TSNE (sklearn, CPU)] (~30s)...')
start = time.perf_counter()
tsne = TSNE(n_components=2)
X_transformed_sklearn = tsne.fit_transform(X)
end = time.perf_counter()
print(f"END: TSNE (sklearn, CPU). Execution time: {(end - start):.3f} seconds")

# Run UMAP with cuML (GPU)
print('BEGIN [UMAP (cuML, GPU)] ...')
start = time.perf_counter()
umap = UMAP(n_components=2)
X_transformed_cuml = umap.fit_transform(X)
end = time.perf_counter()
print(f"END [ UMAP (cuML, GPU)]. Execution time: {(end - start):.3f} seconds")


CPU VS GPU: Testing with [10,000] samples and [20] features !
BEGIN [TSNE (sklearn, CPU)] (~30s)...
END: TSNE (sklearn, CPU). Execution time: 27.130 seconds
BEGIN [UMAP (cuML, GPU)] ...
END [ UMAP (cuML, GPU)]. Execution time: 0.412 seconds


### Using GPU only

In [3]:
import time
from sklearn.datasets import make_blobs
from sklearn.manifold import TSNE
from cuml.manifold import UMAP

# Create a large dataset.
# The seed is fixed so every run benchmarks the same points.
n_samples = 1_000_000
n_features = 20
X, _ = make_blobs(n_samples=n_samples, n_features=n_features, random_state=42)

print(f'GPU: Testing with [{n_samples:,.0f}] samples and [{n_features:,.0f}] features !')

# Run UMAP with cuML (GPU)
# time.perf_counter() is used for the measurement because it is a monotonic,
# high-resolution clock; time.time() can jump if the system clock is adjusted.
print('BEGIN [UMAP (cuML, GPU)] ...')
start = time.perf_counter()
umap = UMAP(n_components=2)
X_transformed_cuml = umap.fit_transform(X)
end = time.perf_counter()
print(f"END [ UMAP (cuML, GPU)]. Execution time: {(end - start):.3f} seconds")

GPU: Testing with [1,000,000] samples and [20] features !
BEGIN [UMAP (cuML, GPU)] ...
END [ UMAP (cuML, GPU)]. Execution time: 76.678 seconds
