In [1]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

# Load the data
data = pd.read_csv("sortedBankLoanApproval.csv")

# Extract the features and target: 'Default' is the label column and every
# remaining column is used as a model input.
X = data.drop('Default', axis=1)
y = data['Default']

# Split the data into training and test sets (20% held out; the fixed
# random_state makes the split repeatable).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features. The scaler is fitted on the training rows only and then
# reused for the test rows and the new applicants. The scaled arrays get their
# own `_scaled` names so the raw DataFrames are not overwritten by NumPy arrays
# (same convention as the later cells in this notebook).
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the KNN model
knn = KNeighborsClassifier(n_neighbors=3)

# Train the model
knn.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = knn.predict(X_test_scaled)

# Evaluate the model
print('Test accuracy:', accuracy_score(y_test, y_pred))

# Make predictions on the new data.
# NOTE(review): this assumes sortedNewApplicants.csv has exactly the feature
# columns the scaler was fitted on (i.e. no 'Default' column) -- confirm.
new_data = pd.read_csv("sortedNewApplicants.csv")
new_data_scaled = scaler.transform(new_data)
new_predictions = knn.predict(new_data_scaled)

# Print the predictions
print('New predictions:', new_predictions)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Test accuracy: 0.8557748795676183
New predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [3]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

# Read the labelled loan records.
data = pd.read_csv("sortedBankLoanApproval.csv")

# 'Default' is the label; everything else in the frame is a feature.
y = data['Default']
X = data.drop(columns='Default')

# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows, then apply them to
# both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build a 3-nearest-neighbours classifier and fit it on the scaled training rows.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)

# Report accuracy on the rows the model was fitted on and on the held-out rows.
train_accuracy, test_accuracy = (
    knn.score(X_train_scaled, y_train),
    knn.score(X_test_scaled, y_test),
)
print('Train accuracy:', train_accuracy)
print('Test accuracy:', test_accuracy)

# Score the new applicants using the scaler fitted above.
new_data = pd.read_csv("sortedNewApplicants.csv")
new_data_scaled = scaler.transform(new_data)
new_predictions = knn.predict(new_data_scaled)

# Show the predicted label for each new applicant.
print('New predictions:', new_predictions)


Train accuracy: 0.9050283705651103
Test accuracy: 0.8557748795676183
New predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [None]:
# Import necessary libraries
import cudf
from cuml.model_selection import train_test_split
from cuml.preprocessing import StandardScaler
from cuml.linear_model import LogisticRegression
import pandas as pd

# Load the data using cuDF
data = cudf.read_csv("sortedBankLoanApproval.csv")

# Extract the features and target: 'Default' is the label column and every
# remaining column is used as a model input.
X = data.drop('Default', axis=1)
y = data['Default']

# Split the data into training and test sets (20% held out; the fixed
# random_state makes the split repeatable).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features using cuML's StandardScaler. The scaler is fitted on the
# training rows only and reused for the test rows and the new applicants.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the Logistic Regression model using cuML
log_reg = LogisticRegression()
log_reg.fit(X_train_scaled, y_train)

# Evaluate the model
train_accuracy = log_reg.score(X_train_scaled, y_train)
test_accuracy = log_reg.score(X_test_scaled, y_test)
print('Train accuracy:', train_accuracy)
print('Test accuracy:', test_accuracy)

# Make predictions on the new data
new_data = cudf.read_csv("sortedNewApplicants.csv")
new_data_scaled = scaler.transform(new_data)
new_predictions = log_reg.predict(new_data_scaled)

# Print the predictions.
# Series.to_array() is no longer available in current cuDF releases, so copy
# the result to a host NumPy array with to_numpy() instead.
# NOTE(review): assumes predict() returns a cuDF Series here (cuDF input with
# cuML's default output type) -- confirm on the target RAPIDS version.
print('New predictions:', new_predictions.to_numpy())
