In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report

# Step 0: Load the banknote authentication dataset into a DataFrame
data = pd.read_csv('data_banknote_authentication.csv')

# Step 1: Separate the feature columns (X) from the 'class' label column (y)
X = data.drop(columns='class')
y = data.loc[:, 'class']

# Step 2: Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=20,
)

# Step 3: Train a support vector classifier with a linear kernel
# (fit() returns the estimator itself, so construction and training can be chained)
svm_linear = SVC(kernel='linear').fit(X_train, y_train)

# Step 4: Score the linear model on the held-out rows
y_pred_linear = svm_linear.predict(X_test)
conf_matrix_linear = confusion_matrix(y_true=y_test, y_pred=y_pred_linear)
class_report_linear = classification_report(y_true=y_test, y_pred=y_pred_linear)

# Step 5: Same training and scoring procedure with the radial basis function kernel
svm_rbf = SVC(kernel='rbf').fit(X_train, y_train)
y_pred_rbf = svm_rbf.predict(X_test)
conf_matrix_rbf = confusion_matrix(y_true=y_test, y_pred=y_pred_rbf)
class_report_rbf = classification_report(y_true=y_test, y_pred=y_pred_rbf)

# Step 6: Compare the two SVM models by printing each confusion matrix and report
print(
    "Performance metrics for Linear Kernel SVM:",
    conf_matrix_linear,
    class_report_linear,
    sep="\n",
)

print(
    "\nPerformance metrics for RBF Kernel SVM:",
    conf_matrix_rbf,
    class_report_rbf,
    sep="\n",
)


FileNotFoundError: [Errno 2] No such file or directory: 'data_banknote_authentication.csv'

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Step 0: Read data into a pandas dataframe
data = pd.read_csv('weight-height.csv')

# Step 1: Pick the target variable y as weight in kilograms, and the feature variable X as height in centimeters
# NOTE(review): assumes the CSV stores Height in inches and Weight in pounds, so
# both columns are converted to the metric units this step asks for — confirm
# against the data file. If the columns are already metric, set both factors to 1.0.
CM_PER_INCH = 2.54
KG_PER_POUND = 0.45359237
X = data['Height'].values.reshape(-1, 1) * CM_PER_INCH  # 2D (n_samples, 1) array for sklearn
y = data['Weight'] * KG_PER_POUND

# Step 2: Split the data into training and testing sets with 80/20 ratio
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Scale the training and testing data using normalization and standardization
# Each scaler is fitted on the training split only and then applied to the test
# split, so no statistics from the test rows leak into the transformation.
scaler_norm = MinMaxScaler()
X_train_norm = scaler_norm.fit_transform(X_train)
X_test_norm = scaler_norm.transform(X_test)

scaler_std = StandardScaler()
X_train_std = scaler_std.fit_transform(X_train)
X_test_std = scaler_std.transform(X_test)

# Step 4: Fit a KNN regression model with k=5 to the training data without scaling
knn_reg = KNeighborsRegressor(n_neighbors=5)
knn_reg.fit(X_train, y_train)
y_pred = knn_reg.predict(X_test)
r2_unscaled = r2_score(y_test, y_pred)

# Step 5: Repeat step 4 for normalized data
# The same estimator object is refitted; each prediction below is made right
# after the corresponding fit, so every score belongs to its own variant.
knn_reg.fit(X_train_norm, y_train)
y_pred_norm = knn_reg.predict(X_test_norm)
r2_normalized = r2_score(y_test, y_pred_norm)

# Step 6: Repeat step 4 for standardized data
knn_reg.fit(X_train_std, y_train)
y_pred_std = knn_reg.predict(X_test_std)
r2_standardized = r2_score(y_test, y_pred_std)

# Step 7: Compare the models in terms of their R2 value
# With a single feature, min-max and standard scaling are monotonic rescalings
# that should not change which neighbours are nearest, so the three scores are
# expected to be (nearly) identical.
print("R2 value for unscaled data:", r2_unscaled)
print("R2 value for normalized data:", r2_normalized)
print("R2 value for standardized data:", r2_standardized)

FileNotFoundError: [Errno 2] No such file or directory: 'weight-height.csv'