In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the Abalone dataset
abalone_data = pd.read_csv(r'datasets\adalone.csv', sep=',')

# Preprocessing Abalone dataset: Encode 'Sex' categorical feature and normalize data
abalone_data['Sex'] = abalone_data['Sex'].map({'M': 0, 'F': 1, 'I': 2})  # Encoding categorical data
X_abalone = abalone_data.drop('Rings', axis=1)  # Features
y_abalone = abalone_data['Rings']  # Target


In [4]:
abalone_data.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,0,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,0,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,1,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,0,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,2,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [13]:
scaler = StandardScaler()
X_abalone_scaled = scaler.fit_transform(X_abalone)

In [11]:
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X_cancer = data.data
y_cancer = data.target

In [14]:
# Normalize the Breast Cancer data
X_cancer_scaled = scaler.fit_transform(X_cancer)

In [15]:
from sklearn.decomposition import PCA

# PCA for Abalone dataset
pca_abalone = PCA()
X_abalone_pca = pca_abalone.fit_transform(X_abalone_scaled)

# Reduce Abalone data to 4 components
pca_abalone_4 = PCA(n_components=4)
X_abalone_pca_4 = pca_abalone_4.fit_transform(X_abalone_scaled)

# PCA for Breast Cancer dataset
pca_cancer = PCA()
X_cancer_pca = pca_cancer.fit_transform(X_cancer_scaled)

# Reduce Breast Cancer data to 20 and 10 components
pca_cancer_20 = PCA(n_components=20)
X_cancer_pca_20 = pca_cancer_20.fit_transform(X_cancer_scaled)

pca_cancer_10 = PCA(n_components=10)
X_cancer_pca_10 = pca_cancer_10.fit_transform(X_cancer_scaled)

In [16]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# SVM on original Abalone data
svm_abalone = SVC(kernel='linear')

In [17]:
scores_abalone = cross_val_score(svm_abalone, X_abalone_scaled, y_abalone, cv=10)
print("SVM Accuracy on Original Abalone Data:", scores_abalone.mean())

# SVM on PCA-transformed Abalone data (all components)
scores_abalone_pca = cross_val_score(svm_abalone, X_abalone_pca, y_abalone, cv=10)
print("SVM Accuracy on PCA (all components) Abalone Data:", scores_abalone_pca.mean())

# SVM on PCA-transformed Abalone data (4 components)
scores_abalone_pca_4 = cross_val_score(svm_abalone, X_abalone_pca_4, y_abalone, cv=10)
print("SVM Accuracy on PCA (4 components) Abalone Data:", scores_abalone_pca_4.mean())

# SVM on original Breast Cancer data
svm_cancer = SVC(kernel='linear')
scores_cancer = cross_val_score(svm_cancer, X_cancer_scaled, y_cancer, cv=10)
print("SVM Accuracy on Original Breast Cancer Data:", scores_cancer.mean())

# SVM on PCA-transformed Breast Cancer data (all components)
scores_cancer_pca = cross_val_score(svm_cancer, X_cancer_pca, y_cancer, cv=10)
print("SVM Accuracy on PCA (all components) Breast Cancer Data:", scores_cancer_pca.mean())

# SVM on PCA-transformed Breast Cancer data (20 components)
scores_cancer_pca_20 = cross_val_score(svm_cancer, X_cancer_pca_20, y_cancer, cv=10)
print("SVM Accuracy on PCA (20 components) Breast Cancer Data:", scores_cancer_pca_20.mean())

# SVM on PCA-transformed Breast Cancer data (10 components)
scores_cancer_pca_10 = cross_val_score(svm_cancer, X_cancer_pca_10, y_cancer, cv=10)
print("SVM Accuracy on PCA (10 components) Breast Cancer Data:", scores_cancer_pca_10.mean())



SVM Accuracy on Original Abalone Data: 0.25760559016901313




SVM Accuracy on PCA (all components) Abalone Data: 0.25760559016901313




SVM Accuracy on PCA (4 components) Abalone Data: 0.24301114132617352
SVM Accuracy on Original Breast Cancer Data: 0.9753759398496239
SVM Accuracy on PCA (all components) Breast Cancer Data: 0.9753759398496239
SVM Accuracy on PCA (20 components) Breast Cancer Data: 0.9771303258145363
SVM Accuracy on PCA (10 components) Breast Cancer Data: 0.9753759398496239
