In [58]:
import warnings
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [None]:
# Silence ALL warnings for the rest of the session, not only sklearn's
# DataConversionWarning.
# NOTE(review): this also hides pandas/seaborn deprecation warnings — consider
# narrowing the filter to the specific category.
warnings.filterwarnings("ignore")
# Load the mushroom dataset from a CSV file in the current working directory.
df = pd.read_csv('mushroom.csv')

In [None]:
# Preview the raw frame (displayed as the cell's last expression).
df

In [None]:
# Data-quality pass: report missing values and duplicated rows, then drop the
# duplicates. drop_duplicates() returns a NEW frame, so the result has to be
# assigned back to `df`; without the assignment no rows are ever removed.
# The intermediate reports are printed because a cell only displays its last
# expression.
print("Missing values per column (before de-duplication):")
print(df.isnull().sum())
print(f"Duplicated rows: {df.duplicated().sum()}")
df = df.drop_duplicates(keep='first')
# Last expression: missing-value counts of the de-duplicated frame.
df.isnull().sum()


In [None]:
# Names of every column of `df` whose dtype is not `object`.
num_col = [name for name, dtype in df.dtypes.items() if dtype != 'O']
num_col


In [None]:
# Box plot of the non-object columns of the raw data, one box per column.
# A single call is enough: boxplot(column=num_col) already draws every listed
# column, so looping over num_col only redrew the identical plot once per column.
df.boxplot(column=num_col, fontsize=7)

plt.show()

In [None]:
# Inter-quartile range (Q3 - Q1) of each numeric column.
# numeric_only=True keeps the object (categorical) columns out of the quantile
# computation; pandas >= 2.0 no longer drops them silently and raises instead.
Q1 = df.quantile(0.25, numeric_only=True)
Q3 = df.quantile(0.75, numeric_only=True)
IQR = Q3 - Q1
IQR

In [None]:
# Outlier fences: a value is an outlier when it lies below Q1 - 1.5*IQR or
# above Q3 + 1.5*IQR.
LB = Q1 - 1.5*IQR
UB = Q3 + 1.5*IQR
# Compare only the columns that actually have bounds. Comparing the whole frame
# (object columns included) against a Series with different labels raises
# "Operands are not aligned" on pandas >= 2.0.
bounded = df[LB.index]
is_outlier = ((bounded < LB) | (bounded > UB)).any(axis=1)
# Keep the rows where no bounded column is an outlier.
df_cleaned = df[~is_outlier]
df_cleaned

In [None]:
# Summary statistics of the outlier-filtered frame.
df_cleaned.describe()

In [None]:
# Box plot of the same columns after outlier removal, for comparison with the
# plot of the raw data.
# One call draws every column in num_col; the previous loop over num_col only
# repeated the identical plot.
df_cleaned.boxplot(column=num_col, fontsize=7)

plt.show()

In [None]:
# Recompute the list of non-object columns, this time from the cleaned frame.
num_col = [name for name, dtype in df_cleaned.dtypes.items() if dtype != 'O']
num_col

In [None]:
# Pairwise correlation between the non-object columns of the cleaned frame.
df_cleaned[num_col].corr()

In [None]:
# Density plot of the cleaned data.
# `fill=True` replaces the deprecated `shade=True`, which newer seaborn
# releases warn about / reject. `data=` is passed by keyword so the call does
# not depend on the position of the first parameter.
sns.kdeplot(data=df_cleaned, fill=True, color='skyblue')
plt.title('Mushroom Data density plot')
plt.xlabel('Value')
plt.ylabel('Density')
plt.show()

In [None]:
# Second outlier pass: recompute the IQR fences on the already-cleaned data and
# filter again.
# numeric_only=True keeps object columns out of the quantile computation.
Q1_test = df_cleaned.quantile(0.25, numeric_only=True)
Q3_test = df_cleaned.quantile(0.75, numeric_only=True)
IQR_test = Q3_test - Q1_test
LB_test = Q1_test - 1.5*IQR_test
UB_test = Q3_test + 1.5*IQR_test
# Use the fences computed in THIS cell (LB_test / UB_test). The previous code
# filtered with the stale LB / UB from the first pass, which made the
# recomputed bounds dead code and this pass a no-op.
bounded_test = df_cleaned[LB_test.index]
outlier_rows = ((bounded_test < LB_test) | (bounded_test > UB_test)).any(axis=1)
# .copy() gives an independent frame, so the column assignments performed on
# test_data later in the notebook do not write into a slice of df_cleaned.
test_data = df_cleaned[~outlier_rows].copy()
test_data

In [None]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [None]:
# One shared LabelEncoder; it is re-fitted for every column encoded below.
lab_enc = LabelEncoder()

In [None]:
# Integer-encode every categorical column (including the `class` label).
# Columns are listed explicitly, in the order they were originally encoded.
categorical_cols = [
    'cap_shape', 'cap_surface', 'cap_color', 'bruises', 'odor',
    'gill_attachment', 'gill_spacing', 'gill_size', 'gill_color',
    'veil_type', 'veil_color', 'ring_number', 'ring_type',
    'spore_print_color', 'population', 'habitat', 'class',
    'stalk_shape', 'stalk_root',
    'stalk_surface_above_ring', 'stalk_surface_below_ring',
    'stalk_color_above_ring', 'stalk_color_below_ring',
]

# Work on an independent copy so the assignments below never target a slice of
# another frame.
test_data = test_data.copy()

for col in categorical_cols:
    # Pass a 1-D Series: LabelEncoder expects 1-D input, and the earlier 2-D
    # `test_data[[col]]` form only worked by raising a DataConversionWarning.
    # lab_enc is re-fitted on each column, so after the loop it only holds the
    # classes of the last column.
    test_data[col] = lab_enc.fit_transform(test_data[col])

test_data

In [None]:
#test_data.drop(columns=['Unnamed: 0'], inplace=True)

In [None]:
# Preview the frame after label encoding.
test_data

In [None]:
target = test_data[['Unnamed: 0']]
target

In [None]:
feature = test_data.drop('Unnamed: 0', axis =1)
feature

In [None]:
# Report how many rows and columns the encoded frame has.
rows = test_data.shape[0]
columns = test_data.shape[1]
print(f"Rows: {rows}, Columns: {columns}")

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    feature,
    target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Print the shape of each of the four split parts.
for label, part in (
    ("Training Features (X_train):", X_train),
    ("Testing Features (X_test):", X_test),
    ("Training Target (y_train):", y_train),
    ("Testing Target (y_test):", y_test),
):
    print(label, part.shape)

In [None]:
# Pairwise correlation of the feature columns, rendered as an annotated heatmap
# on an explicitly created Axes.
correlation_matrix = feature.corr()
fig, ax = plt.subplots(figsize=(4, 2))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
scaler = StandardScaler()
feature = scaler.fit_transform(feature)
feature

In [None]:
X_train, X_test, y_train, y_test = train_test_split(feature, target, test_size=0.2, random_state=42)
# Display the shapes of the splits
print("Training Features (X_train):", X_train.shape)
print("Testing Features (X_test):", X_test.shape)
print("Training Target (y_train):", y_train.shape)
print("Testing Target (y_test):", y_test.shape)

In [None]:
# Baseline SVM classifier with a linear kernel.
# NOTE(review): per the scikit-learn docs `gamma` is a coefficient for the
# rbf/poly/sigmoid kernels, so it should have no effect here; it is kept so the
# estimator's parameters are unchanged.
svc = SVC(C=1.0, kernel='linear', gamma=0.2)
# np.ravel makes the target 1-D whether y_train is a Series or a single-column
# DataFrame; a column-vector y makes scikit-learn emit DataConversionWarning.
svc.fit(X_train, np.ravel(y_train))
y_pred = svc.predict(X_test)
# Last expression: accuracy of the baseline model on the held-out rows.
accuracy_score(y_test, y_pred)

In [None]:
# Classification report for the baseline predictions (y_pred) against y_test.
print(classification_report(y_test,y_pred))

In [None]:
# Hyper-parameter grid for the SVC search.
# Kernel names must be ones SVC accepts: 'rtb' and 'linner' were typos for
# 'rbf' and 'linear' and are rejected when the estimator is fitted.
params = {
    'C': [1, 2, 3, 4, 5, 6, 7, 8],
    'kernel': ['rbf', 'linear', 'sigmoid', 'poly'],
    'gamma': [0.1, 0.2, 0.3, 0.4],
}
# Displays the (unfitted) search object as the cell output.
GridSearchCV(svc, params)

In [None]:
# 5-fold cross-validated grid search over `params`, using all CPU cores.
# The estimator is `svc`, the classifier defined earlier in this notebook; the
# previous `svm` was never defined and raised NameError.
grid_search = GridSearchCV(estimator=svc, param_grid=params, cv=5, n_jobs=-1, verbose=2)

# np.ravel makes the target 1-D whether y_train is a Series or a single-column
# DataFrame.
grid_search.fit(X_train, np.ravel(y_train))

In [None]:
# Parameter combination with the best cross-validation score in the search.
print(f"Best hyperparameters: {grid_search.best_params_}")

In [None]:
# Confusion matrix of the baseline predictions (y_pred) against y_test.
# Tick labels are the class values actually present in y_test / y_pred. The
# previous `iris.target_names` referred to a name that is not defined anywhere
# in this notebook and raised NameError.
class_labels = np.unique(np.concatenate([np.ravel(y_test), np.ravel(y_pred)]))
# Passing `labels` pins the row/column order to the tick labels used below.
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Plotting confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix")
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()
