In [131]:
import pandas as pd
import numpy as np
from sklearn.model_selection import RepeatedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [133]:
# Load the data
df = pd.read_csv('/Users/abuqais/Desktop/UON/ML/MushroomDataset/secondary_data.csv', sep=';')

df.head()

Unnamed: 0,class,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,...,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
0,p,15.26,x,g,o,f,e,,w,16.95,...,s,y,w,u,w,t,g,,d,w
1,p,16.6,x,g,o,f,e,,w,17.99,...,s,y,w,u,w,t,g,,d,u
2,p,14.07,x,g,o,f,e,,w,17.8,...,s,y,w,u,w,t,g,,d,w
3,p,14.17,f,h,e,f,e,,w,15.77,...,s,y,w,u,w,t,p,,d,w
4,p,14.64,x,h,o,f,e,,w,16.53,...,s,y,w,u,w,t,p,,d,w


In [135]:
# Convert class labels to binary (edible=1, poisonous=0)
le = LabelEncoder()
df['class'] = le.fit_transform(df['class'])
df.head()

Unnamed: 0,class,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,...,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
0,1,15.26,x,g,o,f,e,,w,16.95,...,s,y,w,u,w,t,g,,d,w
1,1,16.6,x,g,o,f,e,,w,17.99,...,s,y,w,u,w,t,g,,d,u
2,1,14.07,x,g,o,f,e,,w,17.8,...,s,y,w,u,w,t,g,,d,w
3,1,14.17,f,h,e,f,e,,w,15.77,...,s,y,w,u,w,t,p,,d,w
4,1,14.64,x,h,o,f,e,,w,16.53,...,s,y,w,u,w,t,p,,d,w


In [137]:
# Convert categorical variables
categorical_columns = ['cap-shape', 'cap-surface', 'cap-color', 
                         'does-bruise-or-bleed', 'gill-attachment', 
                         'gill-spacing', 'gill-color', 'stem-root', 
                         'stem-surface', 'stem-color', 'veil-type', 
                         'veil-color', 'has-ring', 'ring-type',
                         'spore-print-color', 'habitat', 'season']
    
for col in categorical_columns:
    if col in df.columns:
        df[col] = le.fit_transform(df[col].astype(str))
df.head()

Unnamed: 0,class,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,...,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
0,1,15.26,6,2,6,0,2,3,10,16.95,...,5,8,11,1,5,1,2,3,0,3
1,1,16.6,6,2,6,0,2,3,10,17.99,...,5,8,11,1,5,1,2,3,0,2
2,1,14.07,6,2,6,0,2,3,10,17.8,...,5,8,11,1,5,1,2,3,0,3
3,1,14.17,2,3,1,0,2,3,10,15.77,...,5,8,11,1,5,1,6,3,0,3
4,1,14.64,6,3,6,0,2,3,10,16.53,...,5,8,11,1,5,1,6,3,0,3


In [139]:
    # Separate features and target
    X = df.drop('class', axis=1)
    y = df['class']
    
    X

Unnamed: 0,cap-diameter,cap-shape,cap-surface,cap-color,does-bruise-or-bleed,gill-attachment,gill-spacing,gill-color,stem-height,stem-width,stem-root,stem-surface,stem-color,veil-type,veil-color,has-ring,ring-type,spore-print-color,habitat,season
0,15.26,6,2,6,0,2,3,10,16.95,17.09,5,8,11,1,5,1,2,3,0,3
1,16.60,6,2,6,0,2,3,10,17.99,18.19,5,8,11,1,5,1,2,3,0,2
2,14.07,6,2,6,0,2,3,10,17.80,17.74,5,8,11,1,5,1,2,3,0,3
3,14.17,2,3,1,0,2,3,10,15.77,15.98,5,8,11,1,5,1,6,3,0,3
4,14.64,6,3,6,0,2,3,10,16.53,17.20,5,8,11,1,5,1,6,3,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61064,1.18,5,8,11,0,3,2,2,3.93,6.22,3,5,12,0,3,0,1,3,0,0
61065,1.27,2,8,11,0,3,2,2,3.18,5.43,3,5,12,0,3,0,1,3,0,0
61066,1.27,5,8,11,0,3,2,2,3.86,6.37,3,5,12,0,3,0,1,3,0,2
61067,1.24,2,8,11,0,3,2,2,3.56,5.44,3,5,12,0,3,0,1,3,0,2


In [141]:
y

0        1
1        1
2        1
3        1
4        1
        ..
61064    1
61065    1
61066    1
61067    1
61068    1
Name: class, Length: 61069, dtype: int64

In [145]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train logistic regression model
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Print results
print("Classification Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Classification Results:
Accuracy: 0.6542

Classification Report:
              precision    recall  f1-score   support

           0       0.63      0.52      0.57      5374
           1       0.67      0.76      0.71      6840

    accuracy                           0.65     12214
   macro avg       0.65      0.64      0.64     12214
weighted avg       0.65      0.65      0.65     12214



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


K fold Added Here

In [None]:
# Load the data
df = pd.read_csv('/Users/abuqais/Desktop/UON/ML/MushroomDataset/secondary_data.csv', sep=';')

df.head()

In [None]:
# Convert class labels to binary (edible=1, poisonous=0)
le = LabelEncoder()
df['class'] = le.fit_transform(df['class'])
df.head()

In [None]:
# Convert categorical variables
categorical_columns = ['cap-shape', 'cap-surface', 'cap-color', 
                         'does-bruise-or-bleed', 'gill-attachment', 
                         'gill-spacing', 'gill-color', 'stem-root', 
                         'stem-surface', 'stem-color', 'veil-type', 
                         'veil-color', 'has-ring', 'ring-type',
                         'spore-print-color', 'habitat', 'season']
    
for col in categorical_columns:
    if col in df.columns:
        df[col] = le.fit_transform(df[col].astype(str))
df.head()

In [143]:
k_folds = 5
kf = RepeatedKFold(n_splits=k_folds, n_repeats=3, random_state=2)

'k_folds = 5\nkf = RepeatedKFold(n_splits=k_folds, n_repeats=3, random_state=2)'

In [214]:
fold_results = []

# Perform k-fold cross-validation
for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
    # Split data
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Train model
    model = Lasso(alpha=1.0)
    model.fit(X_train_scaled, y_train)
        
    # Make predictions
    y_pred = model.predict(X_test_scaled)
    
    # Calculate metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    # Store results
    fold_results.append({
        'Fold': fold,
        'MSE': mse,
        'R2': r2
    })
    
    print(f"Fold {fold}:")
    print(f"MSE: {mse:.4f}")
    print(f"R2 Score: {r2:.4f}\n")

# Calculate and print average results
avg_mse = np.mean([r['MSE'] for r in fold_results])
avg_r2 = np.mean([r['R2'] for r in fold_results])
print("Average Results:")
print(f"Average MSE: {avg_mse:.4f}")
print(f"Average R2 Score: {avg_r2:.4f}")



Fold 1:
MSE: 0.2471
R2 Score: -0.0000

Fold 2:
MSE: 0.2469
R2 Score: -0.0000

Fold 3:
MSE: 0.2460
R2 Score: -0.0005

Fold 4:
MSE: 0.2476
R2 Score: -0.0002

Fold 5:
MSE: 0.2472
R2 Score: -0.0000

Fold 6:
MSE: 0.2463
R2 Score: -0.0003

Fold 7:
MSE: 0.2470
R2 Score: -0.0000

Fold 8:
MSE: 0.2465
R2 Score: -0.0001

Fold 9:
MSE: 0.2478
R2 Score: -0.0004

Fold 10:
MSE: 0.2473
R2 Score: -0.0000

Average Results:
Average MSE: 0.2470
Average R2 Score: -0.0002
