## Classification (binary) of wheather Mushroom is poisoned or not

In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv("./csv/mushroom.csv")

In [4]:
df.head()

Unnamed: 0,cap-diameter,cap-shape,gill-attachment,gill-color,stem-height,stem-width,stem-color,season,class
0,1372,2,2,10,3.807467,1545,11,1.804273,1
1,1461,2,2,10,3.807467,1557,11,1.804273,1
2,1371,2,2,10,3.612496,1566,11,1.804273,1
3,1261,6,2,10,3.787572,1566,11,1.804273,1
4,1305,6,2,10,3.711971,1464,11,0.943195,1


In [5]:
df['class'].unique()

array([1, 0], dtype=int64)

In [6]:
df.isnull().sum()

cap-diameter       0
cap-shape          0
gill-attachment    0
gill-color         0
stem-height        0
stem-width         0
stem-color         0
season             0
class              0
dtype: int64

In [7]:
X = df.iloc[ : , :-1]
X.head()

Unnamed: 0,cap-diameter,cap-shape,gill-attachment,gill-color,stem-height,stem-width,stem-color,season
0,1372,2,2,10,3.807467,1545,11,1.804273
1,1461,2,2,10,3.807467,1557,11,1.804273
2,1371,2,2,10,3.612496,1566,11,1.804273
3,1261,6,2,10,3.787572,1566,11,1.804273
4,1305,6,2,10,3.711971,1464,11,0.943195


In [8]:
y = df.iloc[ : , -1]
y

0        1
1        1
2        1
3        1
4        1
        ..
54030    1
54031    1
54032    1
54033    1
54034    1
Name: class, Length: 54035, dtype: int64

In [9]:
y.value_counts()

class
1    29675
0    24360
Name: count, dtype: int64

## Data  Preprossing

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
X_train,X_test, y_train,y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [12]:
# Standard scaler
from sklearn.preprocessing import StandardScaler

In [13]:
scaler = StandardScaler()

In [14]:
X_train_scaler = scaler.fit_transform(X_train)
X_test_scaler = scaler.fit_transform(X_test)

In [15]:
X_train.head()

Unnamed: 0,cap-diameter,cap-shape,gill-attachment,gill-color,stem-height,stem-width,stem-color,season
10135,242,6,0,10,1.27683,238,6,0.88845
5457,601,5,1,10,0.469946,704,11,0.943195
1411,887,2,0,10,1.997027,1499,11,0.943195
2908,343,2,2,5,0.589316,305,11,0.943195
1895,625,0,2,10,2.53419,1149,11,0.88845


In [16]:
X_train_scaler[15795]

array([ 1.19342264,  0.4648552 , -0.51076625, -0.72616227, -0.90136977,
        0.22655477, -0.74211062, -0.20633721])

In [17]:
y_train


10135    1
5457     0
1411     1
2908     1
1895     1
        ..
11284    1
44732    1
38158    1
860      0
15795    0
Name: class, Length: 43228, dtype: int64

## Perceptron Trick

In [50]:
def perceptron(X_train, y_train, learning_rate=0.01, epochs=100):
    # Preparing X_train by adding a bias term
    X_train = np.insert(X_train, 0, 1, axis=1)  # Insert 1s for the bias term at the 0th index
    
    # Initializing weights
    W = np.ones(X_train.shape[1])  # Total number of columns, including the bias
    
    for _ in range(epochs):
        # Selecting a random index from the data
        idx = np.random.randint(0, X_train.shape[0])  #from 0 to total columns
        
        # Predicting the data
        y_new = np.dot(X_train[idx], W) 
        y_hat = step(y_new)  # Using the step function to return value either 0 or 1
        
        
        # Updating the weights
        W = W + learning_rate * (y_train.iloc[idx] - y_hat) * X_train[idx]
    
    return W[0], W[1:]




In [51]:
def step(z):
    if z > 0:
        return 1
    else:
        return 0

In [52]:
#training data and calculating coefficients
intercept_ , coef_ = perceptron(X_train,y_train,learning_rate=0.01, epochs=1000)

In [53]:
print(intercept_)

1.1900000000000002


In [54]:
print(coef_)

[4.23       0.96       1.21       1.95       1.9638146  7.31
 0.97       1.10524543]


In [55]:
#predicted
data = np.array([-0.90319838,  0.92730337, -0.95948901,  0.83557181,  0.79307595,
       -1.04000676, -0.74211062, -0.20633721])
result = np.dot(data, coef_) +  intercept_
step(result)

0

In [56]:
data1 = np.array([ 1.19342264,  0.4648552 , -0.51076625, -0.72616227, -0.90136977,
        0.22655477, -0.74211062, -0.20633721])
result1 = np.dot(data, coef_) +  intercept_
step(result1)

0

In [57]:
from sklearn.metrics import accuracy_score

In [58]:
def perceptron_predict(X, W):
    # Prepare X by adding the bias term
    X = np.insert(X, 0, 1, axis=1)  # Insert 1s for the bias term at the 0th index
    y_pred = np.dot(X, W)  # Predicting the data
    return step(y_pred)  # Using the step function to return value either 0 or 1

def step(x):
    return np.where(x >= 0, 1, 0)  # Step function, returns 1 if x >= 0 else 0

def calculate_accuracy(X_train, y_train, W):
    y_pred = perceptron_predict(X_train, W)  # Make predictions using the trained weights
    accuracy = accuracy_score(y_pred, y_train)  # Calculate the accuracy
    return accuracy




In [59]:

# Combine intercept and coefficients into a single weight vector
W = np.insert(coef_, 0, intercept_)

# Check accuracy
accuracy = calculate_accuracy(X_test, y_test, W)
# print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 54.58%


## Using Sigmoid

In [43]:
def usingSigmoid(X_train, y_train, learning_rate=0.01, epochs=100):
    # Preparing X_train by adding a bias term
    X_train = np.insert(X_train, 0, 1, axis=1)  # Insert 1s for the bias term at the 0th index
    
    # Initializing weights
    W = np.ones(X_train.shape[1])  # Total number of columns, including the bias
    
    for _ in range(epochs):
        # Selecting a random index from the data
        idx = np.random.randint(0, X_train.shape[0])  #from 0 to total columns
        
        # Predicting the data
        y_new = np.dot(X_train[idx], W) 
        y_hat = sigmoid(y_new)  # Using the step function to return value either 0 or 1
        
        
        # Updating the weights
        W = W + learning_rate * (y_train.iloc[idx] - y_hat) * X_train[idx]
    
    return W[0], W[1:]




In [44]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [45]:
#training data and calculating coefficients
intercept_ , coef_ = usingSigmoid(X_train,y_train,learning_rate=0.1, epochs=1000)

  return 1/(1+np.exp(-z))


In [46]:
print(intercept_)
print(coef_)

6.599999999999993
[111.3          8.6          8.4         31.3         11.97043922
 -13.9         21.3          3.99632487]


In [47]:
from sklearn.metrics import accuracy_score

def sigmoid_predict(X, W):
    # Prepare X by adding the bias term
    X = np.insert(X, 0, 1, axis=1)  # Insert 1s for the bias term at the 0th index
    y_pred = np.dot(X, W)  # Predicting the data
    return sigmoid(y_pred)  # Using the sigmoid function to get probabilities

def sigmoid(z):
    return 1 / (1 + np.exp(-z))  # Sigmoid function

def calculate_accuracy_sigmoid(X_train, y_train, W):
    y_prob = sigmoid_predict(X_train, W)  # Get probabilities using the trained weights
    y_pred = np.where(y_prob >= 0.5, 1, 0)  # Convert probabilities to binary labels
    accuracy = accuracy_score(y_train, y_pred)  # Calculate the accuracy
    return accuracy




In [48]:

# Combine intercept and coefficients into a single weight vector
W = np.insert(coef_, 0, intercept_)

# Check accuracy
accuracy = calculate_accuracy_sigmoid(X_test, y_test, W)
# print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 54.45%


  return 1 / (1 + np.exp(-z))  # Sigmoid function


## Using Sklearn logistic regression

In [34]:

from sklearn.linear_model import LogisticRegression

In [35]:
lgr = LogisticRegression(max_iter=1000, solver='lbfgs')

In [36]:
lgr.fit(X_train,y_train)

In [37]:
y_pred = lgr.predict(X_test)

In [38]:
from sklearn.metrics  import accuracy_score, confusion_matrix

In [39]:
accuracy_score(y_test,y_pred)

0.6365318774868141

In [40]:
pd.DataFrame(confusion_matrix(y_test,y_pred))

Unnamed: 0,0,1
0,2642,2267
1,1661,4237


In [41]:
test = np.array([-0.90319838,  0.92730337, -0.95948901,  0.83557181,  0.79307595,
       -1.04000676, -0.74211062, -0.20633721])
lgr.predict([test])



array([1], dtype=int64)

In [42]:
test1 = np.array([ 1.19342264,  0.4648552 , -0.51076625, -0.72616227, -0.90136977,
        0.22655477, -0.74211062, -0.20633721])
lgr.predict([test1])



array([1], dtype=int64)

## See you Soon 