In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
data = pd.read_csv('/kaggle/input/plastic/plastic (1).csv')
data.dropna(inplace=True)
one_hot_cols=['Gender', 'Residence', 'Occupation', 'location_of_usage', 'dispose_outdoor', 'waste_around_you', 'disposal_behaviour', 'awareness_source', 'way_to_reduce', 'alternatives']
age_order= ['Young', 'Middle Aged', 'Old']
edu_order= ['Nothing', 'School', 'College', 'University']
agree_order= ['Strongly Disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly Agree']
time_deg_ord= ['days50', '50 years', 'Donâ€™t know', '500 years', 'Never degrade']
aware_symb_ord= ['I never saw it', 'No', 'Yes']
freq_order= ['Always',  'Usually','Often', 'Sometimes', 'Never']
reuse_order= ['Didnâ€™t reuse', 'Donâ€™t reuse', 'One time', 'more than 5','Few weeks', 'Few months', '2-1 years']
yn_order= ['No', 'Yes']
neg_order= ["I don't think about the negative impact of plastic", "I think about negative impact of plastic but I buy it anyways" , "I think about the negative impact of the plastic and sometimes I don not buy it", "I think about negative impact and look for the alternative"]

In [2]:
from sklearn.preprocessing import OrdinalEncoder

encoded_df = data.copy()

ordinal_encoder = OrdinalEncoder(categories=[age_order, edu_order, agree_order, time_deg_ord, aware_symb_ord, agree_order, agree_order, agree_order, reuse_order, freq_order, reuse_order, freq_order, freq_order, freq_order, freq_order, freq_order, yn_order, yn_order, agree_order, agree_order, neg_order])
ordinal_encoded = ordinal_encoder.fit_transform(data[['Age', 'Education', 'Environmental_affect', 'Time_to_degrade', 'Recycling_symbol', 'Microplastic_in_food', 'Chemical_absorbtion', 'Recycle_plastic',  'bag_reuse', 'takeyourbag', 'bottle_reuse', 'cleaning_responsibility', 'plastic_over_other', 'ensure_biodegradable', 'microwavesafe', 'food_in_bag', 'special_bins', 'teaching_kids', 'willing_to_adopt', 'survival_without_plastic', 'negative_impact' ]])
ordinal_encoded_df = pd.DataFrame(ordinal_encoded, columns=['Age', 'Education', 'Environmental_affect', 'Time_to_degrade', 'Recycling_symbol', 'Microplastic_in_food', 'Chemical_absorbtion', 'Recycle_plastic', 'bag_reuse', 'takeyourbag', 'bottle_reuse', 'cleaning_responsibility', 'plastic_over_other', 'ensure_biodegradable' , 'microwavesafe', 'food_in_bag', 'special_bins', 'teaching_kids', 'willing_to_adopt', 'survival_without_plastic', 'negative_impact'])

# Perform one-hot encoding for remaining columns
one_hot_encoded = pd.get_dummies(data[one_hot_cols], drop_first=True)
one_hot_encoded= one_hot_encoded.astype(float)

# Combine one-hot encoded and ordinal encoded dataframes with the original dataframe
encoded_df = pd.concat([one_hot_encoded, ordinal_encoded_df], axis=1)

# for cols in encoded_df.columns:
#     print(type(encoded_df[cols][0]))
# print(encoded_df.head())
encoded_df=encoded_df.dropna()
# encoded_df

In [3]:
y_columns = ['survival_without_plastic', 'Time_to_degrade', 'cleaning_responsibility', 'negative_impact', 'teaching_kids']
X_columns = [col for col in encoded_df.columns if col not in y_columns]
y=encoded_df[y_columns]
X=encoded_df[X_columns]
# Convert NumPy arrays to pandas DataFrame
# y = pd.DataFrame(data=y, columns=y_columns)
# X = pd.DataFrame(data=X, columns=X_columns)

# X.dropna(inplace=True)
# y.dropna(inplace=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)#data split to train and test 

print("Shape: ", X.shape, "Dimension: ", X.ndim)
print("Shape: ", y.shape, "Dimension: ", y.ndim)
# y

Shape:  (223, 56) Dimension:  2
Shape:  (223, 5) Dimension:  2


In [4]:
# Define the MultiClassLogisticRegression class
class MultiClassLogisticRegression:
   
    def __init__(self, n_iter=10000, thres=1e-3):
        self.n_iter = n_iter
        self.thres = thres
        self.eps= 1e-9
   
    def fit(self, X, y, batch_size=64, lr=0.001, rand_seed=4, verbose=False):
        np.random.seed(rand_seed)
        self.classes = np.unique(y)
        self.class_labels = {c:i for i,c in enumerate(self.classes)}
        X = self.add_bias(X)
        y = self.one_hot(y)
        self.loss = []
        self.weights = np.zeros(shape=(len(self.classes),X.shape[1]))
        self.fit_data(X, y, batch_size, lr, verbose)
        return self
 
    def fit_data(self, X, y, batch_size, lr, verbose):
        i = 0
        while (not self.n_iter or i < self.n_iter):
            self.loss.append(self.cross_entropy(y, self.predict_(X)))
            idx = np.random.choice(X.shape[0], batch_size)
            X_batch, y_batch = X[idx], y[idx]
            error = y_batch - self.predict_(X_batch)
            update = (lr * np.dot(error.T, X_batch))
            self.weights += update
            if np.abs(update).max() < self.thres: break
            if i % 1000 == 0 and verbose:
                print(' Training Accuray at {} iterations is {}'.format(i, self.evaluate_(X, y)))
            i += 1
   
    def predict(self, X):
        return self.predict_(self.add_bias(X))
   
    def predict_(self, X):
        pre_vals = np.dot(X, self.weights.T).reshape(-1,len(self.classes))
        return self.softmax(pre_vals)
   
    def softmax(self, z):
        return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
   
    def predict_classes(self, X):
        self.probs_ = self.predict(X)
        return np.vectorize(lambda c: self.classes[c])(np.argmax(self.probs_, axis=1))
 
    def add_bias(self, X):
        return np.insert(X, 0, 1, axis=1)
 
    def one_hot(self, y):
        return np.eye(len(self.classes))[np.vectorize(lambda c: self.class_labels[c])(y).reshape(-1)]
   
    def score(self, X, y):
        return np.mean(self.predict_classes(X) == y)
   
    def evaluate_(self, X, y):
        return np.mean(np.argmax(self.predict_(X), axis=1) == np.argmax(y, axis=1))
   
    def cross_entropy(self, y, probs):
        return -1 * np.mean(y * np.log(probs+self.eps))

In [5]:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Define the hyperparameters grid
lr_values = [0.001, 0.01, 0.1]
n_iter_values = [1000, 5000, 10000, 20000]
batch_size_values = [8, 16, 32, 64]

X_train_1, X_train_2, y_train_1, y_train_2 = train_test_split(X_train, y_train, test_size=0.3, random_state=42)

# Perform grid search

for col in ['survival_without_plastic', 'Time_to_degrade', 'cleaning_responsibility', 'negative_impact', 'teaching_kids']:
    best_accuracy = 0
    best_params = {}
    for lr in lr_values:
        for n_iter in n_iter_values:
            for batch_size in batch_size_values:
                print("Training with parameters - lr: {}, n_iter: {}, batch_size: {}".format(lr, n_iter, batch_size))
                multiclass_logreg = MultiClassLogisticRegression(n_iter=n_iter, thres=1e-3)
                multiclass_logreg.fit(X_train_1, y_train_1[col], batch_size=batch_size, lr=lr)
                y_pred = multiclass_logreg.predict_classes(X_train_2)
                accuracy = accuracy_score(y_train_2[col], y_pred)
                print("Accuracy:", accuracy)
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_params = {'lr': lr, 'n_iter': n_iter, 'batch_size': batch_size}
                    
    multiclass_logreg = MultiClassLogisticRegression(n_iter=best_params['n_iter'], thres=1e-3)
    multiclass_logreg.fit(X_train, y_train[col], batch_size=best_params['batch_size'], lr=best_params['lr'])
    y_pred = multiclass_logreg.predict_classes(X_test)
    accuracy = accuracy_score(y_test[col], y_pred)
    y_true=y_test[col]
    precision = precision_score(y_true, y_pred, average='weighted')  # Use 'weighted' if you have multiclass classification

    # Recall
    recall = recall_score(y_true, y_pred, average='weighted')  # Use 'weighted' if you have multiclass classification

    # F1-score
    f1 = f1_score(y_true, y_pred, average='weighted')  # Use 'weighted' if you have multiclass classification

    print("\nAccuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-score:", f1)
    
    print("\nBest parameters:", best_params)
#     print("Best accuracy:", best_accuracy)


Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 8
Accuracy: 0.5370370370370371
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 16
Accuracy: 0.46296296296296297
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 32
Accuracy: 0.5185185185185185
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 64
Accuracy: 0.4074074074074074
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 8
Accuracy: 0.5370370370370371
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 16
Accuracy: 0.4444444444444444
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 32
Accuracy: 0.4444444444444444
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 64
Accuracy: 0.42592592592592593
Training with parameters - lr: 0.001, n_iter: 10000, batch_size: 8
Accuracy: 0.4444444444444444
Training with parameters - lr: 0.001, n_iter: 10000, batch_size: 16
Accuracy: 0.4444444444444444
Training with parameters - lr: 0.001, n

  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)


Accuracy: 0.037037037037037035
Training with parameters - lr: 0.1, n_iter: 5000, batch_size: 8
Accuracy: 0.3888888888888889
Training with parameters - lr: 0.1, n_iter: 5000, batch_size: 16
Accuracy: 0.5
Training with parameters - lr: 0.1, n_iter: 5000, batch_size: 32
Accuracy: 0.4074074074074074
Training with parameters - lr: 0.1, n_iter: 5000, batch_size: 64


  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)


Accuracy: 0.037037037037037035
Training with parameters - lr: 0.1, n_iter: 10000, batch_size: 8
Accuracy: 0.3888888888888889
Training with parameters - lr: 0.1, n_iter: 10000, batch_size: 16
Accuracy: 0.5
Training with parameters - lr: 0.1, n_iter: 10000, batch_size: 32
Accuracy: 0.4074074074074074
Training with parameters - lr: 0.1, n_iter: 10000, batch_size: 64


  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)


Accuracy: 0.037037037037037035
Training with parameters - lr: 0.1, n_iter: 20000, batch_size: 8
Accuracy: 0.3888888888888889
Training with parameters - lr: 0.1, n_iter: 20000, batch_size: 16
Accuracy: 0.5
Training with parameters - lr: 0.1, n_iter: 20000, batch_size: 32
Accuracy: 0.4074074074074074
Training with parameters - lr: 0.1, n_iter: 20000, batch_size: 64


  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)


Accuracy: 0.037037037037037035


  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5555555555555556
Precision: 0.4844444444444444
Recall: 0.5555555555555556
F1-score: 0.45444444444444443

Accuracy: 0.5555555555555556 


Best parameters: {'lr': 0.1, 'n_iter': 1000, 'batch_size': 16}
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 8
Accuracy: 0.42592592592592593
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 16
Accuracy: 0.42592592592592593
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 32
Accuracy: 0.37037037037037035
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 64
Accuracy: 0.3888888888888889
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 8
Accuracy: 0.4074074074074074
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 16
Accuracy: 0.3888888888888889
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 32
Accuracy: 0.4074074074074074
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 64
Accuracy: 0.3888888888888889
Training with param

  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4888888888888889
Precision: 0.48644122383252814
Recall: 0.4888888888888889
F1-score: 0.4584536791433343

Accuracy: 0.4888888888888889 


Best parameters: {'lr': 0.001, 'n_iter': 1000, 'batch_size': 8}
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 8
Accuracy: 0.5555555555555556
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 16
Accuracy: 0.46296296296296297
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 32
Accuracy: 0.46296296296296297
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 64
Accuracy: 0.46296296296296297
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 8
Accuracy: 0.46296296296296297
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 16
Accuracy: 0.48148148148148145
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 32
Accuracy: 0.4444444444444444
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 64
Accuracy: 0.42592592592592593
Training with p

  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)


Accuracy: 0.6111111111111112
Training with parameters - lr: 0.1, n_iter: 5000, batch_size: 8
Accuracy: 0.5925925925925926
Training with parameters - lr: 0.1, n_iter: 5000, batch_size: 16
Accuracy: 0.35185185185185186
Training with parameters - lr: 0.1, n_iter: 5000, batch_size: 32
Accuracy: 0.35185185185185186
Training with parameters - lr: 0.1, n_iter: 5000, batch_size: 64


  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)


Accuracy: 0.6111111111111112
Training with parameters - lr: 0.1, n_iter: 10000, batch_size: 8
Accuracy: 0.5925925925925926
Training with parameters - lr: 0.1, n_iter: 10000, batch_size: 16
Accuracy: 0.35185185185185186
Training with parameters - lr: 0.1, n_iter: 10000, batch_size: 32
Accuracy: 0.35185185185185186
Training with parameters - lr: 0.1, n_iter: 10000, batch_size: 64


  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)


Accuracy: 0.6111111111111112
Training with parameters - lr: 0.1, n_iter: 20000, batch_size: 8
Accuracy: 0.5925925925925926
Training with parameters - lr: 0.1, n_iter: 20000, batch_size: 16
Accuracy: 0.35185185185185186
Training with parameters - lr: 0.1, n_iter: 20000, batch_size: 32
Accuracy: 0.35185185185185186
Training with parameters - lr: 0.1, n_iter: 20000, batch_size: 64


  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)


Accuracy: 0.6111111111111112


  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
  return np.exp(z) / (np.sum(np.exp(z), axis=1).reshape(-1,1) + self.eps)
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.6444444444444445
Precision: 0.41530864197530865
Recall: 0.6444444444444445
F1-score: 0.5051051051051052

Accuracy: 0.6444444444444445 


Best parameters: {'lr': 0.1, 'n_iter': 1000, 'batch_size': 64}
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 8
Accuracy: 0.42592592592592593
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 16
Accuracy: 0.3888888888888889
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 32
Accuracy: 0.4074074074074074
Training with parameters - lr: 0.001, n_iter: 1000, batch_size: 64
Accuracy: 0.46296296296296297
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 8
Accuracy: 0.42592592592592593
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 16
Accuracy: 0.3888888888888889
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 32
Accuracy: 0.3888888888888889
Training with parameters - lr: 0.001, n_iter: 5000, batch_size: 64
Accuracy: 0.37037037037037035
Training with para