In [5]:
import pandas as pd
import numpy as np

In [6]:
# Load the dataset from 'data_multi.csv' (path is relative to the kernel's working directory).
data = pd.read_csv('data_multi.csv')
# Bare last expression: the notebook renders the frame as the cell's output.
data

Unnamed: 0,Age,Air Pollution,Alcohol use,Level
0,33,2,4,Low
1,17,3,1,Medium
2,35,4,5,High
3,37,7,7,High
4,46,6,8,High
...,...,...,...,...
162,76,7,8,High
163,2,1,1,Low
164,77,7,8,High
165,1,1,1,Low


In [7]:
class OneVsAllLinearRegression:
    """One-vs-all classifier built from per-class least-squares linear regressions.

    For every distinct label in ``y`` a linear model is fitted against a 0/1
    indicator target (1 where the sample carries that label, 0 elsewhere).
    Prediction evaluates every per-class model and returns the label whose
    model produces the highest score.

    Attributes:
        models: list of ``(class_label, theta)`` tuples, one per class seen in
            the most recent ``fit`` call. ``theta`` stores the intercept first,
            followed by one weight per feature column.
    """

    def __init__(self):
        self.models = []

    @staticmethod
    def _add_bias(X):
        """Return ``X`` as a float array with a leading column of ones (intercept term)."""
        X = np.asarray(X, dtype=float)
        return np.hstack([np.ones((X.shape[0], 1)), X])

    def fit(self, X, y):
        """Fit one linear regression per distinct label in ``y``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples class labels.

        Raises:
            ValueError: if ``X`` and ``y`` do not have the same number of rows.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {y.shape[0]} labels"
            )

        # Start from a clean slate so that calling fit() again replaces the
        # previous models instead of appending duplicates to them.
        self.models = []
        for cls in np.unique(y):
            y_binary = np.where(y == cls, 1, 0)
            self.models.append((cls, self.train_linear_regression(X, y_binary)))

    def predict(self, X):
        """Return the predicted class label for every row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features), with the same
                feature columns (and order) that were passed to ``fit``.

        Returns:
            1-D array of class labels taken from the labels seen in ``fit``.

        Raises:
            ValueError: if called before ``fit``.
        """
        if not self.models:
            raise ValueError("predict() called before fit(): no models available")

        X_with_bias = self._add_bias(X)
        # Rank the raw regression scores. A sigmoid is strictly increasing, so
        # it would not change the winner; skipping it also avoids ties caused
        # by saturation at 0.0 / 1.0.
        scores = np.column_stack([X_with_bias @ theta for _, theta in self.models])
        classes = np.array([cls for cls, _ in self.models])
        # Map the winning column back to its label so the result is correct
        # even when the labels are not exactly 0..k-1.
        return classes[np.argmax(scores, axis=1)]

    def train_linear_regression(self, X, y):
        """Solve the least-squares problem ``[1, X] @ theta ~= y`` and return ``theta``.

        ``np.linalg.lstsq`` is used instead of explicitly inverting ``X.T @ X``:
        it is numerically more stable and still returns a (minimum-norm)
        solution when feature columns are collinear, where the explicit
        inverse would raise ``LinAlgError``.
        """
        X_with_bias = self._add_bias(X)
        theta, *_ = np.linalg.lstsq(X_with_bias, y, rcond=None)
        return theta

    def sigmoid(self, z):
        """Logistic function ``1 / (1 + exp(-z))``.

        Public helper; ``predict`` ranks raw scores and does not call it.
        """
        return 1 / (1 + np.exp(-z))

In [8]:
# Select features by name rather than by position so the training matrix is
# guaranteed to use the same columns, in the same order, as the hand-written
# samples that are later passed to predict().
feature_columns = ['Age', 'Air Pollution', 'Alcohol use']
X = data[feature_columns].to_numpy(dtype=float)
y = data['Level'].to_numpy()

label_mapping = {"Low": 0, "Medium": 1, "High": 2}
# Fail loudly (KeyError, as a plain dict lookup would) if the data contains a
# label that has no numeric code, instead of silently producing NaN.
unknown_labels = set(y) - set(label_mapping)
if unknown_labels:
    raise KeyError(f"Unmapped Level values: {sorted(map(str, unknown_labels))}")
y_numeric = data['Level'].map(label_mapping).to_numpy(dtype=int)

ova_lr = OneVsAllLinearRegression()
ova_lr.fit(X, y_numeric)

# NOTE: predictions are made on the same rows the model was fitted on, so the
# figure below is training accuracy and is an optimistic estimate.
y_pred = ova_lr.predict(X)

accuracy = np.mean(y_pred == y_numeric)
accuracy_percentage = accuracy * 100
print("Accuracy:", accuracy_percentage, "%")

Accuracy: 79.64071856287424 %


In [9]:

# Five hand-written samples, one row per sample, in the feature order
# Age / Air Pollution / Alcohol use.
test_data = pd.DataFrame(
    [
        [30, 3, 4],
        [25, 5, 6],
        [40, 6, 7],
        [50, 4, 5],
        [35, 2, 3],
    ],
    columns=['Age', 'Air Pollution', 'Alcohol use'],
)

# Numeric class codes predicted by the fitted one-vs-all model.
y_pred_test = ova_lr.predict(test_data.to_numpy())

# Invert label_mapping (name -> code) to translate codes back into level names.
reverse_label_mapping = {code: name for name, code in label_mapping.items()}
y_pred_test_labels = [reverse_label_mapping[code] for code in y_pred_test]

# Attach the readable predictions next to the inputs and show the result.
test_data['Predicted_Level'] = y_pred_test_labels

print(test_data)


   Age  Air Pollution  Alcohol use Predicted_Level
0   30              3            4             Low
1   25              5            6            High
2   40              6            7            High
3   50              4            5             Low
4   35              2            3             Low
