In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM
from sklearn.metrics import mean_absolute_error

# Load the dataset and expose it as a plain 2-D array.
df = pd.read_csv('/content/Q2Q3_input.csv')
data = df.to_numpy()

# Every column except the last is a feature; the last column is the target.
X = data[:, :-1]
y = data[:, -1]

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=1,
)

# Define a function for processing each model
def process_model(model, X_train, y_train, X_test, y_test, name):
    """Drop detected training outliers, fit a linear regression, print test MAE.

    Args:
        model: Outlier detector exposing ``fit_predict``; training rows it
            labels ``-1`` are treated as outliers and discarded.
        X_train: Training features; must support boolean-mask row indexing.
        y_train: Training targets, aligned row-for-row with ``X_train``.
        X_test: Held-out features used for evaluation (never filtered).
        y_test: Held-out targets used for evaluation.
        name: Label for the detector, shown in the printed report line.
    """
    # The detector is fitted on the features only; targets are not consulted.
    outlier_predictions = model.fit_predict(X_train)

    # Keep every row that was not flagged with the outlier label (-1).
    non_outliers_mask = outlier_predictions != -1
    X_train_updated = X_train[non_outliers_mask]
    y_train_updated = y_train[non_outliers_mask]

    # Fit the regression on the cleaned training data only.
    linear_model = LinearRegression()
    linear_model.fit(X_train_updated, y_train_updated)

    # Score on the untouched test set with Mean Absolute Error.
    yhat = linear_model.predict(X_test)
    mae = mean_absolute_error(y_test, yhat)

    # Format the score inside the f-string. The earlier
    # ``f'... %.3f' % mae`` form applied %-formatting to the already
    # interpolated text, so a ``name`` containing '%' broke the report line.
    print(f'MAE ({name}): {mae:.3f}')

# Build the four outlier detectors up front, each configured to treat
# roughly 1% of the training rows as outliers.
# NOTE(review): no random_state is passed to the detectors, so the Isolation
# Forest / Minimum Covariance Determinant scores may vary between runs --
# confirm whether that is intended.
iso_model = IsolationForest(contamination=0.01)
mcd_model = EllipticEnvelope(contamination=0.01)
lof_model = LocalOutlierFactor(contamination=0.01)
svm_model = OneClassSVM(nu=0.01)

# Evaluate each detector in turn: filter the training set with it, fit the
# linear regression on what remains, and report the test-set MAE.
for label, detector in (
    ("Isolation Forest", iso_model),
    ("Minimum Covariance Determinant", mcd_model),
    ("Local Outlier Factor", lof_model),
    ("One-Class SVM", svm_model),
):
    process_model(detector, X_train, y_train, X_test, y_test, label)


MAE (Isolation Forest): 0.622
MAE (Minimum Covariance Determinant): 0.602
MAE (Local Outlier Factor): 0.602
MAE (One-Class SVM): 0.603
