# Classification

## Get artificial data 

In [1]:
from sklearn.datasets import make_classification 
# Fixed seed so the synthetic classification dataset -- and the classification
# scores computed from it -- are the same on every Restart & Run All.
# With random_state=None a different dataset is drawn on each execution.
X, y = make_classification(
    n_samples=100,
    n_features=4,
    n_informative=4,
    n_redundant=0,
    n_repeated=0,
    n_classes=3,
    n_clusters_per_class=2,
    random_state=42,
)

In [2]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the classification samples for evaluation; the fixed seed
# keeps the split itself stable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Random prediction algorithm

In [3]:
from random import seed
from random import randrange
import numpy as np

def random_algorithm(y_train, y_test, random_state=1):
    """Predict a uniformly random training target for every test sample.

    Baseline model: each prediction is drawn independently, with equal
    probability, from the distinct values found in ``y_train``. How often a
    value occurs in ``y_train`` is ignored.

    Parameters
    ----------
    y_train : iterable of hashable
        Training targets; only the set of distinct values is used.
    y_test : iterable
        Test targets; only the number of items matters (one prediction each).
    random_state : int, default 1
        Seed of the private random generator. The default yields the same
        draw sequence as calling ``random.seed(1)`` before drawing.

    Returns
    -------
    numpy.ndarray
        One predicted value per element of ``y_test``.

    Raises
    ------
    ValueError
        If ``y_train`` is empty while ``y_test`` is not (nothing to draw from).
    """
    from random import Random

    # A private generator keeps the predictions reproducible without reseeding
    # the process-wide ``random`` module, which would silently reset the state
    # used by every other ``random.*`` call in the notebook.
    rng = Random(random_state)

    # Candidates are taken in set-iteration order, exactly as before, so the
    # mapping from drawn index to value is unchanged.
    candidates = list(set(y_train))

    predicted = [candidates[rng.randrange(len(candidates))] for _ in y_test]
    return np.array(predicted)

In [4]:
# Random-guess baseline for the classification split.
y_random_c = random_algorithm(y_train=y_train, y_test=y_test)

# Zero rule algorithm

In [5]:
def zero_rule_algorithm_classification(y_train, y_test):
    """Predict the most frequent training label for every test sample.

    ``y_test`` is used only for its length. The result is a plain list that
    repeats the majority label of ``y_train`` ``len(y_test)`` times.
    """
    distinct_labels = set(y_train)
    occurrences = list(y_train).count
    # When several labels share the highest count, max() keeps the first one
    # it meets while iterating the set of distinct labels.
    majority_label = max(distinct_labels, key=occurrences)
    return [majority_label] * len(y_test)

In [6]:
# Majority-class baseline for the classification split.
y_zero_c = zero_rule_algorithm_classification(y_train=y_train, y_test=y_test)

# Accuracy

In [7]:
from sklearn.metrics import accuracy_score
# Report the accuracy of both baselines against the held-out labels.
for label, predictions in (
    ('Random prediction accuracy', y_random_c),
    ('Zero prediction accuracy', y_zero_c),
):
    print(label, accuracy_score(y_test, predictions))

Random prediction accuracy 0.30303030303030304
Zero prediction accuracy 0.30303030303030304


# Regression

In [8]:
from sklearn.datasets import make_regression
# Synthetic regression problem: 100 samples, 4 features of which 3 are
# informative; seeded for reproducibility.
Xr, yr = make_regression(
    n_samples=100,
    n_features=4,
    n_informative=3,
    random_state=42,
)

In [9]:
from sklearn.model_selection import train_test_split
# Same 67/33 seeded split as used for the classification data.
Xr_train, Xr_test, yr_train, yr_test = train_test_split(
    Xr,
    yr,
    test_size=0.33,
    random_state=42,
)

In [10]:
def zero_rule_algorithm_regression(y_train, y_test):
    """Predict the mean training target for every test sample.

    ``y_test`` is used only for its length. The result is a plain list that
    repeats the arithmetic mean of ``y_train`` ``len(y_test)`` times.
    """
    # Other constant predictors (mode, median) are not implemented here;
    # the arithmetic mean is the one used.
    total = sum(y_train)
    mean_target = total / float(len(y_train))
    return [mean_target] * len(y_test)

In [11]:
# Baseline predictions for the regression split: a randomly drawn training
# target per sample, and the constant training mean.
y_random_r = random_algorithm(y_train=yr_train, y_test=yr_test)
y_zero_r = zero_rule_algorithm_regression(y_train=yr_train, y_test=yr_test)

In [12]:
def mape(y_true, y_pred):
    """Mean absolute percentage error (MAPE) of ``y_pred`` against ``y_true``.

    Computes ``mean(|(y_true - y_pred) / y_true|) * 100``. Positions where
    ``y_true`` is exactly zero are dropped first, because the percentage error
    is undefined there.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, one per target.

    Returns
    -------
    float
        The MAPE in percent, or NaN when no non-zero target remains.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    """
    y_true = np.asanyarray(y_true)
    y_pred = np.asanyarray(y_pred)

    # Raised explicitly rather than via ``assert`` so the check is not stripped
    # under ``python -O``; exception type and message are unchanged.
    if len(y_true) != len(y_pred):
        raise AssertionError("Target and Prediction arrays length should be equal")

    # Bring equally sized inputs to one layout (e.g. (n,) vs (n, 1)) so the
    # subtraction below is element-wise instead of broadcasting to n x n.
    if y_pred.shape != y_true.shape and y_pred.size == y_true.size:
        y_pred = y_pred.reshape(y_true.shape)

    # A boolean mask selects matching positions in both arrays for any number
    # of dimensions. Feeding np.where's per-axis index tuple to np.delete
    # treated the indices as flat positions and removed the wrong elements
    # for multi-dimensional input.
    nonzero = y_true != 0
    if not np.all(nonzero):
        y_true = y_true[nonzero]
        y_pred = y_pred[nonzero]

    # Nothing left to average: report NaN directly instead of letting
    # np.mean emit a RuntimeWarning on an empty array.
    if y_true.size == 0:
        return np.nan

    mape_value = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mape_value

# Error (MAPE)

In [13]:
# Report the MAPE of both regression baselines against the held-out targets.
for label, predictions in (
    ('Random prediction mape', y_random_r),
    ('Zero prediction mape', y_zero_r),
):
    print(label, mape(yr_test, predictions))

Random prediction mape 205.31593294844183
Zero prediction mape 116.06432323693923
