In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split # splitting the data
from sklearn.preprocessing import StandardScaler # data normalization
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, mean_absolute_error, mean_squared_error


# Classification

In [12]:
bill_df = pd.read_csv('bill_authentication.csv')
bill_df.sample(5)

Unnamed: 0,Variance,Skewness,Curtosis,Entropy,Class
346,4.0972,0.46972,1.6671,0.91593,0
669,4.3634,0.46351,1.4281,2.0202,0
143,3.9232,-3.2467,3.4579,0.83705,0
753,3.1541,-5.1711,6.5991,0.57455,0
174,1.8799,2.4707,2.4931,0.37671,0


In [13]:
#Split the data
target_names = ['Variance', 'Skewness', 'Curtosis', 'Entropy']
x_var = np.asarray(bill_df[target_names])
y_var = np.asarray(bill_df['Class'])


In [14]:
def train_classificator(result_list, alg_type, algorithm, x_var, y_var, binary_func = None):
    x_train, x_test, y_train, y_test = train_test_split(x_var, y_var, test_size = 0.3, random_state = 4)
    # Uses StandardScaler to normalize the data
    x_train = StandardScaler().fit_transform(x_train)
    x_test = StandardScaler().fit_transform(x_test)
    # Sets up the regression using the training data
    algorithm.fit(x_train, y_train)
    # Predicts the values
    y_pred = algorithm.predict(x_test)
    # If a lambda function is provided then call it to obtain the y values
    if binary_func:
        y_pred = binary_func(y_pred)
    # Appends the results to the list
    result = {
        'classifier': alg_type,
        'accuracy': accuracy_score(y_test, y_pred), 
        'confusion_matrix': f'{confusion_matrix(y_test, y_pred)}',
        'classification_report': f'{classification_report(y_test, y_pred)}',
        'regressor_instance': algorithm
    }
    print(f'******************************************************')
    print(f'Classifier: {result["classifier"]}')
    print(f'Accuracy: {result["accuracy"]}')
    print(f'Confusion matrix:\n{result["confusion_matrix"]}')
    print(f'Clasification Report:\n{result["classification_report"]}')
    result_list.append(result)
    

In [15]:
result_list =  []

dtc1 = DecisionTreeClassifier()
train_classificator(result_list, 'DecisionTreeClassifier', dtc1, x_var, y_var)

rfc1 = RandomForestClassifier(max_depth = deep)
train_classificator(result_list, 'RandomForestClassifier (Default)', rfc1, x_var, y_var)

for deep in range(1, 10, 1):
    rfc2 = RandomForestClassifier(max_depth = deep)
    train_classificator(result_list, f'RandomForestClassifier (Deep={deep})', rfc2, x_var, y_var)

lrc = LogisticRegression()
train_classificator(result_list, 'Logistic regression', lrc, x_var, y_var, lambda y: [round(v) for v in y])


******************************************************
Classifier: DecisionTreeClassifier
Accuracy: 0.9830097087378641
Confusion matrix:
[[226   7]
 [  0 179]]
Clasification Report:
              precision    recall  f1-score   support

           0       1.00      0.97      0.98       233
           1       0.96      1.00      0.98       179

    accuracy                           0.98       412
   macro avg       0.98      0.98      0.98       412
weighted avg       0.98      0.98      0.98       412

******************************************************
Classifier: RandomForestClassifier (Default)
Accuracy: 0.9830097087378641
Confusion matrix:
[[228   5]
 [  2 177]]
Clasification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.98       233
           1       0.97      0.99      0.98       179

    accuracy                           0.98       412
   macro avg       0.98      0.98      0.98       412
weighted avg       0.98    

In [16]:
analysis_df = pd.DataFrame(result_list)
analysis_df

Unnamed: 0,classifier,accuracy,confusion_matrix,classification_report,regressor_instance
0,DecisionTreeClassifier,0.98301,[[226 7]\n [ 0 179]],precision recall f1-score ...,DecisionTreeClassifier()
1,RandomForestClassifier (Default),0.98301,[[228 5]\n [ 2 177]],precision recall f1-score ...,"(DecisionTreeClassifier(max_depth=9, max_featu..."
2,RandomForestClassifier (Deep=1),0.854369,[[195 38]\n [ 22 157]],precision recall f1-score ...,"(DecisionTreeClassifier(max_depth=1, max_featu..."
3,RandomForestClassifier (Deep=2),0.910194,[[212 21]\n [ 16 163]],precision recall f1-score ...,"(DecisionTreeClassifier(max_depth=2, max_featu..."
4,RandomForestClassifier (Deep=3),0.936893,[[213 20]\n [ 6 173]],precision recall f1-score ...,"(DecisionTreeClassifier(max_depth=3, max_featu..."
5,RandomForestClassifier (Deep=4),0.946602,[[215 18]\n [ 4 175]],precision recall f1-score ...,"(DecisionTreeClassifier(max_depth=4, max_featu..."
6,RandomForestClassifier (Deep=5),0.966019,[[220 13]\n [ 1 178]],precision recall f1-score ...,"(DecisionTreeClassifier(max_depth=5, max_featu..."
7,RandomForestClassifier (Deep=6),0.966019,[[222 11]\n [ 3 176]],precision recall f1-score ...,"(DecisionTreeClassifier(max_depth=6, max_featu..."
8,RandomForestClassifier (Deep=7),0.975728,[[223 10]\n [ 0 179]],precision recall f1-score ...,"(DecisionTreeClassifier(max_depth=7, max_featu..."
9,RandomForestClassifier (Deep=8),0.987864,[[228 5]\n [ 0 179]],precision recall f1-score ...,"(DecisionTreeClassifier(max_depth=8, max_featu..."


# Regression

In [7]:
petrol_df = pd.read_csv('petrol_consumption.csv', names = ['pt', 'ai', 'ph', 'pdl', 'pc'], skiprows = 1)
petrol_df.sample(5)

Unnamed: 0,pt,ai,ph,pdl,pc
39,7.0,4345,3905,0.672,968
36,5.0,4045,17782,0.566,640
23,9.0,4258,4686,0.517,547
42,7.0,4300,3635,0.603,632
6,8.0,5319,11868,0.451,344


In [8]:
#Split the data
target_names = ['pt', 'ai', 'ph', 'pdl']
x_var = np.asarray(petrol_df[target_names])
y_var = np.asarray(petrol_df['pc'])

In [9]:
def train_regressor(result_list, alg_type, algorithm, x_var, y_var, binary_func = None):
    x_train, x_test, y_train, y_test = train_test_split(x_var, y_var, test_size = 0.3, random_state = 4)
    # Uses StandardScaler to normalize the data
    x_train = StandardScaler().fit_transform(x_train)
    x_test = StandardScaler().fit_transform(x_test)
    # Sets up the regression using the training data
    algorithm.fit(x_train, y_train)
    # Predicts the values
    y_pred = algorithm.predict(x_test)
    # If a lambda function is provided then call it to obtain the y values
    if binary_func:
        y_pred = binary_func(y_pred)
    # Appends the results to the list
    result = {
        'regressor': alg_type,
        'regressor_instance': algorithm,
        'MAE': mean_absolute_error(y_test, y_pred),
        'MSE': mean_squared_error(y_test, y_pred),
        'RMSE': mean_squared_error(y_test, y_pred,squared=False)
    }
    print(f'******************************************************')
    print(f'Regressor: {result["regressor"]}')
    print(f'MAE: {result["MAE"]}')
    print(f'MSE: {result["MSE"]}')
    print(f'RMSE: {result["RMSE"]}')
    result_list.append(result)
    

In [10]:
result_list =  []

dtr1 = DecisionTreeRegressor()
train_regressor(result_list, 'DecisionTreeRegressor', dtr1, x_var, y_var)

rfr1 = RandomForestRegressor()
train_regressor(result_list, 'RandomForestRegressor', rfr1, x_var, y_var)

lr1 = LinearRegression()
train_regressor(result_list, 'LinearRegressor', lr1, x_var, y_var)

******************************************************
Regressor: DecisionTreeRegressor
MAE: 96.4
MSE: 12772.4
RMSE: 113.01504324646343
******************************************************
Regressor: RandomForestRegressor
MAE: 61.379333333333335
MSE: 5670.77978
RMSE: 75.30458007319342
******************************************************
Regressor: LinearRegressor
MAE: 49.769341539979855
MSE: 4452.217922749505
RMSE: 66.72494228359815


In [11]:
analysis_df = pd.DataFrame(result_list)
analysis_df

Unnamed: 0,regressor,regressor_instance,MAE,MSE,RMSE
0,DecisionTreeRegressor,DecisionTreeRegressor(),96.4,12772.4,113.015043
1,RandomForestRegressor,"(DecisionTreeRegressor(max_features=1.0, rando...",61.379333,5670.77978,75.30458
2,LinearRegressor,LinearRegression(),49.769342,4452.217923,66.724942
