In [128]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Linear Regression

## Simple equation

We generate simple synthetic data for testing. A correct fit should recover the line
$Y = 30X + 20$

In [129]:
# Synthetic data lying exactly on the line y = 30 * x + 20.
# Build the (n_samples, 1) feature column with reshape rather than going
# through np.mat, which is deprecated and no longer exists in NumPy 2.0.
X = np.arange(1, 1000, 5).reshape(-1, 1)
# 1-D target vector with one entry per row of X.
y = 30 * X.ravel() + 20

### Custom Model

In [130]:
from modelzoo.models import LinearRegression as CustomLR

In [132]:
# Fit the custom linear regression on the synthetic line, then score it on
# the same data.
# NOTE(review): the recorded progress bar shows this cell ran for ~13 hours
# (10,000,000 iterations), and the recorded score is about -12.5 although the
# fitted coefficients are close to the true line — the custom `score` may not
# be R^2; confirm against modelzoo.
reg_cust = CustomLR(alpha=1, num_iter=10_000_000)
reg_cust.fit(X, y)
reg_cust.score(X, y)

Iterations: 100%|██████████| 10000000/10000000 [12:56:54<00:00, 214.53it/s]      


-12.532239459379802

In [134]:
# Display the fitted parameter vector of the custom model.
# NOTE(review): the recorded output has two entries (~19.75, ~30.0),
# presumably [intercept, slope] — confirm the ordering against modelzoo.
reg_cust.coef

array([19.75238883, 30.00037101])

### sklearn

In [11]:
from sklearn.linear_model import LinearRegression

In [13]:
# Reference fit with scikit-learn on the same synthetic data; the last
# expression displays the estimator's score on the training data.
reg_sk = LinearRegression()
reg_sk.fit(X, y)
reg_sk.score(X, y)

1.0

In [15]:
# Display the fitted slope(s) and intercept of the scikit-learn model for
# comparison with the custom model's parameters.
reg_sk.coef_, reg_sk.intercept_

(array([30.]), 19.999999999996362)

## Wine Dataset

In [21]:
# Load the red-wine quality CSV (semicolon-separated) from the UCI repository.
url_Wine = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
wine = pd.read_csv(url_Wine, sep=';')

# Two-feature regression problem: predict quality from density and alcohol.
y = wine['quality']
X = wine.loc[:, ['density', 'alcohol']]

### Custom Model

In [22]:
# Fit the custom model on the two wine features, then display the sum of
# squared residuals on the training data.
# NOTE(review): the recorded progress bar shows ~53 minutes for this fit.
lr_custom = CustomLR(alpha=1, num_iter=500_000).fit(X, y)
sum((lr_custom.predict(X) - y) ** 2)

Iterations: 100%|██████████| 500000/500000 [53:22<00:00, 156.12it/s]  


805.6487122633293

In [23]:
# Display the fitted parameter vector of the custom model.
# NOTE(review): the recorded output has three entries for two features,
# presumably intercept first — confirm. The recorded values differ markedly
# from the scikit-learn fit on the same data, which may mean the iterative
# fit had not converged; verify.
lr_custom.coef

array([1.12371019, 0.75125029, 0.36108436])

### sklearn

In [27]:
# Reference fit with scikit-learn, then the same sum-of-squared-residuals
# metric used for the custom model.
lr_sklearn = LinearRegression().fit(X, y)
sum((lr_sklearn.predict(X) - y) ** 2)

800.667698877433

In [30]:
# Display the fitted coefficients (one per feature column of X) and the
# intercept of the scikit-learn model.
lr_sklearn.coef_, lr_sklearn.intercept_

(array([34.82170159,  0.39144139]), -33.152379861687145)

# Naive Bayes

## Balance Scale Data

In [84]:
# Balance-scale dataset from the UCI repository. The file is read with
# explicit column names; the class label is the first column.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/balance-scale/balance-scale.data'
col = [
    'class_name',
    'left_weight',
    'left_distance',
    'right_weight',
    'right_distance',
]
data = pd.read_csv(url, sep=',', names=col)

In [109]:
# Features are every column after the class label. Use a plain 2-D ndarray:
# np.matrix is discouraged by NumPy and is rejected by the input validation
# of current scikit-learn estimators.
# NOTE(review): confirm the custom NaiveBayes does not rely on np.matrix
# semantics.
X = np.array(data.iloc[:, 1:])
y = data.class_name
# Hold out 10% of the rows for testing; the seed is fixed so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.10, random_state=88)

### Custom Model

In [114]:
from modelzoo.models import NaiveBayes

In [123]:
# Fit the custom Naive Bayes classifier on the training split, then display
# its score on the held-out split.
nb_custom = NaiveBayes()
nb_custom.fit(X_train, y_train)
nb_custom.score(X_test, y_test)

0.9206349206349206

### sklearn

In [124]:
from sklearn.naive_bayes import GaussianNB

In [125]:
# Reference Gaussian Naive Bayes from scikit-learn, fitted on the same
# training split and scored on the same held-out split.
nb_sklearn = GaussianNB().fit(X_train, y_train)
nb_sklearn.score(X_test, y_test)

0.9365079365079365

# Decision Tree

## Wine Data

In [171]:
# Reload the red-wine quality CSV (semicolon-separated) so that this section
# can be run without relying on the regression section's `wine` frame.
url_Wine = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
wine = pd.read_csv(url_Wine, sep=';')

In [172]:
# Convert the frame to an array once; the original converted it twice and
# then wrapped an already-flat column in a redundant flatten/np.array copy.
wine_values = np.array(wine)
# All columns but the last are features.
X = wine_values[:, :-1]
# The last column (presumably `quality` — the cell relies on column order)
# is the class label, cast to integers.
y = wine_values[:, -1].astype(int)

In [173]:
# Hold out 20% of the rows for testing; the seed is fixed so the split is
# reproducible. These splits are reused by the Random Forest section below.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### Custom Model

In [174]:
from modelzoo.models import DecisionTree

In [175]:
# Fit the custom decision tree on the training split. The fit call is the
# cell's last expression, so its return value is what the cell displays.
tree_custom = DecisionTree()
tree_custom.fit(X_train, y_train)

DecisionTree

In [176]:
# Score of the custom tree on the data it was trained on.
tree_custom.score(X_train, y_train)

1.0

In [177]:
# Score of the custom tree on the held-out split; compare with the training
# score to gauge overfitting.
tree_custom.score(X_test, y_test)

0.628125

### sklearn

In [178]:
from sklearn.tree import DecisionTreeClassifier

In [179]:
# Reference decision tree from scikit-learn. The seed is fixed because tree
# construction breaks ties between equally good splits at random, so an
# unseeded tree can score differently from run to run.
# NOTE: results recorded before seeding may differ slightly from a re-run.
tree_sk = DecisionTreeClassifier(random_state=0)
tree_sk.fit(X_train, y_train)

DecisionTreeClassifier()

In [180]:
# Score of the scikit-learn tree on the data it was trained on.
tree_sk.score(X_train, y_train)

1.0

In [181]:
# Score of the scikit-learn tree on the held-out split, for comparison with
# the custom tree's held-out score.
tree_sk.score(X_test, y_test)

0.68125

# Random Forest

- Uses the same wine dataset (and the same train/test split) as the Decision Tree section.

## Custom Model

In [182]:
from modelzoo.models import RandomForest

In [183]:
# Fit the custom random forest with 100 trees on the training split, then
# display its score on the held-out split.
# NOTE(review): the meaning of n_split=None is not visible here — confirm
# against modelzoo. No seed is passed, so results may vary between runs.
forest_custom = RandomForest(n_trees= 100, n_split=None)
forest_custom.fit(X_train, y_train)
forest_custom.score(X_test, y_test)

Fitting Forest: 100%|██████████| 100/100 [04:14<00:00,  2.54s/it]


0.721875

## sklearn

In [186]:
from sklearn.ensemble import RandomForestClassifier

In [187]:
# Reference random forest from scikit-learn. Bootstrap sampling and feature
# sub-sampling are random, so the seed is fixed to make the held-out score
# reproducible.
# NOTE: results recorded before seeding may differ slightly from a re-run.
forest_sk = RandomForestClassifier(random_state=0)
forest_sk.fit(X_train, y_train)
forest_sk.score(X_test, y_test)

0.703125