<h5 style="color:green"> All models' features must be 2D arrays, even if there is only one feature </h5>
<ul>
<li> No documentation yet
<li> Better OOP design and removal of redundant code
<li> To be implemented :
    <ul>
        <li> Gaussian Naive Bayes
        <li> Logistic Regression
        <li> Support Vector Machine
        <li> Gradient Boosting
        <li> DBSCAN and HDBSCAN
        <li> K-Means
        <li> PCA - UMAP
        <li> Reinforcement Learning
        <li> AlphaZero
        <li> Factorization Methods
        <li> Convolutional Neural Networks
        <li> RNN + LSTM
        <li> Transformers
    </ul>
<li> Needs Better Implementations :
    <ul>
        <li> Faster BallTree / KDTree Algorithms for KNN
    </ul>
</ul>

In [None]:
import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier

import decision_tree
import random_forest
import linear_regression
import naive_bayes
import nearest_neighbors

from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pickle

import time
import matplotlib.pyplot as plt

In [None]:
def load_dataset(filename):
    """Read a pickled object from disk and return it.

    Parameters
    ----------
    filename : str or path-like
        Location of the pickle file; opened in binary read mode.

    Returns
    -------
    object
        Whatever object was serialised into the file.
    """
    # NOTE: unpickling can execute arbitrary code, so only point this at
    # files that come from a trusted source.
    with open(filename, mode="rb") as handle:
        dataset = pickle.load(handle)
    return dataset

In [None]:
def compareModels(models, X, y, metric):
    """Fit every model, report how long fitting took, and print its score.

    Parameters
    ----------
    models : iterable
        Objects exposing ``fit(X, y)`` and ``predict(X)``.
    X : sequence
        ``X[0]`` holds the features used for fitting, ``X[1]`` the features
        used for evaluation.
    y : sequence
        ``y[0]`` holds the targets used for fitting, ``y[1]`` the targets the
        predictions are scored against.
    metric : callable
        Scoring function, called as ``metric(y_true, y_pred)``.

    Returns
    -------
    None
        Results are printed, one group of lines per model.
    """
    for index, model in enumerate(models):
        print(f"Model {index}")

        # Only the call to fit() is timed; prediction and scoring are not.
        started = time.perf_counter()
        model.fit(X[0], y[0])
        elapsed = time.perf_counter() - started
        print(f"Time to fit : {elapsed} s")

        predictions = model.predict(X[1])
        # Ground truth goes first: scikit-learn metrics take (y_true, y_pred),
        # and asymmetric metrics give wrong values with the order swapped.
        print(f"Performance : {metric(y[1], predictions)}")


In [None]:
# Load the pickled "citiesSmall" data and hold out 25% of it for evaluation.
df = load_dataset("citiesSmall.pkl")
X, y = df['X'], df['y']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=19,  # fixed seed so the split is identical on every run
)
X_train.shape

In [None]:
# Compare scikit-learn's decision tree with the `decision_tree` module's
# implementation for every maximum depth from 1 to 23.
depth = list(range(1, 24))
# NOTE(review): `accuracy` and `execution_time` are initialised here but never
# filled in by this cell; kept in case another cell reads them.
accuracy = [[], []]
execution_time = [[], []]

for max_depth in depth:
    print("==========================================")
    models = [
        # Seeded so reruns give the same tree: scikit-learn randomly permutes
        # the features at each split, which can change the result on ties.
        DecisionTreeClassifier(criterion='gini', max_depth=max_depth, random_state=19),
        decision_tree.DecisionTreeClassifier(criterion='gini', max_depth=max_depth),
    ]

    compareModels(models, (X_train, X_test), (y_train, y_test), accuracy_score)
    

In [None]:
# Synthetic 3-class problem. Every feature is cut into 5 bins labelled 0..4
# before the two random-forest implementations are compared.
X, y = datasets.make_classification(
    n_samples=1000,
    n_features=5,
    n_informative=3,
    n_redundant=2,
    n_classes=3,
    random_state=19,
)

bin_labels = list(range(5))
X_categorical = pd.DataFrame()
for i in range(X.shape[1]):
    X_categorical[f'feature_{i}'] = pd.cut(X[:, i], bins=5, labels=bin_labels)

# From here on X is the binned version of the features.
X = X_categorical.values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=19)

depth = list(range(1, 24))
# NOTE(review): `accuracy` and `execution_time` are initialised here but never
# filled in by this cell; kept in case another cell reads them.
accuracy = [[], []]
execution_time = [[], []]

for max_depth in depth:
    print(f"Depth {max_depth} ==========================================")
    models = [
        # Seeded so the scikit-learn baseline is reproducible across runs
        # (bootstrap samples and feature subsets are otherwise random).
        RandomForestClassifier(criterion='gini', max_depth=max_depth, random_state=19),
        # NOTE(review): the random_forest module's constructor is not visible
        # here, so no seed is passed — confirm whether it supports one.
        random_forest.RandomForestClassifier(criterion='gini', max_depth=max_depth),
    ]

    compareModels(models, (X_train, X_test), (y_train, y_test), accuracy_score)

In [None]:
# Synthetic regression task: 10 features, noise=1 and an intercept (bias) of 19.
X, y = datasets.make_regression(
    n_samples=10000,
    n_features=10,
    noise=1,
    bias=19,
    random_state=4,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=19
)

# scikit-learn baseline first, then the linear_regression module's model.
models = [
    LinearRegression(),
    linear_regression.LinearRegression(),
]
compareModels(models, (X_train, X_test), (y_train, y_test), mean_squared_error)

In [None]:
# Synthetic binary problem with 5 informative features, each cut into 5 bins
# labelled 0..4 so both naive-Bayes models see categorical inputs.
X, y = datasets.make_classification(
    n_samples=18000,
    n_features=5,
    n_informative=5,
    n_redundant=0,
    n_classes=2,
    random_state=19,
)

X_categorical = pd.DataFrame()
for column in range(X.shape[1]):
    X_categorical[f'feature_{column}'] = pd.cut(
        X[:, column], bins=5, labels=list(range(5))
    )

# From here on X is the binned version of the features.
X = X_categorical.values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=19
)

# scikit-learn baseline first, then the naive_bayes module's model.
models = [
    MultinomialNB(),
    naive_bayes.NaiveBayes(),
]
compareModels(models, (X_train, X_test), (y_train, y_test), accuracy_score)

In [None]:
# Synthetic 3-class problem used to compare the two KNN classifiers for
# every neighbour count from 5 to 100.
X, y = datasets.make_classification(
    n_samples=1000,
    n_features=5,
    n_informative=3,
    n_redundant=2,
    n_classes=3,
    random_state=19,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=19
)

for k in range(5, 101):
    print(f"\n{k} neighbors ============================\n")
    # scikit-learn baseline first, then the nearest_neighbors module's model.
    models = [
        KNeighborsClassifier(n_neighbors=k),
        nearest_neighbors.KNN(k),
    ]
    compareModels(models, (X_train, X_test), (y_train, y_test), accuracy_score)

In [None]:
# Explicit names instead of `from deep_learning import *`, so nothing from
# that module can silently shadow names defined in earlier cells.
from deep_learning import Layer, Module, Sigmoid
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X.shape)

# Fit the scaler on the training split only, then reuse it for the test split
# so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

mlp = Module()

# Add layers to the MLP model: input -> 64 units -> sigmoid -> 1 output.
mlp.add_layer(Layer(input_dim=X_train.shape[1], output_dim=64))  # Adjust the dimensions as needed
mlp.add_layer(Sigmoid(64, 64))  # Add activation layers and adjust dimensions as needed
# NOTE(review): iris has 3 classes but this final layer has a single output,
# and its predictions go straight into accuracy_score below — confirm that
# Module.predict returns discrete class labels.
mlp.add_layer(Layer(64, 1))

# Fit the model to the training data
mlp.fit(X_train, y_train)

# Test the model on the testing data
predictions = mlp.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy}")