In [1]:
import numpy as np
import pandas as pd

# from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier

import decision_tree
import random_forest
import linear_regression
from naive_bayes import NaiveBayes
from nearest_neighbors import KNN

from sklearn_base import Pipeline
from sklearn_base import DummyTransformer
from sklearn_base import ModelSelector
from sklearn_base import StackingClassifier, StackingRegressor
from logistic_regression import LogisticRegression
from linear_regression import LinearRegression
from decision_tree import DecisionTreeClassifier, DecisionTreeRegressor
from random_forest import RandomForestClassifier

from metrics import accuracy
from metrics import accuracy_wrapper

from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pickle

import time
import matplotlib.pyplot as plt

In [2]:
def load_dataset(filename):
    """Read a pickled object from ``filename`` and return it.

    The file is opened in binary mode and closed again before returning.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserialising, so only point this at files from a trusted source.
    """
    with open(filename, "rb") as handle:
        dataset = pickle.load(handle)
    return dataset

In [3]:
# Load the pickled cities data, pull out its 'X' and 'y' entries, and hold out
# 25% of the samples for evaluation (fixed random_state for a repeatable split).
df = load_dataset("citiesSmall.pkl")
X, y = df['X'], df['y']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=19,
)
X_train.shape

(300, 2)

In [4]:
# Two named steps: a DummyTransformer stage followed by a random forest.
pipeline = Pipeline(
    [
        ('transform', DummyTransformer()),
        ('classify', RandomForestClassifier()),
    ]
)

# Train on the training split, then predict labels for the held-out split.
pipeline.fit(X_train, y_train)
predictions = pipeline.predict(X_test)
predictions


[0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0]

In [11]:
# Candidate (name, estimator) pairs handed to ModelSelector; only the
# 7-neighbour KNN is currently enabled.
candidate_models = [
    # ('clf1', DecisionTreeClassifier()),
    # ('clf3', RandomForestClassifier()),
    ('clf3', KNeighborsClassifier(n_neighbors=7)),
]
model_selector = ModelSelector(candidate_models, scoring=accuracy_wrapper)

# Fit the selector on the training split and predict on the held-out split.
model_selector.fit(X_train, y_train)
best_predictions = model_selector.predict(X_test)

# Report what the selector recorded.
print("Best Score:", model_selector.best_score)
print("Best Params:", model_selector.best_params)
print("Best Estimator:", model_selector.best_estimator_)
print("Model Scores:", model_selector.get_scores())

Best Score: 0.8933333333333333
Best Params: {'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 7, 'p': 2, 'weights': 'uniform'}
Best Estimator: KNeighborsClassifier(n_neighbors=7)
Model Scores: [('clf3', 0.8933333333333333)]


In [13]:
# Base estimators and the meta estimator passed to StackingClassifier.
base_classifiers = [RandomForestClassifier(), DecisionTreeClassifier()]
meta_classifier = DecisionTreeClassifier()

# Build the stacked model, fit it on the training split, and predict on the
# held-out split.
stacking_clf = StackingClassifier(
    base_classifiers,
    meta_classifier,
    scoring=accuracy,
)
stacking_clf.fit(X_train, y_train)
y_pred = stacking_clf.predict(X_test)

print("Predictions:", y_pred)
print("Score:", stacking_clf.score(X_test, y_test))

In [None]:
# Build a small synthetic regression problem in which the train and test
# targets are generated from the SAME underlying linear relationship.
np.random.seed(42)
X_train_reg = np.random.rand(100, 10)  # 100 samples, 10 features
# One weight vector shared by both splits. Drawing fresh weights for the test
# targets (as this cell previously did) makes the test set follow a different
# function than the one the model was trained on, so its error is meaningless.
true_coef = np.random.rand(10)
y_train_reg = np.dot(X_train_reg, true_coef) + np.random.rand(100)  # linear signal + uniform [0, 1) noise
X_test_reg = np.random.rand(20, 10)  # 20 test samples
y_test_reg = np.dot(X_test_reg, true_coef) + np.random.rand(20)

# Define base regressors
base_regressors = [
    LinearRegression()
]
meta_regressor = LinearRegression()

# Fit the stacked regressor on the training data and predict the test inputs.
stacking_reg = StackingRegressor(base_regressors, meta_regressor, scoring=mean_squared_error)
stacking_reg.fit(X_train_reg, y_train_reg)
y_pred_reg = stacking_reg.predict(X_test_reg)

print("Predictions:", y_pred_reg)
print("Mean Squared Error:", round(mean_squared_error(y_test_reg, y_pred_reg), 4))
print("Score:", round(stacking_reg.score(X_test_reg, y_test_reg), 4))

In [12]:
from sklearn.preprocessing import MinMaxScaler as SklearnMinMaxScaler
from sklearn.preprocessing import StandardScaler as SklearnStandardScaler
from scaler import MinMaxScaler, StandardScaler

# Sample data: 100 rows x 5 columns of uniform [0, 1) values, seeded so the
# comparison below is repeatable.
# NOTE(review): this rebinds X, which an earlier cell used for the cities
# features; cells run afterwards will see this random matrix instead.
np.random.seed(42)
X = np.random.rand(100, 5)

# Test MinMaxScaler: fit the custom scaler on X, scale X, then map it back.
custom_minmax_scaler = MinMaxScaler(feature_range=(0, 1))
custom_minmax_scaler.fit(X)
X_custom_minmax_scaled = custom_minmax_scaler.transform(X)
X_custom_minmax_inverse = custom_minmax_scaler.inverse_transform(X_custom_minmax_scaled)

# Reference results from scikit-learn's MinMaxScaler on the same data.
sklearn_minmax_scaler = SklearnMinMaxScaler(feature_range=(0, 1))
X_sklearn_minmax_scaled = sklearn_minmax_scaler.fit_transform(X)
X_sklearn_minmax_inverse = sklearn_minmax_scaler.inverse_transform(X_sklearn_minmax_scaled)

# Show the first five rows of each result for a visual side-by-side check.
print("Custom MinMaxScaler:\n", X_custom_minmax_scaled[:5])
print("Sklearn MinMaxScaler:\n", X_sklearn_minmax_scaled[:5])
print("Custom MinMaxScaler Inverse Transform:\n", X_custom_minmax_inverse[:5])
print("Sklearn MinMaxScaler Inverse Transform:\n", X_sklearn_minmax_inverse[:5])

# Verify the results: the custom scaled output must match scikit-learn's, and
# the custom inverse transform must recover X (absolute tolerance 1e-6).
# The scikit-learn inverse transform is printed above but not asserted on.
assert np.allclose(X_custom_minmax_scaled, X_sklearn_minmax_scaled, atol=1e-6), "MinMaxScaler results do not match!"
assert np.allclose(X, X_custom_minmax_inverse, atol=1e-6), "MinMaxScaler inverse_transform results do not match the original data!"

print("========================")

# Test StandardScaler: same procedure as above with the custom StandardScaler.
custom_standard_scaler = StandardScaler()
custom_standard_scaler.fit(X)
X_custom_standard_scaled = custom_standard_scaler.transform(X)
X_custom_standard_inverse = custom_standard_scaler.inverse_transform(X_custom_standard_scaled)

# Reference results from scikit-learn's StandardScaler on the same data.
sklearn_standard_scaler = SklearnStandardScaler()
X_sklearn_standard_scaled = sklearn_standard_scaler.fit_transform(X)
X_sklearn_standard_inverse = sklearn_standard_scaler.inverse_transform(X_sklearn_standard_scaled)

# Show the first five rows of each result for a visual side-by-side check.
print("Custom StandardScaler:\n", X_custom_standard_scaled[:5])
print("Sklearn StandardScaler:\n", X_sklearn_standard_scaled[:5])
print("Custom StandardScaler Inverse Transform:\n", X_custom_standard_inverse[:5])
print("Sklearn StandardScaler Inverse Transform:\n", X_sklearn_standard_inverse[:5])

# Verify the results: custom output vs. scikit-learn, and round-trip back to X.
assert np.allclose(X_custom_standard_scaled, X_sklearn_standard_scaled, atol=1e-6), "StandardScaler results do not match!"
assert np.allclose(X, X_custom_standard_inverse, atol=1e-6), "StandardScaler inverse_transform results do not match the original data!"

# Only reached when every assertion above has passed.
print("All tests passed successfully!")

Custom MinMaxScaler:
 [[0.37137126 0.9598043  0.75584966 0.61237651 0.14469594]
 [0.14921964 0.04774697 0.89545806 0.61491076 0.71298574]
 [0.01157534 0.9794176  0.86036041 0.21383516 0.17126123]
 [0.1770819  0.29926284 0.54023166 0.44038874 0.28388297]
 [0.61259972 0.13092898 0.29821328 0.37273071 0.45357177]]
Sklearn MinMaxScaler:
 [[0.37137126 0.9598043  0.75584966 0.61237651 0.14469594]
 [0.14921964 0.04774697 0.89545806 0.61491076 0.71298574]
 [0.01157534 0.9794176  0.86036041 0.21383516 0.17126123]
 [0.1770819  0.29926284 0.54023166 0.44038874 0.28388297]
 [0.61259972 0.13092898 0.29821328 0.37273071 0.45357177]]
Custom MinMaxScaler Inverse Transform:
 [[0.37454012 0.95071431 0.73199394 0.59865848 0.15601864]
 [0.15599452 0.05808361 0.86617615 0.60111501 0.70807258]
 [0.02058449 0.96990985 0.83244264 0.21233911 0.18182497]
 [0.18340451 0.30424224 0.52475643 0.43194502 0.29122914]
 [0.61185289 0.13949386 0.29214465 0.36636184 0.45606998]]
Sklearn MinMaxScaler Inverse Transform:
 [