In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Generate random linear data with noise
import matplotlib.pyplot as plt
# Seed NumPy's global generator so the same sample is drawn on every run.
np.random.seed(42)

# 100 one-dimensional inputs in [0, 2) and targets y = 4 + 3x + standard-normal noise.
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# Positional hold-out: the first 80 rows train, the remaining 20 test.
X_train, X_test = X[:80], X[80:]
y_train, y_test = y[:80], y[80:]

# Show both partitions on one set of axes.
fig, ax = plt.subplots()
ax.scatter(X_train, y_train, color='blue', label='Training data')
ax.scatter(X_test, y_test, color='red', label='Test data')
ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_title('Training and Test Data')
ax.legend()
plt.show()

In [None]:
# Generate synthetic binary classification dataset
from sklearn.datasets import make_classification

# 1000 samples, 20 features (10 informative, 2 redundant), two classes with
# two clusters each; random_state pins the draw so reruns give the same data.
# NOTE: this rebinds X and y, replacing any values an earlier cell assigned.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    n_clusters_per_class=2,
    n_redundant=2,
    n_informative=10,
    random_state=42,
)

# make_classification already returns NumPy arrays, so no np.array()
# conversion (which would only copy them) is needed here.

In [None]:
from sklearn.model_selection import train_test_split
# Hold out part of the current X / y for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,    # 20% of the rows go to the test partition
    random_state=42,  # fixed seed so the shuffle is reproducible
)

In [None]:
# Import MNIST
from sklearn.datasets import fetch_openml
# Fetch version 1 of the 'mnist_784' dataset through fetch_openml.
mnist = fetch_openml('mnist_784', version=1)
X = mnist.data
y = mnist.target

# NOTE: this rebinds X, y and the four split names, replacing whatever an
# earlier data cell assigned; later cells see these values when the notebook
# is run top to bottom.
# NOTE(review): MNIST is presumably a ten-class digit problem - confirm it is
# the intended input for the regressors / binary classifiers further down.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from typing import Literal
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

class MyFavoriteLinearregressor:
    """Build, grid-search and evaluate one scikit-learn linear regressor chosen by name.

    Supported names are 'Linear', 'Ridge', 'Lasso', 'ElasticNet' and 'SGD'.

    Attributes:
        model_name: The name passed to the constructor.
        param_grid: The grid used by the last ``hyperparameter_tuning`` call,
            or ``None`` before tuning.
        model: The current estimator; replaced by the best estimator found
            once ``hyperparameter_tuning`` has run.
    """

    # Hyperparameter grids searched by ``hyperparameter_tuning``, keyed by model name.
    _PARAM_GRIDS = {
        'Linear': {'fit_intercept': [True, False]},
        'Ridge': {'alpha': [0.1, 1.0, 10.0]},
        'Lasso': {'alpha': [0.1, 1.0, 10.0]},
        'ElasticNet': {'alpha': [0.1, 1.0, 10.0], 'l1_ratio': [0.1, 0.5, 0.9]},
        'SGD': {'alpha': [0.0001, 0.001, 0.01],
                'learning_rate': ['constant', 'optimal', 'invscaling']},
    }

    def __init__(self, model_name: Literal["Linear","Ridge","Lasso","ElasticNet","SGD"]):
        """Create the untuned estimator for ``model_name``.

        Raises:
            ValueError: If ``model_name`` is not one of the supported names.
        """
        # Validate first so an unknown name never reaches the estimator lookup.
        self._grid_for(model_name)
        self.model_name: Literal["Linear","Ridge","Lasso","ElasticNet","SGD"]= model_name
        self.param_grid = None
        estimator_classes = {
            'Linear': LinearRegression,
            'Ridge': Ridge,
            'Lasso': Lasso,
            'ElasticNet': ElasticNet,
            'SGD': SGDRegressor,
        }
        self.model = estimator_classes[model_name]()

    @classmethod
    def _grid_for(cls, model_name):
        """Return a fresh copy of the search grid for ``model_name`` or raise ValueError."""
        # The isinstance check keeps unhashable values (e.g. lists) on the
        # ValueError path instead of failing inside the dict lookup.
        if not isinstance(model_name, str) or model_name not in cls._PARAM_GRIDS:
            raise ValueError('Invalid model name')
        return {param: list(values) for param, values in cls._PARAM_GRIDS[model_name].items()}

    def hyperparameter_tuning(self, X, y):
        """Grid-search the model's hyperparameters with 5-fold cross-validation.

        The search is scored with negative mean squared error. The best
        parameters and the best (sign-flipped) MSE are printed, and
        ``self.model`` is replaced by the best estimator found.

        Args:
            X: Training features, passed straight to ``GridSearchCV.fit``.
            y: Training targets, passed straight to ``GridSearchCV.fit``.

        Returns:
            self, to allow call chaining.

        Raises:
            ValueError: If ``self.model_name`` is not a supported name.
        """
        self.param_grid = self._grid_for(self.model_name)

        # refit=True (spelled out on purpose) makes GridSearchCV retrain the
        # winning configuration on all of X, y, so best_estimator_ is fitted.
        grid_search = GridSearchCV(
            estimator=self.model,
            param_grid=self.param_grid,
            cv=5,
            scoring='neg_mean_squared_error',
            n_jobs=-1,
            refit=True
        )
        grid_search.fit(X, y)

        print(f'Best parameters: {grid_search.best_params_}')
        # best_score_ is a negated MSE; flip the sign for display.
        print(f'Best MSE: {-grid_search.best_score_}')

        self.model = grid_search.best_estimator_
        return self

    def fit(self, X, y):
        """Tune the model on ``X``, ``y`` and print its MSE on that same data.

        Returns:
            self, to allow call chaining.
        """
        # Tuning leaves self.model already fitted on the full X, y (refit=True),
        # so a second self.model.fit(X, y) here would only repeat that work.
        self.hyperparameter_tuning(X, y)
        y_pred = self.model.predict(X)
        mse = mean_squared_error(y, y_pred)
        print(f'MSE: {mse}')
        return self

    def predict(self, X):
        """Return the current model's predictions for ``X``."""
        return self.model.predict(X)

    def plot_learning_curve(self, X, y):
        """Plot mean +/- one standard deviation of train and CV scores against training-set size.

        Uses 5-fold cross-validation at ten training sizes from 10% to 100%.
        No ``scoring`` argument is passed to ``learning_curve``, so the plotted
        "Score" is its default metric rather than the negative MSE used for
        tuning.
        """
        from sklearn.model_selection import learning_curve
        import matplotlib.pyplot as plt
        import numpy as np
        train_sizes, train_scores, test_scores = learning_curve(
            self.model, X, y, cv=5, n_jobs=-1, train_sizes=np.linspace(0.1, 1.0, 10))
        # Each row holds the per-fold scores for one training size.
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)
        plt.figure(figsize=(10, 6))
        plt.title(f"Learning Curve for {self.model_name}")
        plt.xlabel("Training examples")
        plt.ylabel("Score")
        plt.grid()
        # Shaded bands show one standard deviation around each mean curve.
        plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                         train_scores_mean + train_scores_std, alpha=0.1, color="r")
        plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                         test_scores_mean + test_scores_std, alpha=0.1, color="g")
        plt.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
        plt.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")
        plt.legend(loc="best")
        plt.show()
    

In [None]:
class MyFavoriteBinaryClassifier:
    """Build, grid-search and evaluate one binary classifier chosen by name.

    Supported names are 'Logistic', 'SVC', 'RandomForest', 'GradientBoosting'
    and 'XGBoost'. Each estimator class is imported only when its name is
    requested, so xgboost is needed only for 'XGBoost'.

    Attributes:
        model_name: The name passed to the constructor.
        param_grid: The grid used by the last ``hyperparameter_tuning`` call,
            or ``None`` before tuning.
        model: The current estimator; replaced by the best estimator found
            once ``hyperparameter_tuning`` has run.
    """

    # Hyperparameter grids searched by ``hyperparameter_tuning``, keyed by model name.
    _PARAM_GRIDS = {
        'Logistic': {
            'C': [0.1, 1, 10],
            'penalty': ['l1', 'l2'],
            'solver': ['liblinear', 'saga'],
        },
        'SVC': {
            'C': [0.1, 1, 10],
            'kernel': ['linear', 'rbf'],
            'gamma': ['scale', 'auto'],
        },
        'RandomForest': {
            'n_estimators': [100, 200, 300],
            'max_depth': [None, 5, 10],
            'min_samples_split': [2, 5, 10],
        },
        'GradientBoosting': {
            'n_estimators': [100, 200, 300],
            'learning_rate': [0.1, 0.01, 0.001],
            'max_depth': [3, 5, 7],
        },
        'XGBoost': {
            'n_estimators': [100, 200, 300],
            'learning_rate': [0.1, 0.01, 0.001],
            'max_depth': [3, 5, 7],
        },
    }

    def __init__(self, model_name: Literal["Logistic","SVC","RandomForest","GradientBoosting","XGBoost"]):
        """Create the untuned estimator for ``model_name``.

        Raises:
            ValueError: If ``model_name`` is not one of the supported names.
        """
        self.model_name: Literal["Logistic","SVC","RandomForest","GradientBoosting","XGBoost"]= model_name
        self.param_grid = None
        # Imports stay inside the branches so only the selected library is loaded.
        if model_name == 'Logistic':
            from sklearn.linear_model import LogisticRegression
            self.model = LogisticRegression()
        elif model_name == 'SVC':
            from sklearn.svm import SVC
            self.model = SVC()
        elif model_name == 'RandomForest':
            from sklearn.ensemble import RandomForestClassifier
            self.model = RandomForestClassifier()
        elif model_name == 'GradientBoosting':
            from sklearn.ensemble import GradientBoostingClassifier
            self.model = GradientBoostingClassifier()
        elif model_name == 'XGBoost':
            from xgboost import XGBClassifier
            self.model = XGBClassifier()
        else:
            raise ValueError('Invalid model name')

    @classmethod
    def _grid_for(cls, model_name):
        """Return a fresh copy of the search grid for ``model_name`` or raise ValueError."""
        # The isinstance check keeps unhashable values (e.g. lists) on the
        # ValueError path instead of failing inside the dict lookup.
        if not isinstance(model_name, str) or model_name not in cls._PARAM_GRIDS:
            raise ValueError('Invalid model name')
        return {param: list(values) for param, values in cls._PARAM_GRIDS[model_name].items()}

    def hyperparameter_tuning(self, X, y):
        """Grid-search the model's hyperparameters with 5-fold cross-validation.

        The search is scored by accuracy. The best parameters and best score
        are printed, and ``self.model`` is replaced by the best estimator found.

        Args:
            X: Training features, passed straight to ``GridSearchCV.fit``.
            y: Training labels, passed straight to ``GridSearchCV.fit``.

        Returns:
            self, to allow call chaining.

        Raises:
            ValueError: If ``self.model_name`` is not a supported name.
        """
        self.param_grid = self._grid_for(self.model_name)

        # refit=True (spelled out on purpose) makes GridSearchCV retrain the
        # winning configuration on all of X, y, so best_estimator_ is fitted.
        grid_search = GridSearchCV(
            estimator=self.model,
            param_grid=self.param_grid,
            cv=5,
            scoring='accuracy',
            n_jobs=-1,
            refit=True
        )
        grid_search.fit(X, y)

        print(f'Best parameters: {grid_search.best_params_}')
        print(f'Best accuracy: {grid_search.best_score_}')

        self.model = grid_search.best_estimator_
        return self

    def fit(self, X, y):
        """Tune the model on ``X``, ``y`` and print its accuracy on that same data.

        Returns:
            self, to allow call chaining (matches ``hyperparameter_tuning``).
        """
        from sklearn.metrics import accuracy_score
        # Tuning leaves self.model already fitted on the full X, y (refit=True),
        # so a second self.model.fit(X, y) here would only repeat that work.
        self.hyperparameter_tuning(X, y)
        y_pred = self.model.predict(X)
        accuracy = accuracy_score(y, y_pred)
        print(f"Accuracy: {accuracy}")
        return self

    def predict(self, X):
        """Return the current model's predicted labels for ``X``."""
        return self.model.predict(X)

    def _positive_class_scores(self, X):
        """Return one ranking score per row of ``X`` for the precision-recall curve.

        Uses column 1 of ``predict_proba`` when the model offers it; otherwise
        falls back to ``decision_function`` (the ``SVC()`` built in
        ``__init__`` does not enable probability estimates).

        Raises:
            AttributeError: If the model provides neither method.
        """
        if hasattr(self.model, 'predict_proba'):
            return self.model.predict_proba(X)[:, 1]
        if hasattr(self.model, 'decision_function'):
            return self.model.decision_function(X)
        raise AttributeError(
            f"{type(self.model).__name__} provides neither predict_proba nor decision_function"
        )

    def plot_precision_recall_curve(self, X, y):
        """Plot the precision-recall curve on ``X``, ``y`` with average precision in the title."""
        from sklearn.metrics import average_precision_score
        from sklearn.metrics import PrecisionRecallDisplay
        import matplotlib.pyplot as plt
        y_scores = self._positive_class_scores(X)
        average_precision = average_precision_score(y, y_scores)
        disp = PrecisionRecallDisplay.from_predictions(y, y_scores, name=self.model_name)
        _ = disp.ax_.set_title(f"2-class Precision-Recall curve: "
                               f"AP={average_precision:0.2f}")
        plt.show()

In [None]:
# Build, tune and plot each supported regressor in turn.
# NOTE: X_train / y_train are whatever the most recently executed data cell
# assigned to those names.
for reg in ('Linear', 'Ridge', 'Lasso', 'ElasticNet', 'SGD'):
    print(f"Testing {reg} regression")

    # Construction problems are reported and the loop moves to the next model.
    try:
        flr = MyFavoriteLinearregressor(reg)
    except ValueError as v:
        print(v)
        continue
    except Exception as e:
        print("something else went wrong",e,sep="\n\n")
        continue

    # Fitting and plotting run outside the try, so their errors still surface.
    flr.fit(X_train, y_train)
    flr.plot_learning_curve(X_train, y_train)

In [None]:
# Build, tune and evaluate each supported classifier in turn.
# NOTE: X_train / y_train / X_test / y_test are whatever the most recently
# executed data cell assigned to those names.
for bc in ['Logistic','SVC','RandomForest','GradientBoosting','XGBoost']:
    print(f"Testing {bc} classifier")
    try:
        fbc = MyFavoriteBinaryClassifier(bc)
    except ValueError as v:
        print(v)
    except Exception as e:
        print("something else went wrong",e,sep="\n\n")
    else:
        fbc.fit(X_train, y_train)
        try:
            fbc.plot_precision_recall_curve(X_test, y_test)
        except Exception as e:
            # Plotting is best-effort, but say why it was skipped instead of
            # silently dropping the error; the loop then moves on as before.
            print(f"Could not plot precision-recall curve for {bc}: {e}")