# ***Ultimate Dictionary***
Pre-Made Algorithm Models

#Linear Regression


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Ordinary least squares on the training split; fit() hands back the fitted
# estimator, so construction and training are chained.
regressor = LinearRegression().fit(X_train, y_train)

# Predictions are made on the same rows the model was trained on, so the
# metrics below describe in-sample fit only.
y_train_pred = regressor.predict(X_train)

mse, r2 = (
    mean_squared_error(y_train, y_train_pred),
    r2_score(y_train, y_train_pred),
)

print("Training MSE:", mse)
print("Training R²:", r2)



```
LinearRegression(
    fit_intercept=True,
    # normalize=... was removed in scikit-learn 1.2; scale features with StandardScaler in a Pipeline instead
    copy_X=True,
    n_jobs=-1  # Use all processors
)
```



#Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Default logistic regression, trained in one chained expression
# (fit() returns the estimator itself).
logistic_model = LogisticRegression().fit(X_train, y_train)

# In-sample class predictions.
y_train_pred = logistic_model.predict(X_train)

# Learned parameters of the linear decision function.
print("Coefficients:", logistic_model.coef_)
print("Intercept:", logistic_model.intercept_)



```
LogisticRegression(
    penalty='l2',          # L2 regularization
    C=1.0,                 # Regularization strength
    fit_intercept=True,    # Include intercept
    solver='lbfgs',        # Optimization algorithm
    max_iter=200,          # Increase iterations
    random_state=42,       # For reproducibility
    class_weight='balanced'  # Handle class imbalance
)
```



# Polynomial Regression


Polynomial regression is not a separate regression model but involves transforming the input features into polynomial features and then applying linear regression.

Scikit-learn provides `PolynomialFeatures` in `sklearn.preprocessing` to generate polynomial features. The example below uses its default settings; pass arguments such as `degree` to `PolynomialFeatures(...)` to customise the expansion:

In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

# Two-stage pipeline: expand the inputs into polynomial terms, then fit a
# linear model on the expanded features. The step names are used below to
# look the regressor up again.
model = Pipeline(
    steps=[
        ("polynomial_features", PolynomialFeatures()),
        ("linear_regression", LinearRegression()),
    ]
).fit(X_train, y_train)

# In-sample predictions from the full pipeline.
y_train_pred = model.predict(X_train)

# Coefficients refer to the expanded polynomial features, not the raw columns.
print("Coefficients:", model.named_steps["linear_regression"].coef_)
print("Intercept:", model.named_steps["linear_regression"].intercept_)

# StepWise Regression

Stepwise regression is not directly implemented in scikit-learn, but it can be performed programmatically by iteratively adding or removing features based on some criteria (e.g., p-values, adjusted R², AIC, or BIC). Below are examples of how to perform stepwise regression, both without and with custom parameters.

In [None]:
import statsmodels.api as sm
import numpy as np

# Greedy forward selection driven by adjusted R²: start with no features and
# keep adding the single column that raises adjusted R² the most.
remaining_features = list(range(X_train.shape[1]))
selected_features = []
best_adj_r2 = -np.inf

while remaining_features:
    # Score every candidate extension of the current feature set.
    adj_r2_values = []
    for feature in remaining_features:
        current_features = [*selected_features, feature]
        X_subset = X_train[:, current_features]
        model = sm.OLS(y_train, sm.add_constant(X_subset)).fit()
        adj_r2_values.append((model.rsquared_adj, feature))

    # Highest adjusted R² first; the sort is stable, so ties keep their
    # original candidate order.
    adj_r2_values.sort(key=lambda pair: pair[0], reverse=True)
    best_candidate, best_feature = adj_r2_values[0]

    # Guard clause: stop as soon as the best candidate fails to improve.
    # Written as `not (a > b)` so a NaN score also ends the search.
    if not best_candidate > best_adj_r2:
        break

    best_adj_r2 = best_candidate
    selected_features.append(best_feature)
    remaining_features.remove(best_feature)


# Refit once on the chosen columns to obtain the reported model.
X_selected = X_train[:, selected_features]
final_model = sm.OLS(y_train, sm.add_constant(X_selected)).fit()

print("Selected features:", selected_features)
print(final_model.summary())


In [None]:
import statsmodels.api as sm

def stepwise_regression(X, y, entry_pvalue=0.05, stay_pvalue=0.05):
    """Select predictor columns by bidirectional stepwise OLS.

    Each round performs a forward step followed by a backward step:

    * forward: among the columns not yet selected, the one whose coefficient
      has the smallest p-value is added if that p-value is below
      ``entry_pvalue``;
    * backward: among the selected columns, the one with the largest p-value
      is dropped if that p-value is above ``stay_pvalue``.

    The search stops when a round changes nothing, or when it arrives at a
    set of columns it has already visited. The second rule guarantees
    termination even when a column keeps being added and removed again
    (for example when ``entry_pvalue > stay_pvalue``).

    Args:
        X: 2-D array-like of predictors, one column per candidate feature.
        y: Response values, passed unchanged to ``sm.OLS``.
        entry_pvalue: A candidate is added only if its p-value is strictly
            below this threshold.
        stay_pvalue: A selected column is removed if its p-value is strictly
            above this threshold.

    Returns:
        tuple: ``(final_model, selected_features)`` where ``final_model`` is
        the fitted OLS results object for the selected columns plus an
        intercept, and ``selected_features`` is the list of selected column
        indices in the order they were added.
    """
    import numpy as np  # local import: the surrounding cell only imports statsmodels

    X = np.asarray(X)  # accept DataFrames too; columns are addressed by position
    n_features = X.shape[1]

    def _coef_pvalues(columns):
        """Fit OLS with an intercept on ``columns`` and return its p-values as an array."""
        results = sm.OLS(y, sm.add_constant(X[:, columns])).fit()
        # Converting avoids positional indexing on a labelled pandas Series,
        # which is what statsmodels returns when y is a pandas object.
        return np.asarray(results.pvalues)

    selected_features = []
    visited = {frozenset()}  # feature sets already reached; used to detect cycles

    while True:
        changed = False

        # Forward step: try to add the most significant remaining feature.
        candidates = [f for f in range(n_features) if f not in selected_features]
        new_pvalues = {
            feature: _coef_pvalues(selected_features + [feature])[-1]
            for feature in candidates
        }
        if new_pvalues:
            best_feature = min(new_pvalues, key=new_pvalues.get)
            if new_pvalues[best_feature] < entry_pvalue:
                selected_features.append(best_feature)
                changed = True

        # Backward step: drop the least significant selected feature.
        if selected_features:
            pvalues = _coef_pvalues(selected_features)[1:]  # skip the intercept
            worst_position = int(np.argmax(pvalues))
            if pvalues[worst_position] > stay_pvalue:
                del selected_features[worst_position]
                changed = True

        # Stopping condition: nothing moved, or we are back at a known state.
        state = frozenset(selected_features)
        if not changed or state in visited:
            break
        visited.add(state)

    final_model = sm.OLS(y, sm.add_constant(X[:, selected_features])).fit()
    return final_model, selected_features


# Run the bidirectional search on the training split with both thresholds
# spelled out, then report the chosen columns and the fitted model.
final_model, selected_features = stepwise_regression(
    X_train,
    y_train,
    entry_pvalue=0.05,
    stay_pvalue=0.05,
)
print("Selected features:", selected_features)
print(final_model.summary())


#Ridge Regression

In [None]:
from sklearn.linear_model import Ridge

# L2-regularised linear regression with default settings; fit() returns the
# estimator, so it is trained in the same expression.
ridge_model = Ridge().fit(X_train, y_train)

# In-sample predictions.
y_train_pred = ridge_model.predict(X_train)

print("Coefficients:", ridge_model.coef_)
print("Intercept:", ridge_model.intercept_)



```
ridge_model = Ridge(
    alpha=0.5,           # Regularization strength (smaller value = less regularization)
    fit_intercept=True,  # Include intercept in the model
    solver='saga',       # Use 'saga' solver for large datasets or sparse data
    max_iter=1000,       # Maximum number of iterations for optimization
    random_state=42      # Seed for reproducibility
)
```



# Lasso Regression

Lasso regression is a linear regression model with L1 regularization, which performs feature selection by shrinking some coefficients to exactly zero. Below are examples of how to train a Lasso regression model with and without parameters using scikit-learn.

In [None]:
from sklearn.linear_model import Lasso

# L1-regularised linear regression with default settings, trained in one
# chained call.
lasso_model = Lasso().fit(X_train, y_train)

# In-sample predictions.
y_train_pred = lasso_model.predict(X_train)

# Coefficients shrunk to exactly zero mark features the model dropped.
print("Coefficients:", lasso_model.coef_)
print("Intercept:", lasso_model.intercept_)



```
lasso_model = Lasso(
    alpha=0.01,          # Regularization strength (smaller value = less regularization)
    fit_intercept=True,  # Include intercept in the model
    max_iter=5000,       # Maximum number of iterations for optimization
    tol=1e-6,            # Tolerance for stopping criteria
    selection='random',  # Random feature selection for faster convergence
    random_state=42      # Seed for reproducibility
)
```



#Elastic Net Regression

In [None]:
from sklearn.linear_model import ElasticNet

# Elastic net (combined L1/L2 penalty) with default settings, trained in one
# chained call.
elasticnet_model = ElasticNet().fit(X_train, y_train)

# In-sample predictions.
y_train_pred = elasticnet_model.predict(X_train)

print("Coefficients:", elasticnet_model.coef_)
print("Intercept:", elasticnet_model.intercept_)



```
elasticnet_model = ElasticNet(
    alpha=0.01,          # Regularization strength (smaller value = less regularization)
    l1_ratio=0.7,        # Proportion of L1 penalty (closer to 1 = more L1 effect)
    fit_intercept=True,  # Include intercept in the model
    max_iter=5000,       # Maximum number of iterations
    tol=1e-6,            # Tolerance for stopping criteria
    random_state=42,     # Seed for reproducibility
    selection='random'   # Random feature updates for faster convergence
)
```



#K Nearest Neighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# 3-nearest-neighbour classifier; fit() returns the fitted estimator.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# The training rows are scored against their own labels, so this is
# in-sample accuracy.
y_pred = knn.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)

print(f"Training Accuracy: {accuracy * 100:.2f}%")



```
# Custom parameters (adjust these as per your needs)
n_neighbors = 5  # Number of neighbors to use for prediction
metric = 'euclidean'  # Distance metric (e.g., 'euclidean', 'manhattan', etc.)
weights = 'uniform'  # Weights to assign to the neighbors (e.g., 'uniform', 'distance')

# Initialize the KNN classifier with custom parameters
knn = KNeighborsClassifier(n_neighbors=n_neighbors, metric=metric, weights=weights)
```



# Decision Tree Classification

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Decision tree with default hyperparameters, trained in one chained call.
dt_default = DecisionTreeClassifier().fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred_default = dt_default.predict(X_train)
accuracy_default = accuracy_score(y_train, y_pred_default)

print(f"Decision Tree Training Accuracy: {accuracy_default * 100:.2f}%")



```
# Custom parameters (adjust as needed)
max_depth = 5  # Maximum depth of the tree
min_samples_split = 10  # Minimum samples required to split an internal node
criterion = 'gini'  # Criterion to measure the quality of a split ('gini' or 'entropy')
splitter = 'best'  # Splitting strategy ('best' or 'random')

# Initialize the Decision Tree classifier with custom parameters
dt_custom = DecisionTreeClassifier(max_depth=max_depth,
                                   min_samples_split=min_samples_split,
                                   criterion=criterion,
                                   splitter=splitter)
```



# Support Vector Machine

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Support vector classifier with default hyperparameters, trained in one
# chained call.
svm_default = SVC().fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred_default = svm_default.predict(X_train)
accuracy_default = accuracy_score(y_train, y_pred_default)

print(f"Default SVM Training Accuracy: {accuracy_default * 100:.2f}%")



```
# Custom parameters (adjust as needed)
kernel = 'rbf'  # Kernel type (options: 'linear', 'poly', 'rbf', 'sigmoid')
C = 1.0  # Inverse regularization strength: smaller values regularize more; larger values fit the training data more closely and can overfit
gamma = 'scale'  # Kernel coefficient for 'rbf', 'poly', 'sigmoid' ('scale' or 'auto')
degree = 3  # Degree of the polynomial kernel function (if 'poly' kernel is used)

# Initialize the SVM classifier with custom parameters
svm_custom = SVC(kernel=kernel, C=C, gamma=gamma, degree=degree)
```



#Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Gaussian naive Bayes with defaults; var_smoothing (e.g. 1e-9) is the
# parameter you can change.
nb_default = GaussianNB().fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred_default = nb_default.predict(X_train)
accuracy_default = accuracy_score(y_train, y_pred_default)

print(f"Default Naive Bayes Training Accuracy: {accuracy_default * 100:.2f}%")

#Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Random forest with default hyperparameters, trained in one chained call.
# No random_state is set, so results can differ between runs.
rf_default = RandomForestClassifier().fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred_default = rf_default.predict(X_train)
accuracy_default = accuracy_score(y_train, y_pred_default)

print(f"Default Random Forest Training Accuracy: {accuracy_default * 100:.2f}%")



```
n_estimators = 100  # Number of trees in the forest
max_depth = None  # Maximum depth of the trees
min_samples_split = 2  # Minimum number of samples required to split an internal node
min_samples_leaf = 1  # Minimum number of samples required to be at a leaf node
criterion = 'gini'  # The function to measure the quality of a split ('gini' or 'entropy')

# Initialize the Random Forest classifier with custom parameters
rf_custom = RandomForestClassifier(n_estimators=n_estimators,
                                   max_depth=max_depth,
                                   min_samples_split=min_samples_split,
                                   min_samples_leaf=min_samples_leaf,
                                   criterion=criterion)
```



# Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Gradient-boosted trees with default hyperparameters, trained in one chained
# call.
gb_default = GradientBoostingClassifier().fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred_default = gb_default.predict(X_train)
accuracy_default = accuracy_score(y_train, y_pred_default)

print(f"Default Gradient Boosting Training Accuracy: {accuracy_default * 100:.2f}%")



```
n_estimators = 100  # Number of boosting stages (trees)
learning_rate = 0.1  # Step size used to update the model in each iteration
max_depth = 3  # Maximum depth of the individual trees
min_samples_split = 2  # Minimum number of samples required to split an internal node
subsample = 1.0  # Proportion of samples used for fitting each tree (0.0 to 1.0)
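loss = 'log_loss'  # Loss function to minimize ('log_loss' for logistic-regression-style deviance, or 'exponential' for AdaBoost-like behaviour); it was called 'deviance' before scikit-learn 1.1 and that name was removed in 1.3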

# Initialize the Gradient Boosting classifier with custom parameters
gb_custom = GradientBoostingClassifier(n_estimators=n_estimators,
                                      learning_rate=learning_rate,
                                      max_depth=max_depth,
                                      min_samples_split=min_samples_split,
                                      subsample=subsample,
                                      loss=loss)
```



# Bagging

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Bag 50 decision trees. The base learner is passed positionally on purpose:
# its keyword was renamed from `base_estimator` to `estimator` in
# scikit-learn 1.2 and the old name was removed in 1.4, whereas the first
# positional argument is the base learner in both old and new releases.
bagging = BaggingClassifier(DecisionTreeClassifier(), n_estimators=50)
bagging.fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred = bagging.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
print(f"Bagging Training Accuracy: {accuracy * 100:.2f}%")

# Ada Boost

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Boost 50 decision stumps (depth-1 trees). The base learner is passed
# positionally on purpose: its keyword was renamed from `base_estimator` to
# `estimator` in scikit-learn 1.2 and the old name was removed in 1.4, whereas
# the first positional argument is the base learner in both old and new
# releases.
ada_boost = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=50)
ada_boost.fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred = ada_boost.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
print(f"AdaBoost Training Accuracy: {accuracy * 100:.2f}%")


#Ensemble Model

In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Three heterogeneous base learners, all with default settings.
model1, model2, model3 = LogisticRegression(), SVC(), DecisionTreeClassifier()

# Hard voting: the ensemble predicts the majority class label of its members.
voting_clf = VotingClassifier(
    estimators=[("lr", model1), ("svc", model2), ("dt", model3)],
    voting="hard",
).fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred = voting_clf.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
print(f"Voting Classifier Training Accuracy: {accuracy * 100:.2f}%")

#ExtraTreesClassifier (Extremely Randomized Trees)

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score

# 100 extremely randomized trees, each limited to depth 10; trained in one
# chained call.
et = ExtraTreesClassifier(n_estimators=100, max_depth=10).fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred = et.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
print(f"ExtraTrees Training Accuracy: {accuracy * 100:.2f}%")


#XGBoost

In [None]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

# XGBoost classifier: 100 boosting rounds with a 0.1 learning rate, trained
# in one chained call.
xgb_model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1).fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred = xgb_model.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
print(f"XGBoost Training Accuracy: {accuracy * 100:.2f}%")

# LightGBM (Light Gradient Boosting Machine)
A gradient-boosting library designed to train efficiently on large datasets.

In [None]:
import lightgbm as lgb
from sklearn.metrics import accuracy_score

# LightGBM classifier: 100 boosting rounds with a 0.1 learning rate, trained
# in one chained call.
lgb_model = lgb.LGBMClassifier(n_estimators=100, learning_rate=0.1).fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred = lgb_model.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
print(f"LightGBM Training Accuracy: {accuracy * 100:.2f}%")

# CatBoost
A gradient-boosting library with built-in handling of categorical variables.

In [None]:
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score

# CatBoost classifier: 100 boosting iterations with a 0.1 learning rate,
# trained in one chained call.
catboost_model = CatBoostClassifier(iterations=100, learning_rate=0.1).fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred = catboost_model.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
print(f"CatBoost Training Accuracy: {accuracy * 100:.2f}%")

#Neural Networks (MLPClassifier)
A multi-layer perceptron (MLP) neural network classifier for non-linear decision boundaries.

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Multi-layer perceptron with a single hidden layer of 100 units and up to
# 500 training iterations; trained in one chained call.
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500).fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred = mlp.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
print(f"MLP Training Accuracy: {accuracy * 100:.2f}%")

#SGD Classifier (Stochastic Gradient Descent)

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

# Linear classifier trained by stochastic gradient descent, capped at 1000
# passes over the data; trained in one chained call.
sgd = SGDClassifier(max_iter=1000).fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred = sgd.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
print(f"SGD Classifier Training Accuracy: {accuracy * 100:.2f}%")

#Perceptron

In [None]:
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

# Perceptron with default settings, trained in one chained call.
perceptron = Perceptron().fit(X_train, y_train)

# In-sample predictions and their accuracy against the training labels.
y_pred = perceptron.predict(X_train)
accuracy = accuracy_score(y_train, y_pred)
print(f"Perceptron Training Accuracy: {accuracy * 100:.2f}%")

## Dimensionality Reduction Methods

#Principal Component Analysis (PCA)

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Scale the training features with StandardScaler before PCA.
X_scaled = StandardScaler().fit_transform(X_train)
# Keep the first two principal components.
pca = PCA(n_components=2)
# Fit PCA on the scaled data and project it onto those two components.
X_pca = pca.fit_transform(X_scaled)

# t-Distributed Stochastic Neighbor Embedding (t-SNE)

In [None]:
from sklearn.manifold import TSNE
# Embed the training features (used as-is, without the scaling step applied
# in the PCA cell) into two dimensions with t-SNE.
X_tsne = TSNE(n_components=2).fit_transform(X_train)

#Linear Discriminant Analysis (LDA)

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import numpy as np

# LDA can produce at most min(n_features, n_classes - 1) components, so a
# hard-coded n_components=2 raises ValueError on binary problems. Cap the
# request at what the data allows (still 2 whenever 2 is valid).
max_lda_components = min(np.shape(X_train)[1], len(np.unique(y_train)) - 1)
lda = LinearDiscriminantAnalysis(n_components=min(2, max_lda_components))

# Supervised projection: the class labels are required to fit LDA.
X_lda = lda.fit_transform(X_train, y_train)

#Independent Component Analysis (ICA)
Similar to PCA, but finds statistically independent components rather than uncorrelated ones.

In [None]:
from sklearn.decomposition import FastICA
# Extract two independent components with FastICA.
ica = FastICA(n_components=2)
# Fit on the training features and return them expressed in those components.
X_ica = ica.fit_transform(X_train)

## Model Evaluation and Selection

#Cross-Validation (KFold, StratifiedKFold)

In [None]:
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import cross_val_score

# `ridge_clf` was used here without ever being defined in this notebook;
# create the estimator so the cell runs on its own.
ridge_clf = RidgeClassifier()

# 5-fold cross-validation: one score per held-out fold.
scores = cross_val_score(ridge_clf, X_train, y_train, cv=5)
print(f"Cross-validation scores: {scores}")

#Grid Search (Hyperparameter Tuning)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC  # imported here so the cell runs on its own

# Exhaustive search over every C/kernel combination (3 * 2 = 6 candidates),
# each evaluated with 5-fold cross-validation.
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_train, y_train)
print(f"Best parameters: {grid_search.best_params_}")

#Randomized Search (Hyperparameter Tuning)

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC  # imported here so the cell runs on its own

param_dist = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}

# The space above holds only 3 * 2 = 6 combinations. With the default
# n_iter=10 scikit-learn warns and simply tries all of them, which is a grid
# search. Sample a strict subset instead, and fix the seed so the sampled
# candidates are the same on every run.
random_search = RandomizedSearchCV(SVC(), param_dist, n_iter=4, cv=5, random_state=42)
random_search.fit(X_train, y_train)
print(f"Best parameters: {random_search.best_params_}")