In [None]:
# Mount Google Drive at /content/drive; the later cells read their config/CSV
# from, and write their result files to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import yaml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier, Perceptron, \
    PassiveAggressiveClassifier
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, \
    ExtraTreesClassifier, BaggingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from xgboost import XGBClassifier
import os

# Load configuration from YAML file
def load_config(config_path: str) -> dict:
    """Read a YAML configuration file and return its top-level mapping.

    Args:
        config_path: Path to the YAML file to parse.

    Returns:
        The parsed configuration as a dictionary.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is empty or its top level is not a mapping.
    """
    # Explicit encoding so decoding does not depend on the platform's locale default.
    with open(config_path, 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)

    # An empty document parses to None (and a list/scalar document to a non-dict);
    # fail here with a clear message instead of a confusing TypeError at the first
    # config['...'] lookup.
    if not isinstance(config, dict):
        raise ValueError(
            f'Config file {config_path!r} must contain a top-level mapping, '
            f'got {type(config).__name__}'
        )
    return config

# Load and preprocess data
def main(
    config_path: str,
    data_path: str = '/content/drive/MyDrive/projects/Stock/Preprocessed.csv',
    results_path: str = '/content/drive/MyDrive/projects/Stock/model_performance_colab.txt',
    random_state: int = 42,
):
    """Fit a set of default-configured classifiers and log each test accuracy.

    The preprocessed CSV is loaded, rows with missing values are dropped, the
    feature/label columns named in the config are selected, the data is split
    80/20, features are standardised, and every model is fitted and scored.
    One line per model is appended to ``results_path``.

    Args:
        config_path: Path to the YAML config; must provide ``data.features``
            and ``data.label``.
        data_path: CSV file holding the preprocessed data.
        results_path: Text file the accuracy lines are appended to.
        random_state: Seed used for the train/test split and for every model
            that has a stochastic component, so repeated runs log the same
            numbers.

    Raises:
        ValueError: If the label column holds a value outside {-1, 0, 1} when
            the XGBoost model is reached.
    """
    config = load_config(config_path)

    # Load the preprocessed data and discard incomplete rows.
    df = pd.read_csv(data_path).dropna()

    # Define features and labels based on the config
    X = df[config['data']['features']]
    y = df[config['data']['label']]
    X = X.loc[:, ~X.columns.isin(['Open', 'Close Time'])]

    # Train/Test split
    # NOTE(review): this is a shuffled split; if the rows are time-ordered
    # observations, a chronological split may be more appropriate — confirm.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )

    # Scaling features: fit on the training part only, then apply to both.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # List of models to test. Estimators with a stochastic component share the
    # split's seed so the logged accuracies are reproducible.
    xgboost_name = "XGBoost Classifier"
    models = {
        "Logistic Regression": LogisticRegression(),
        "KNeighbors Classifier": KNeighborsClassifier(),
        "Decision Tree": DecisionTreeClassifier(random_state=random_state),
        "Random Forest": RandomForestClassifier(random_state=random_state),
        "Gradient Boosting": GradientBoostingClassifier(random_state=random_state),
        "AdaBoost": AdaBoostClassifier(random_state=random_state),
        "Extra Trees": ExtraTreesClassifier(random_state=random_state),
        "Bagging": BaggingClassifier(random_state=random_state),
        "MLP Classifier": MLPClassifier(random_state=random_state),
        "SVC": SVC(),
        "Linear SVC": LinearSVC(random_state=random_state),
        "Ridge Classifier": RidgeClassifier(),
        "SGD Classifier": SGDClassifier(random_state=random_state),
        "GaussianNB": GaussianNB(),
        "Quadratic Discriminant Analysis": QuadraticDiscriminantAnalysis(),
        "Perceptron": Perceptron(random_state=random_state),
        "Passive Aggressive": PassiveAggressiveClassifier(random_state=random_state),
        "Nearest Centroid": NearestCentroid(),
        xgboost_name: XGBClassifier(random_state=random_state),
    }

    # Remap used for XGBoost only: labels [-1, 0, 1] become [2, 0, 1].
    xgboost_label_map = {-1: 2, 0: 0, 1: 1}

    # Evaluate each model
    for name, model in models.items():
        if name == xgboost_name:
            y_fit = y_train.map(xgboost_label_map)
            y_eval = y_test.map(xgboost_label_map)
            # Series.map turns any label missing from the dict into NaN; report
            # that explicitly rather than passing NaN targets on to the model.
            if y_fit.isna().any() or y_eval.isna().any():
                raise ValueError(
                    'Label column contains values outside {-1, 0, 1}; '
                    'cannot remap them for XGBoost'
                )
        else:
            y_fit, y_eval = y_train, y_test

        model.fit(X_train, y_fit)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_eval, y_pred)

        # Append after every model so finished results survive an interrupted run.
        with open(results_path, 'a') as txt:
            txt.write(f'{name} Accuracy: {accuracy:.4f}\n')

# Replace with your Google Drive path to the config file.
# Runs the model comparison defined by main() with that config.
if __name__ == "__main__":
    main('/content/drive/MyDrive/projects/Stock/config.yaml')




In [None]:
import pandas as pd
import yaml
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, \
    ExtraTreesClassifier, BaggingClassifier

# Load configuration from YAML file
def load_config(config_path: str) -> dict:
    """Read a YAML configuration file and return its top-level mapping.

    Args:
        config_path: Path to the YAML file to parse.

    Returns:
        The parsed configuration as a dictionary.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is empty or its top level is not a mapping.
    """
    # Explicit encoding so decoding does not depend on the platform's locale default.
    with open(config_path, 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)

    # An empty document parses to None (and a list/scalar document to a non-dict);
    # fail here with a clear message instead of a confusing TypeError at the first
    # config['...'] lookup.
    if not isinstance(config, dict):
        raise ValueError(
            f'Config file {config_path!r} must contain a top-level mapping, '
            f'got {type(config).__name__}'
        )
    return config

def main():
    """Run repeated randomized hyper-parameter searches for Extra Trees.

    Loads the config and preprocessed CSV from Google Drive, performs 15
    rounds of ``RandomizedSearchCV`` (10 sampled settings each, 5-fold CV,
    accuracy scoring) and writes the best mean CV accuracy and its parameters
    to ``best_extra_trees_params.txt``.

    Any ``Exception`` is reported on stdout together with its traceback and is
    not re-raised.
    """
    import traceback

    try:
        # Load configuration
        config = load_config('/content/drive/MyDrive/projects/Stock/config.yaml')  # Specify the path to your config file

        # Load preprocessed data directly from CSV file and drop incomplete rows
        df = pd.read_csv('/content/drive/MyDrive/projects/Stock/Preprocessed.csv').dropna()

        # Define features and labels based on the config
        X = df[config['data']['features']]
        X = X.loc[:, ~X.columns.isin(['Open', 'Close Time'])]
        y = df[config['data']['label']]

        # Hyperparameter space for Extra Trees
        param_space = {
            'n_estimators': [50, 100, 200, 500],
            'max_features': ['sqrt', 'log2', None],
            'max_depth': [None, 10, 20, 30, 40, 50],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'bootstrap': [True, False]
        }

        # Set up K-Fold cross-validation
        kf = KFold(n_splits=5, shuffle=True, random_state=42)

        base_seed = 42
        best_accuracy = 0
        best_params = None

        # Randomized search with cross-validation
        for i in range(15):  # 15 iterations
            # Seed the estimator so a candidate's score is reproducible.
            model = ExtraTreesClassifier(random_state=base_seed)

            # The search seed must change from round to round: with one fixed
            # seed every round samples the very same 10 settings from
            # param_space, so the extra rounds explore nothing new.
            random_search = RandomizedSearchCV(
                estimator=model,
                param_distributions=param_space,
                n_iter=10,  # Number of parameter settings sampled
                cv=kf,
                scoring='accuracy',
                random_state=base_seed + i,
                n_jobs=-1,
                verbose=1
            )

            # Fit and evaluate
            random_search.fit(X, y)
            accuracy = random_search.best_score_
            params = random_search.best_params_

            # Check if this is the best accuracy
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_params = params

        # Log best parameters to a file
        with open('/content/drive/MyDrive/projects/Stock/best_extra_trees_params.txt', 'w') as f:
            f.write(f'Best Accuracy: {best_accuracy:.4f}\n')
            f.write(f'Best Hyperparameters: {best_params}\n')

    except Exception as e:
        print(f'Error occurred: {e}')
        # The message alone (e.g. just a missing key name) rarely locates the
        # failure; include the traceback.
        traceback.print_exc()


# Run the Extra Trees hyper-parameter search defined by main().
if __name__ == "__main__":
    main()


Fitting 5 folds for each of 10 candidates, totalling 50 fits


KeyboardInterrupt: 

In [None]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV, KFold
import yaml

# Load configuration from YAML file
def load_config(config_path: str) -> dict:
    """Read a YAML configuration file and return its top-level mapping.

    Args:
        config_path: Path to the YAML file to parse.

    Returns:
        The parsed configuration as a dictionary.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is empty or its top level is not a mapping.
    """
    # Explicit encoding so decoding does not depend on the platform's locale default.
    with open(config_path, 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)

    # An empty document parses to None (and a list/scalar document to a non-dict);
    # fail here with a clear message instead of a confusing TypeError at the first
    # config['...'] lookup.
    if not isinstance(config, dict):
        raise ValueError(
            f'Config file {config_path!r} must contain a top-level mapping, '
            f'got {type(config).__name__}'
        )
    return config


def main():
    """Run repeated randomized hyper-parameter searches for XGBoost.

    Loads the config and preprocessed CSV from Google Drive, remaps the labels
    [-1, 0, 1] to [2, 0, 1], one-hot encodes the features, performs 15 rounds
    of ``RandomizedSearchCV`` (10 sampled settings each, 5-fold CV, accuracy
    scoring) and writes the best mean CV accuracy and its parameters to
    ``best_xgboost_params.txt``.

    Any ``Exception`` is reported on stdout together with its traceback and is
    not re-raised.
    """
    import traceback

    try:
        # Load configuration
        config = load_config('/content/drive/MyDrive/projects/Stock/config.yaml')

        # Load preprocessed data directly from CSV file and drop incomplete rows
        df = pd.read_csv('/content/drive/MyDrive/projects/Stock/Preprocessed.csv').dropna()

        # Define features and labels based on the config
        X = df[config['data']['features']]
        X = X.loc[:, ~X.columns.isin(['Open', 'Close Time'])]
        y = df[config['data']['label']]

        # Map the target labels: -1 to 2, 0 to 0, and 1 to 1
        y_mapped = y.map({-1: 2, 0: 0, 1: 1})
        # Series.map turns any label missing from the dict into NaN; report that
        # explicitly rather than passing NaN targets on to the model.
        if y_mapped.isna().any():
            raise ValueError(
                'Label column contains values outside {-1, 0, 1}; '
                'cannot remap them for XGBoost'
            )

        # Convert categorical columns to numerical (if applicable)
        # For example, using one-hot encoding
        X = pd.get_dummies(X, drop_first=True)

        # Hyperparameter space for XGBoost
        param_space = {
            'n_estimators': [50, 100, 200, 500],
            'max_depth': [3, 5, 7, 10],
            'learning_rate': [0.01, 0.1, 0.2],
            'subsample': [0.5, 0.7, 1],
            'colsample_bytree': [0.5, 0.7, 1],
            'gamma': [0, 0.1, 0.2]
        }

        # Set up K-Fold cross-validation
        kf = KFold(n_splits=5, shuffle=True, random_state=42)

        base_seed = 42
        best_accuracy = 0
        best_params = None

        # Randomized search with cross-validation
        for i in range(15):  # 15 iterations
            # Only settings that are NOT searched are fixed here: every key of
            # param_space is overridden for each sampled candidate, so also
            # passing it to the constructor would have no effect.
            model = XGBClassifier(
                tree_method="hist",        # histogram-based tree construction
                random_state=base_seed,    # reproducible row/column subsampling
            )

            # The search seed must change from round to round: with one fixed
            # seed every round samples the very same 10 settings from
            # param_space, so the extra rounds explore nothing new.
            random_search = RandomizedSearchCV(
                estimator=model,
                param_distributions=param_space,
                n_iter=10,
                cv=kf,
                scoring='accuracy',
                random_state=base_seed + i,
                n_jobs=-1,
                verbose=1
            )

            # Fit and evaluate
            random_search.fit(X, y_mapped)  # Use the mapped labels
            accuracy = random_search.best_score_
            params = random_search.best_params_

            # Check if this is the best accuracy
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_params = params

        # Log best parameters to a file
        with open('/content/drive/MyDrive/projects/Stock/best_xgboost_params.txt', 'w') as f:
            f.write(f'Best Accuracy: {best_accuracy:.4f}\n')
            f.write(f'Best Hyperparameters: {best_params}\n')

    except Exception as e:
        print(f'Error occurred: {e}')
        # The message alone (e.g. just a missing key name) rarely locates the
        # failure; include the traceback.
        traceback.print_exc()


# Run the XGBoost hyper-parameter search defined by main().
if __name__ == "__main__":
    main()
