<a href="https://colab.research.google.com/github/dimpalsonawane08/DT-assignment/blob/main/DT_assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json

def parse_target_config(json_data):
    """Print the settings stored under the ``"target"`` key of a parsed config.

    Args:
        json_data: Mapping (for example the result of ``json.loads``) that may
            contain a ``"target"`` sub-mapping.

    The ``prediction_type``, ``target`` and ``type`` entries fall back to
    ``"Not specified"`` when absent and ``partitioning`` falls back to
    ``False``.  The function only writes to stdout and returns ``None``.
    """
    section = json_data.get("target", {})

    # (printed label, key inside the "target" section, fallback value),
    # listed in the order the lines are printed.
    report = (
        ("Prediction Type", "prediction_type", "Not specified"),
        ("Target Variable", "target", "Not specified"),
        ("Regression Type", "type", "Not specified"),
        ("Partitioning Enabled", "partitioning", False),
    )
    for label, key, fallback in report:
        print(f"{label}: {section.get(key, fallback)}")

# Example JSON input: a document whose top-level "target" object carries the
# four fields that parse_target_config reports.
json_input = """
{
  "target": {
    "prediction_type": "Regression",
    "target": "petal_width",
    "type": "regression",
    "partitioning": true
  }
}
"""

# Decode the JSON text into a dict, then print the target settings
# (JSON `true` is decoded to Python True).
json_data = json.loads(json_input)
parse_target_config(json_data)


Prediction Type: Regression
Target Variable: petal_width
Regression Type: regression
Partitioning Enabled: True


In [None]:
import pandas as pd
import json

# Feature-handling configuration as JSON text.  Each key under
# "feature_handling" is a column name; apply_imputation reads only the
# "missing_values" and "impute_with" entries of its "feature_details".
json_input = """
{
  "feature_handling": {
    "sepal_length": {
      "feature_name": "sepal_length",
      "is_selected": true,
      "feature_variable_type": "numerical",
      "feature_details": {
        "numerical_handling": "Keep as regular numerical feature",
        "rescaling": "No rescaling",
        "make_derived_feats": false,
        "missing_values": "Impute",
        "impute_with": "Average of values",
        "impute_value": 0
      }
    }
  }
}
"""

# Decode the JSON text into a nested dict used by apply_imputation
feature_config = json.loads(json_input)

# Function to apply imputation based on the feature configuration
def apply_imputation(df, feature_config):
    """Fill missing values of configured columns of ``df`` in place.

    Args:
        df: DataFrame to modify.  Columns named in the configuration are
            overwritten with their imputed version.
        feature_config: Mapping with a ``"feature_handling"`` entry that maps
            column name -> settings.  A column is imputed only when its
            ``"feature_details"`` has ``"missing_values" == "Impute"`` and
            ``"impute_with" == "Average of values"``; entries lacking these
            keys are skipped.

    Returns:
        None.  ``df`` is updated in place.

    Raises:
        KeyError: if ``"feature_handling"`` is missing, or a column selected
            for imputation does not exist in ``df``.
    """
    for feature, config in feature_config["feature_handling"].items():
        details = config.get("feature_details", {})
        if details.get("missing_values") != "Impute":
            continue
        if details.get("impute_with") == "Average of values":
            # Series.mean() skips NaN by default.
            avg_value = df[feature].mean()
            # Assign the filled column back to the frame.  Calling
            # fillna(inplace=True) on the df[feature] intermediate is chained
            # assignment and does not update df under pandas Copy-on-Write.
            df[feature] = df[feature].fillna(avg_value)
        # Extend this block to handle other imputation methods as needed

# Load the dataset; the relative path means iris.csv must sit in the
# notebook's working directory.
df = pd.read_csv('iris.csv')

# Apply the imputation based on the configuration.  apply_imputation
# modifies df itself and returns nothing.
apply_imputation(df, feature_config)

# Display the DataFrame to verify the imputation
print(df)


     sepal_length  sepal_width  petal_length  petal_width         species
0             5.1          3.5           1.4          0.2     Iris-setosa
1             4.9          3.0           1.4          0.2     Iris-setosa
2             4.7          3.2           1.3          0.2     Iris-setosa
3             4.6          3.1           1.5          0.2     Iris-setosa
4             5.0          3.6           1.4          0.2     Iris-setosa
..            ...          ...           ...          ...             ...
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica

[150 rows x 5 columns]


In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.ensemble import ExtraTreesClassifier
from scipy.stats import pearsonr

# Assuming df is your DataFrame and target_variable is your target column's name
def apply_feature_reduction(df, target_variable, config):
    """Return a reduced copy of ``df`` using the strategy flagged in ``config``.

    The first strategy whose ``"is_selected"`` flag is truthy is applied,
    checked in this order: ``"No Reduction"``, ``"Correlation with target"``,
    ``"Tree-based"``, ``"Principal Component Analysis"``.  When none is
    flagged, an unmodified copy of ``df`` is returned.  The
    ``"feature_reduction_method"`` entry is not consulted.

    Args:
        df: Input DataFrame; it is never modified.
        target_variable: Name of the target column in ``df``.
        config: Mapping of strategy name -> settings.  Every strategy reads
            ``"num_of_features_to_keep"``; ``"Tree-based"`` also reads
            ``"num_of_trees"``.

    Returns:
        A new DataFrame.  Except for ``"No Reduction"`` (which keeps the first
        N columns positionally), the target column is appended last.
    """
    def _selected(name):
        # A missing strategy section counts as "not selected".
        return config.get(name, {}).get("is_selected", False)

    reduced_df = df.copy()

    # No Reduction
    if _selected("No Reduction"):
        # Assuming 'No Reduction' simply means limiting the number of features without any specific method
        num_features = config["No Reduction"]["num_of_features_to_keep"]
        reduced_df = reduced_df.iloc[:, :num_features]

    # Correlation with Target
    elif _selected("Correlation with target"):
        num_features = config["Correlation with target"]["num_of_features_to_keep"]
        corr_scores = {}
        for col in df.columns:
            # is_numeric_dtype also excludes pandas' dedicated string dtype,
            # which a comparison against 'object' would let through.
            if col == target_variable or not pd.api.types.is_numeric_dtype(df[col]):
                continue
            r = pearsonr(df[col], df[target_variable])[0]
            # Rank by strength of association: a strong negative correlation
            # is as informative as a positive one.  NaN (constant column)
            # ranks last.
            corr_scores[col] = 0.0 if np.isnan(r) else abs(r)
        sorted_features = sorted(corr_scores, key=corr_scores.get, reverse=True)[:num_features]
        reduced_df = df[sorted_features + [target_variable]]

    # Tree-based
    elif _selected("Tree-based"):
        tree_cfg = config["Tree-based"]
        num_features = tree_cfg["num_of_features_to_keep"]
        features = df.drop(target_variable, axis=1)
        # NOTE(review): "depth_of_trees" is present in the example config but
        # is not passed to the estimator; a classifier is also used regardless
        # of the target's type - confirm both against the intended behaviour.
        clf = ExtraTreesClassifier(n_estimators=tree_cfg["num_of_trees"])
        clf = clf.fit(features, df[target_variable])
        indices = np.argsort(clf.feature_importances_)[::-1][:num_features]
        # Importances are positional within `features` (target removed), so
        # the names must come from that frame, not from df.columns.
        selected_features = features.columns[indices]
        reduced_df = df[selected_features.tolist() + [target_variable]]

    # PCA
    elif _selected("Principal Component Analysis"):
        num_features = config["Principal Component Analysis"]["num_of_features_to_keep"]
        pca = PCA(n_components=num_features)
        principalComponents = pca.fit_transform(df.drop(target_variable, axis=1))
        # Reuse df's index so the target assignment below aligns row by row
        # even when df does not carry a default RangeIndex.
        reduced_df = pd.DataFrame(
            data=principalComponents,
            columns=[f'PC{i}' for i in range(1, num_features + 1)],
            index=df.index,
        )
        reduced_df[target_variable] = df[target_variable]

    return reduced_df

# Example usage:
# NOTE(review): "feature_reduction_method" names "Correlation with target",
# but apply_feature_reduction dispatches on the "is_selected" flags and the
# only truthy flag is on "No Reduction" - confirm which one is intended.
json_config = {
  "feature_reduction_method": "Correlation with target",
  "No Reduction": {"is_selected": True, "num_of_features_to_keep": 5},
  "Correlation with target": {"is_selected": False, "num_of_features_to_keep": 8},
  "Tree-based": {"is_selected": False, "num_of_features_to_keep": 0, "depth_of_trees": 0, "num_of_trees": 0},
  "Principal Component Analysis": {"is_selected": False, "num_of_features_to_keep": 0},
}

# Load your dataset
df = pd.read_csv('iris.csv')
# Use a column that exists in iris.csv (petal_width is the target named in
# the notebook's target configuration) instead of a placeholder, so that
# switching to any target-dependent strategy does not fail with KeyError.
target_variable = 'petal_width'

# Apply feature reduction
reduced_df = apply_feature_reduction(df, target_variable, json_config)

# Check the result
print(reduced_df.head())


   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [None]:
import json
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

# Sample JSON configuration.  instantiate_model_from_json reads
# "prediction_type", each model's "is_selected", and the min/max "iter" and
# "regparam" bounds; "parallelism" and the elastic-net bounds are present
# but not used by it.
json_config = """
{
  "prediction_type": "Classification",
  "models": {
    "LogisticRegression": {
      "model_name": "LogisticRegression",
      "is_selected": true,
      "parallelism": 2,
      "min_iter": 30,
      "max_iter": 50,
      "min_regparam": 0.5,
      "max_regparam": 0.8,
      "min_elasticnet": 0.5,
      "max_elasticnet": 0.8
    }
  }
}
"""

# Function to parse JSON and instantiate models
def instantiate_model_from_json(json_str):
    """Create estimator objects for the selected models of a JSON config.

    Args:
        json_str: JSON text with a ``"prediction_type"`` entry and, for
            classification, a ``"models"`` mapping of model name -> settings.

    Returns:
        list: one ``LogisticRegression`` per selected ``"LogisticRegression"``
        entry when the prediction type is ``"Classification"``; an empty list
        for every other prediction type (regression is not implemented yet).

    Raises:
        KeyError: if ``"prediction_type"`` is missing, or a classification
            model entry lacks ``"is_selected"`` or one of the bounds read here.
    """
    config = json.loads(json_str)

    # Only classification is implemented; "Regression" and any other value
    # produce no models.
    if config["prediction_type"] != "Classification":
        return []

    built = []
    for model_name, model_config in config["models"].items():
        # Extend with further model names as needed.
        if not model_config["is_selected"] or model_name != "LogisticRegression":
            continue

        # The configured ranges are collapsed to their midpoints.
        mean_iter = (model_config["min_iter"] + model_config["max_iter"]) / 2
        mean_regparam = (model_config["min_regparam"] + model_config["max_regparam"]) / 2
        built.append(
            LogisticRegression(
                max_iter=int(mean_iter),
                C=1.0 / mean_regparam,  # C is the inverse of regularization strength
            )
        )
    return built

# Example usage: build the estimators described by json_config and print
# each one's repr.
models = instantiate_model_from_json(json_config)
for model in models:
    print(model)


LogisticRegression(C=1.5384615384615383, max_iter=40)


In [None]:
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
import numpy as np

# Sample data: uniform random features and targets.  A fixed seed makes the
# search results reproducible after a kernel restart.
SEED = 42
rng = np.random.default_rng(SEED)
X_train = rng.random((100, 10))
y_train = rng.random(100)
X_test = rng.random((50, 10))
y_test = rng.random(50)

# Define models and their parameter grids
models = {
    # random_state pins the forest's own randomness as well.
    "RandomForest": (RandomForestRegressor(random_state=SEED), {'n_estimators': [10, 50, 100], 'max_depth': [None, 10, 20]}),
    "SVR": (SVR(), {'kernel': ['linear', 'rbf'], 'C': [0.1, 1, 10]}),
    "LinearRegression": (LinearRegression(), {'fit_intercept': [True, False]})
}

# TimeSeriesSplit keeps row order: every split trains on a growing prefix of
# the rows and validates on the block that follows it.
tscv = TimeSeriesSplit(n_splits=5)

# Iterate through each model, perform GridSearchCV, and fit the data
for name, (model, param_grid) in models.items():
    grid_search = GridSearchCV(model, param_grid, cv=tscv)
    grid_search.fit(X_train, y_train)

    # With no `scoring` argument the estimator's own score() is used; for
    # these regressors that is R^2, so values can be negative on random data.
    print(f"Best parameters for {name}: {grid_search.best_params_}")
    print(f"Best score for {name}: {grid_search.best_score_}")

    # Predict using the best estimator obtained from GridSearchCV
    predictions = grid_search.predict(X_test)
    print(f"Predictions for {name}: {predictions}")


Best parameters for RandomForest: {'max_depth': 20, 'n_estimators': 100}
Best score for RandomForest: -0.24787319806312685
Predictions for RandomForest: [0.37434416 0.48825906 0.56405843 0.56757672 0.52966837 0.53736262
 0.66602753 0.45730846 0.42546368 0.55040461 0.62562038 0.57161722
 0.53618962 0.57334666 0.50861142 0.4799144  0.55325484 0.63170151
 0.5757476  0.71589786 0.59613496 0.46445903 0.44513736 0.48667665
 0.62038129 0.4350278  0.60602586 0.46476412 0.53588438 0.45628066
 0.50654763 0.58227094 0.57276554 0.4046303  0.53823337 0.44959865
 0.52651779 0.54163115 0.3030529  0.46686691 0.56939142 0.50420507
 0.42515199 0.42081531 0.60709961 0.40971738 0.50637711 0.50211444
 0.62684909 0.48087206]
Best parameters for SVR: {'C': 0.1, 'kernel': 'rbf'}
Best score for SVR: -0.12244543438850788
Predictions for SVR: [0.48082268 0.55028492 0.53211043 0.64947423 0.45755237 0.52620468
 0.62951871 0.4937738  0.43818414 0.47230851 0.54348732 0.48716057
 0.59986974 0.544194   0.45742082 0.61

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Repeat the grid search for every entry of `models` and additionally score
# the predictions on the held-out data.  This cell relies on `models`,
# `tscv`, `X_train`, `y_train`, `X_test`, `y_test` and `GridSearchCV`
# already being defined by an earlier cell.
for name, (model, param_grid) in models.items():
    grid_search = GridSearchCV(model, param_grid, cv=tscv)
    grid_search.fit(X_train, y_train)

    print(f"Best parameters for {name}: {grid_search.best_params_}")
    print(f"Best score for {name}: {grid_search.best_score_}")

    # Predict using the best estimator obtained from GridSearchCV
    predictions = grid_search.predict(X_test)
    print(f"Predictions for {name}: {predictions}")

    # Evaluate model performance against the held-out targets
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    print(f"Mean Absolute Error for {name}: {mae}")
    print(f"Mean Squared Error for {name}: {mse}")
    print(f"R-squared for {name}: {r2}")


Best parameters for RandomForest: {'max_depth': None, 'n_estimators': 100}
Best score for RandomForest: -0.27311972506411764
Predictions for RandomForest: [0.43379956 0.4864721  0.55717641 0.5336191  0.49911153 0.48976002
 0.66603942 0.47489381 0.46398326 0.54742097 0.60171922 0.47988454
 0.5157346  0.59215858 0.51119663 0.55925441 0.5845795  0.65007929
 0.59490225 0.69228393 0.59363947 0.41378287 0.45750719 0.50166339
 0.58651666 0.47645117 0.62276944 0.51579104 0.50664792 0.49071669
 0.45644206 0.54820356 0.64005625 0.38983969 0.53887699 0.45342631
 0.52214896 0.56665623 0.34407672 0.40990283 0.54337555 0.43571491
 0.49253014 0.38125612 0.534983   0.41294354 0.51370574 0.49083115
 0.66048227 0.44099258]
Mean Absolute Error for RandomForest: 0.25258915108169994
Mean Squared Error for RandomForest: 0.08572999562089607
R-squared for RandomForest: -0.05444848174994377
Best parameters for SVR: {'C': 0.1, 'kernel': 'rbf'}
Best score for SVR: -0.12244543438850788
Predictions for SVR: [0.480

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
import json

def feature_handling_pipeline():
    """Build the feature-handling stage.

    Returns:
        Pipeline: an unfitted pipeline with a single ``'scaler'`` step holding
        a default ``StandardScaler``.
    """
    return Pipeline(steps=[('scaler', StandardScaler())])

def feature_reduction_pipeline():
    """Build the feature-reduction stage.

    Returns:
        Pipeline: an unfitted pipeline with a single ``'pca'`` step holding a
        default ``PCA``.
    """
    return Pipeline(steps=[('pca', PCA())])

def model_fit_pipeline(algo, param_grid, cv):
    """Wrap a grid search over the chosen regressor in a one-step Pipeline.

    Args:
        algo: ``"RandomForest"``, ``"SVR"`` or ``"LinearRegression"``.
        param_grid: Parameter grid handed unchanged to ``GridSearchCV``.
        cv: Value handed to ``GridSearchCV``'s ``cv`` argument.

    Returns:
        Pipeline: its only step, named ``'model'``, is the unfitted
        ``GridSearchCV``.

    Raises:
        KeyError: if ``algo`` is not one of the three known names.
    """
    estimator_classes = {
        "RandomForest": RandomForestRegressor,
        "SVR": SVR,
        "LinearRegression": LinearRegression,
    }
    search = GridSearchCV(estimator_classes[algo](), param_grid, cv=cv)
    return Pipeline(steps=[('model', search)])

def parse_json_config(json_config):
    """Return the first selected algorithm and its settings from JSON text.

    An algorithm is considered only when the ``"Grid Search"`` section is
    itself selected.  Algorithm sections are looked up both next to
    ``"Grid Search"`` and nested inside it, and the selection flag may be
    spelled ``"is_selected"`` or ``"is selected"``.

    Args:
        json_config: JSON text describing the run.

    Returns:
        tuple: ``(name, settings_dict)`` of the first selected algorithm, or
        ``(None, None)`` when grid search or every algorithm is unselected.

    Raises:
        json.JSONDecodeError: if ``json_config`` is not valid JSON.
    """
    config = json.loads(json_config)

    def _is_selected(section):
        # Non-dict values (e.g. the boolean flag itself) are never algorithms.
        return isinstance(section, dict) and bool(
            section.get("is_selected", section.get("is selected", False))
        )

    grid_search = config.get("Grid Search", {})
    if not _is_selected(grid_search):
        return None, None

    # Assuming only one algorithm is selected at a time: top-level siblings
    # are checked first, then entries nested under "Grid Search".
    candidates = [(key, value) for key, value in config.items() if key != "Grid Search"]
    candidates.extend(grid_search.items())
    for algo, algo_config in candidates:
        if _is_selected(algo_config):
            return algo, algo_config

    return None, None

def execute_pipeline(json_config, X_train=None, y_train=None, X_test=None):
    """Build, fit and report on the pipeline described by ``json_config``.

    Stages run in sequence: feature handling (scaling), feature reduction
    (PCA), then a grid search over the selected algorithm.

    Args:
        json_config: JSON text understood by ``parse_json_config``.
        X_train: Training features.  When both ``X_train`` and ``y_train`` are
            omitted, small seeded random example data is used instead.
        y_train: Training targets.
        X_test: Features to predict on.  Prediction is skipped when training
            data is supplied without ``X_test``.

    Returns:
        None.  Results are printed.

    Raises:
        ValueError: if only one of ``X_train`` / ``y_train`` is given.
    """
    # Parse JSON configuration
    algo, algo_config = parse_json_config(json_config)
    if algo is None:
        print("No algorithm selected for execution.")
        return

    if (X_train is None) != (y_train is None):
        raise ValueError("X_train and y_train must be provided together.")
    if X_train is None:
        # Example data so the demo runs end to end; seeded for reproducibility.
        import numpy as np
        rng = np.random.default_rng(0)
        X_train = rng.random((100, 10))
        y_train = rng.random(100)
        if X_test is None:
            X_test = rng.random((50, 10))

    model_pipe = model_fit_pipeline(algo, algo_config.get("param_grid", {}), algo_config.get("cv"))

    # Chain the stages.  A ColumnTransformer would run scaler and PCA side by
    # side on the same columns and concatenate both outputs, rather than
    # feeding scaled features into the PCA.
    full_pipeline = Pipeline(steps=[
        ('feature_handling', feature_handling_pipeline()),
        ('feature_reduction', feature_reduction_pipeline()),
        ('model', model_pipe)
    ])

    # Fit the pipeline
    full_pipeline.fit(X_train, y_train)

    # The 'model' step may itself be a one-step Pipeline wrapping the
    # GridSearchCV; unwrap it to reach best_params_ / best_score_.
    model_step = full_pipeline.named_steps['model']
    search = model_step.named_steps['model'] if isinstance(model_step, Pipeline) else model_step
    print(f"Best parameters for {algo}: {search.best_params_}")
    print(f"Best score for {algo}: {search.best_score_}")

    if X_test is not None:
        predictions = full_pipeline.predict(X_test)
        print(f"Predictions for {algo}: {predictions}")

# Example JSON configuration
# JSON has no `None` literal: an unset max_depth is written as null, which
# json.loads turns into Python None.  The algorithm section sits next to
# "Grid Search" (not inside it) and uses the same "is selected" spelling,
# because that is what this cell's parse_json_config looks for.
json_config = """
{
    "Grid Search": {
        "is selected": true
    },
    "RandomForest": {
        "is selected": true,
        "param_grid": {
            "n_estimators": [10, 50, 100],
            "max_depth": [null, 10, 20]
        },
        "cv": 5
    }
}
"""

# Run the demo: execute_pipeline parses json_config and, when an algorithm
# is selected, builds the pipeline, fits it and prints the results.
execute_pipeline(json_config)


JSONDecodeError: Expecting value: line 9 column 31 (char 217)

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
import json

def feature_handling_pipeline():
    """Build the feature-handling stage.

    Returns:
        Pipeline: an unfitted pipeline with a single ``'scaler'`` step holding
        a default ``StandardScaler``.
    """
    return Pipeline(steps=[('scaler', StandardScaler())])

def feature_reduction_pipeline():
    """Build the feature-reduction stage.

    Returns:
        Pipeline: an unfitted pipeline with a single ``'pca'`` step holding a
        default ``PCA``.
    """
    return Pipeline(steps=[('pca', PCA())])

def model_fit_pipeline(algo, param_grid, cv):
    """Wrap a grid search over the chosen regressor in a one-step Pipeline.

    Args:
        algo: ``"RandomForest"``, ``"SVR"`` or ``"LinearRegression"``.
        param_grid: Parameter grid handed unchanged to ``GridSearchCV``.
        cv: Value handed to ``GridSearchCV``'s ``cv`` argument.

    Returns:
        Pipeline: its only step, named ``'model'``, is the unfitted
        ``GridSearchCV``.

    Raises:
        KeyError: if ``algo`` is not one of the three known names.
    """
    estimator_classes = {
        "RandomForest": RandomForestRegressor,
        "SVR": SVR,
        "LinearRegression": LinearRegression,
    }
    search = GridSearchCV(estimator_classes[algo](), param_grid, cv=cv)
    return Pipeline(steps=[('model', search)])

def parse_json_config(json_config):
    """Return the first selected algorithm and its settings from JSON text.

    An algorithm is considered only when the ``"Grid Search"`` section is
    itself selected.  Algorithm sections are looked up both next to
    ``"Grid Search"`` and nested inside it, and the selection flag may be
    spelled ``"is_selected"`` or ``"is selected"``.

    Args:
        json_config: JSON text describing the run.

    Returns:
        tuple: ``(name, settings_dict)`` of the first selected algorithm, or
        ``(None, None)`` when grid search or every algorithm is unselected.

    Raises:
        json.JSONDecodeError: if ``json_config`` is not valid JSON.
    """
    config = json.loads(json_config)

    def _is_selected(section):
        # Non-dict values (e.g. the boolean flag itself) are never algorithms.
        return isinstance(section, dict) and bool(
            section.get("is_selected", section.get("is selected", False))
        )

    grid_search = config.get("Grid Search", {})
    if not _is_selected(grid_search):
        return None, None

    # Assuming only one algorithm is selected at a time: top-level siblings
    # are checked first, then entries nested under "Grid Search".
    candidates = [(key, value) for key, value in config.items() if key != "Grid Search"]
    candidates.extend(grid_search.items())
    for algo, algo_config in candidates:
        if _is_selected(algo_config):
            return algo, algo_config

    return None, None

def execute_pipeline(json_config, X_train=None, y_train=None, X_test=None):
    """Build, fit and report on the pipeline described by ``json_config``.

    Stages run in sequence: feature handling (scaling), feature reduction
    (PCA), then a grid search over the selected algorithm.

    Args:
        json_config: JSON text understood by ``parse_json_config``.
        X_train: Training features.  When both ``X_train`` and ``y_train`` are
            omitted, small seeded random example data is used instead.
        y_train: Training targets.
        X_test: Features to predict on.  Prediction is skipped when training
            data is supplied without ``X_test``.

    Returns:
        None.  Results are printed.

    Raises:
        ValueError: if only one of ``X_train`` / ``y_train`` is given.
    """
    # Parse JSON configuration
    algo, algo_config = parse_json_config(json_config)
    if algo is None:
        print("No algorithm selected for execution.")
        return

    if (X_train is None) != (y_train is None):
        raise ValueError("X_train and y_train must be provided together.")
    if X_train is None:
        # Example data so the demo runs end to end; seeded for reproducibility.
        import numpy as np
        rng = np.random.default_rng(0)
        X_train = rng.random((100, 10))
        y_train = rng.random(100)
        if X_test is None:
            X_test = rng.random((50, 10))

    model_pipe = model_fit_pipeline(algo, algo_config.get("param_grid", {}), algo_config.get("cv"))

    # Chain the stages.  A ColumnTransformer would run scaler and PCA side by
    # side on the same columns and concatenate both outputs, rather than
    # feeding scaled features into the PCA.
    full_pipeline = Pipeline(steps=[
        ('feature_handling', feature_handling_pipeline()),
        ('feature_reduction', feature_reduction_pipeline()),
        ('model', model_pipe)
    ])

    # Fit the pipeline
    full_pipeline.fit(X_train, y_train)

    # The 'model' step may itself be a one-step Pipeline wrapping the
    # GridSearchCV; unwrap it to reach best_params_ / best_score_.
    model_step = full_pipeline.named_steps['model']
    search = model_step.named_steps['model'] if isinstance(model_step, Pipeline) else model_step
    print(f"Best parameters for {algo}: {search.best_params_}")
    print(f"Best score for {algo}: {search.best_score_}")

    if X_test is not None:
        predictions = full_pipeline.predict(X_test)
        print(f"Predictions for {algo}: {predictions}")

# Corrected JSON configuration
# JSON has no `None` literal: an unset max_depth is written as null, which
# json.loads turns into Python None.  The algorithm section sits next to
# "Grid Search" (not inside it) because that is where parse_json_config
# looks for it.
json_config = """
{
    "Grid Search": {
        "is_selected": true
    },
    "RandomForest": {
        "is_selected": true,
        "param_grid": {
            "n_estimators": [10, 50, 100],
            "max_depth": [null, 10, 20]
        },
        "cv": 5
    }
}
"""

# Run the demo: execute_pipeline parses json_config and, when an algorithm
# is selected, builds the pipeline, fits it and prints the results.
execute_pipeline(json_config)


JSONDecodeError: Expecting value: line 9 column 31 (char 217)

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
import json

def feature_handling_pipeline():
    """Build the feature-handling stage.

    Returns:
        Pipeline: an unfitted pipeline with a single ``'scaler'`` step holding
        a default ``StandardScaler``.
    """
    return Pipeline(steps=[('scaler', StandardScaler())])

def feature_reduction_pipeline():
    """Build the feature-reduction stage.

    Returns:
        Pipeline: an unfitted pipeline with a single ``'pca'`` step holding a
        default ``PCA``.
    """
    return Pipeline(steps=[('pca', PCA())])

def model_fit_pipeline(algo, param_grid, cv):
    """Wrap a grid search over the chosen regressor in a one-step Pipeline.

    Args:
        algo: ``"RandomForest"``, ``"SVR"`` or ``"LinearRegression"``.
        param_grid: Parameter grid handed unchanged to ``GridSearchCV``.
        cv: Value handed to ``GridSearchCV``'s ``cv`` argument.

    Returns:
        Pipeline: its only step, named ``'model'``, is the unfitted
        ``GridSearchCV``.

    Raises:
        KeyError: if ``algo`` is not one of the three known names.
    """
    estimator_classes = {
        "RandomForest": RandomForestRegressor,
        "SVR": SVR,
        "LinearRegression": LinearRegression,
    }
    search = GridSearchCV(estimator_classes[algo](), param_grid, cv=cv)
    return Pipeline(steps=[('model', search)])

def parse_json_config(json_config):
    """Return the first selected algorithm and its settings from JSON text.

    An algorithm is considered only when the ``"Grid Search"`` section is
    itself selected.  Algorithm sections are looked up both next to
    ``"Grid Search"`` and nested inside it, and the selection flag may be
    spelled ``"is_selected"`` or ``"is selected"``.

    Args:
        json_config: JSON text describing the run.

    Returns:
        tuple: ``(name, settings_dict)`` of the first selected algorithm, or
        ``(None, None)`` when grid search or every algorithm is unselected.

    Raises:
        json.JSONDecodeError: if ``json_config`` is not valid JSON.
    """
    config = json.loads(json_config)

    def _is_selected(section):
        # Non-dict values (e.g. the boolean flag itself) are never algorithms.
        return isinstance(section, dict) and bool(
            section.get("is_selected", section.get("is selected", False))
        )

    grid_search = config.get("Grid Search", {})
    if not _is_selected(grid_search):
        return None, None

    # Assuming only one algorithm is selected at a time: top-level siblings
    # are checked first, then entries nested under "Grid Search".
    candidates = [(key, value) for key, value in config.items() if key != "Grid Search"]
    candidates.extend(grid_search.items())
    for algo, algo_config in candidates:
        if _is_selected(algo_config):
            return algo, algo_config

    return None, None

def execute_pipeline(json_config, X_train=None, y_train=None, X_test=None):
    """Build, fit and report on the pipeline described by ``json_config``.

    Stages run in sequence: feature handling (scaling), feature reduction
    (PCA), then a grid search over the selected algorithm.

    Args:
        json_config: JSON text understood by ``parse_json_config``.
        X_train: Training features.  When both ``X_train`` and ``y_train`` are
            omitted, small seeded random example data is used instead.
        y_train: Training targets.
        X_test: Features to predict on.  Prediction is skipped when training
            data is supplied without ``X_test``.

    Returns:
        None.  Results are printed.

    Raises:
        ValueError: if only one of ``X_train`` / ``y_train`` is given.
    """
    # Parse JSON configuration
    algo, algo_config = parse_json_config(json_config)
    if algo is None:
        print("No algorithm selected for execution.")
        return

    if (X_train is None) != (y_train is None):
        raise ValueError("X_train and y_train must be provided together.")
    if X_train is None:
        # Example data so the demo runs end to end; seeded for reproducibility.
        import numpy as np
        rng = np.random.default_rng(0)
        X_train = rng.random((100, 10))
        y_train = rng.random(100)
        if X_test is None:
            X_test = rng.random((50, 10))

    model_pipe = model_fit_pipeline(algo, algo_config.get("param_grid", {}), algo_config.get("cv"))

    # Chain the stages.  A ColumnTransformer would run scaler and PCA side by
    # side on the same columns and concatenate both outputs, rather than
    # feeding scaled features into the PCA.
    full_pipeline = Pipeline(steps=[
        ('feature_handling', feature_handling_pipeline()),
        ('feature_reduction', feature_reduction_pipeline()),
        ('model', model_pipe)
    ])

    # Fit the pipeline
    full_pipeline.fit(X_train, y_train)

    # The 'model' step may itself be a one-step Pipeline wrapping the
    # GridSearchCV; unwrap it to reach best_params_ / best_score_.
    model_step = full_pipeline.named_steps['model']
    search = model_step.named_steps['model'] if isinstance(model_step, Pipeline) else model_step
    print(f"Best parameters for {algo}: {search.best_params_}")
    print(f"Best score for {algo}: {search.best_score_}")

    if X_test is not None:
        predictions = full_pipeline.predict(X_test)
        print(f"Predictions for {algo}: {predictions}")

# Corrected JSON configuration
# JSON has no `None` literal: an unset max_depth is written as null, which
# json.loads turns into Python None.  The algorithm section sits next to
# "Grid Search" (not inside it) because that is where parse_json_config
# looks for it.
json_config = """
{
    "Grid Search": {
        "is_selected": true
    },
    "RandomForest": {
        "is_selected": true,
        "param_grid": {
            "n_estimators": [10, 50, 100],
            "max_depth": [null, 10, 20]
        },
        "cv": 5
    }
}
"""

# Run the demo: execute_pipeline parses json_config and, when an algorithm
# is selected, builds the pipeline, fits it and prints the results.
execute_pipeline(json_config)


JSONDecodeError: Expecting value: line 9 column 31 (char 217)

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
import json

def feature_handling_pipeline():
    """Build the feature-handling stage.

    Returns:
        Pipeline: an unfitted pipeline with a single ``'scaler'`` step holding
        a default ``StandardScaler``.
    """
    return Pipeline(steps=[('scaler', StandardScaler())])

def feature_reduction_pipeline():
    """Build the feature-reduction stage.

    Returns:
        Pipeline: an unfitted pipeline with a single ``'pca'`` step holding a
        default ``PCA``.
    """
    return Pipeline(steps=[('pca', PCA())])

def model_fit_pipeline(algo, param_grid, cv):
    """Wrap a grid search over the chosen regressor in a one-step Pipeline.

    Args:
        algo: ``"RandomForest"``, ``"SVR"`` or ``"LinearRegression"``.
        param_grid: Parameter grid handed unchanged to ``GridSearchCV``.
        cv: Value handed to ``GridSearchCV``'s ``cv`` argument.

    Returns:
        Pipeline: its only step, named ``'model'``, is the unfitted
        ``GridSearchCV``.

    Raises:
        KeyError: if ``algo`` is not one of the three known names.
    """
    estimator_classes = {
        "RandomForest": RandomForestRegressor,
        "SVR": SVR,
        "LinearRegression": LinearRegression,
    }
    search = GridSearchCV(estimator_classes[algo](), param_grid, cv=cv)
    return Pipeline(steps=[('model', search)])

def parse_json_config(json_config):
    """Return the first selected algorithm and its settings from JSON text.

    An algorithm is considered only when the ``"Grid Search"`` section is
    itself selected.  Algorithm sections are looked up both next to
    ``"Grid Search"`` and nested inside it, and the selection flag may be
    spelled ``"is_selected"`` or ``"is selected"``.

    Args:
        json_config: JSON text describing the run.

    Returns:
        tuple: ``(name, settings_dict)`` of the first selected algorithm, or
        ``(None, None)`` when grid search or every algorithm is unselected.

    Raises:
        json.JSONDecodeError: if ``json_config`` is not valid JSON.
    """
    config = json.loads(json_config)

    def _is_selected(section):
        # Non-dict values (e.g. the boolean flag itself) are never algorithms.
        return isinstance(section, dict) and bool(
            section.get("is_selected", section.get("is selected", False))
        )

    grid_search = config.get("Grid Search", {})
    if not _is_selected(grid_search):
        return None, None

    # Assuming only one algorithm is selected at a time: top-level siblings
    # are checked first, then entries nested under "Grid Search".
    candidates = [(key, value) for key, value in config.items() if key != "Grid Search"]
    candidates.extend(grid_search.items())
    for algo, algo_config in candidates:
        if _is_selected(algo_config):
            return algo, algo_config

    return None, None

def execute_pipeline(json_config, X_train=None, y_train=None, X_test=None):
    """Build, fit and report on the pipeline described by ``json_config``.

    Stages run in sequence: feature handling (scaling), feature reduction
    (PCA), then a grid search over the selected algorithm.

    Args:
        json_config: JSON text understood by ``parse_json_config``.
        X_train: Training features.  When both ``X_train`` and ``y_train`` are
            omitted, small seeded random example data is used instead.
        y_train: Training targets.
        X_test: Features to predict on.  Prediction is skipped when training
            data is supplied without ``X_test``.

    Returns:
        None.  Results are printed.

    Raises:
        ValueError: if only one of ``X_train`` / ``y_train`` is given.
    """
    # Parse JSON configuration
    algo, algo_config = parse_json_config(json_config)
    if algo is None:
        print("No algorithm selected for execution.")
        return

    if (X_train is None) != (y_train is None):
        raise ValueError("X_train and y_train must be provided together.")
    if X_train is None:
        # Example data so the demo runs end to end; seeded for reproducibility.
        import numpy as np
        rng = np.random.default_rng(0)
        X_train = rng.random((100, 10))
        y_train = rng.random(100)
        if X_test is None:
            X_test = rng.random((50, 10))

    model_pipe = model_fit_pipeline(algo, algo_config.get("param_grid", {}), algo_config.get("cv"))

    # Chain the stages.  A ColumnTransformer would run scaler and PCA side by
    # side on the same columns and concatenate both outputs, rather than
    # feeding scaled features into the PCA.
    full_pipeline = Pipeline(steps=[
        ('feature_handling', feature_handling_pipeline()),
        ('feature_reduction', feature_reduction_pipeline()),
        ('model', model_pipe)
    ])

    # Fit the pipeline
    full_pipeline.fit(X_train, y_train)

    # The 'model' step may itself be a one-step Pipeline wrapping the
    # GridSearchCV; unwrap it to reach best_params_ / best_score_.
    model_step = full_pipeline.named_steps['model']
    search = model_step.named_steps['model'] if isinstance(model_step, Pipeline) else model_step
    print(f"Best parameters for {algo}: {search.best_params_}")
    print(f"Best score for {algo}: {search.best_score_}")

    if X_test is not None:
        predictions = full_pipeline.predict(X_test)
        print(f"Predictions for {algo}: {predictions}")

# Corrected JSON configuration
# JSON has no `None` literal: an unset max_depth is written as null, which
# json.loads turns into Python None.  The algorithm section sits next to
# "Grid Search" (not inside it) because that is where parse_json_config
# looks for it.
json_config = """
{
    "Grid Search": {
        "is_selected": true
    },
    "RandomForest": {
        "is_selected": true,
        "param_grid": {
            "n_estimators": [10, 50, 100],
            "max_depth": [null, 10, 20]
        },
        "cv": 5
    }
}
"""

# Run the demo: execute_pipeline parses json_config and, when an algorithm
# is selected, builds the pipeline, fits it and prints the results.
execute_pipeline(json_config)


JSONDecodeError: Expecting value: line 9 column 31 (char 217)

In [None]:
# Corrected JSON configuration
# JSON has no `None` literal: an unset max_depth is written as null, which
# json.loads turns into Python None.  The algorithm section sits next to
# "Grid Search" (not inside it) because parse_json_config scans the
# top-level keys for the selected algorithm.
json_config = """
{
    "Grid Search": {
        "is_selected": true
    },
    "RandomForest": {
        "is_selected": true,
        "param_grid": {
            "n_estimators": [10, 50, 100],
            "max_depth": [null, 10, 20]
        },
        "cv": 5
    }
}
"""


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
import json

def feature_handling_pipeline():
    """Build the feature-handling stage.

    Returns:
        Pipeline: an unfitted pipeline with a single ``'scaler'`` step holding
        a default ``StandardScaler``.
    """
    return Pipeline(steps=[('scaler', StandardScaler())])

def feature_reduction_pipeline():
    """Build the feature-reduction stage.

    Returns:
        Pipeline: an unfitted pipeline with a single ``'pca'`` step holding a
        default ``PCA``.
    """
    return Pipeline(steps=[('pca', PCA())])

def model_fit_pipeline(algo, param_grid, cv):
    """Wrap a grid search over the chosen regressor in a one-step Pipeline.

    Args:
        algo: ``"RandomForest"``, ``"SVR"`` or ``"LinearRegression"``.
        param_grid: Parameter grid handed unchanged to ``GridSearchCV``.
        cv: Value handed to ``GridSearchCV``'s ``cv`` argument.

    Returns:
        Pipeline: its only step, named ``'model'``, is the unfitted
        ``GridSearchCV``.

    Raises:
        KeyError: if ``algo`` is not one of the three known names.
    """
    estimator_classes = {
        "RandomForest": RandomForestRegressor,
        "SVR": SVR,
        "LinearRegression": LinearRegression,
    }
    search = GridSearchCV(estimator_classes[algo](), param_grid, cv=cv)
    return Pipeline(steps=[('model', search)])

def parse_json_config(json_config):
    """Return the first selected algorithm and its settings from JSON text.

    An algorithm is considered only when the ``"Grid Search"`` section is
    itself selected.  Algorithm sections are looked up both next to
    ``"Grid Search"`` and nested inside it, and the selection flag may be
    spelled ``"is_selected"`` or ``"is selected"``.

    Args:
        json_config: JSON text describing the run.

    Returns:
        tuple: ``(name, settings_dict)`` of the first selected algorithm, or
        ``(None, None)`` when grid search or every algorithm is unselected.

    Raises:
        json.JSONDecodeError: if ``json_config`` is not valid JSON.
    """
    config = json.loads(json_config)

    def _is_selected(section):
        # Non-dict values (e.g. the boolean flag itself) are never algorithms.
        return isinstance(section, dict) and bool(
            section.get("is_selected", section.get("is selected", False))
        )

    grid_search = config.get("Grid Search", {})
    if not _is_selected(grid_search):
        return None, None

    # Assuming only one algorithm is selected at a time: top-level siblings
    # are checked first, then entries nested under "Grid Search".
    candidates = [(key, value) for key, value in config.items() if key != "Grid Search"]
    candidates.extend(grid_search.items())
    for algo, algo_config in candidates:
        if _is_selected(algo_config):
            return algo, algo_config

    return None, None

def execute_pipeline(json_config, X_train=None, y_train=None, X_test=None):
    """Build, fit and report on the pipeline described by ``json_config``.

    Stages run in sequence: feature handling (scaling), feature reduction
    (PCA), then a grid search over the selected algorithm.

    Args:
        json_config: JSON text understood by ``parse_json_config``.
        X_train: Training features.  When both ``X_train`` and ``y_train`` are
            omitted, small seeded random example data is used instead.
        y_train: Training targets.
        X_test: Features to predict on.  Prediction is skipped when training
            data is supplied without ``X_test``.

    Returns:
        None.  Results are printed.

    Raises:
        ValueError: if only one of ``X_train`` / ``y_train`` is given.
    """
    # Parse JSON configuration
    algo, algo_config = parse_json_config(json_config)
    if algo is None:
        print("No algorithm selected for execution.")
        return

    if (X_train is None) != (y_train is None):
        raise ValueError("X_train and y_train must be provided together.")
    if X_train is None:
        # Example data so the demo runs end to end; seeded for reproducibility.
        import numpy as np
        rng = np.random.default_rng(0)
        X_train = rng.random((100, 10))
        y_train = rng.random(100)
        if X_test is None:
            X_test = rng.random((50, 10))

    model_pipe = model_fit_pipeline(algo, algo_config.get("param_grid", {}), algo_config.get("cv"))

    # Chain the stages.  A ColumnTransformer would run scaler and PCA side by
    # side on the same columns and concatenate both outputs, rather than
    # feeding scaled features into the PCA.
    full_pipeline = Pipeline(steps=[
        ('feature_handling', feature_handling_pipeline()),
        ('feature_reduction', feature_reduction_pipeline()),
        ('model', model_pipe)
    ])

    # Fit the pipeline
    full_pipeline.fit(X_train, y_train)

    # The 'model' step may itself be a one-step Pipeline wrapping the
    # GridSearchCV; unwrap it to reach best_params_ / best_score_.
    model_step = full_pipeline.named_steps['model']
    search = model_step.named_steps['model'] if isinstance(model_step, Pipeline) else model_step
    print(f"Best parameters for {algo}: {search.best_params_}")
    print(f"Best score for {algo}: {search.best_score_}")

    if X_test is not None:
        predictions = full_pipeline.predict(X_test)
        print(f"Predictions for {algo}: {predictions}")

# Corrected JSON configuration
# JSON has no `None` literal: an unset max_depth is written as null, which
# json.loads turns into Python None.  The algorithm section sits next to
# "Grid Search" (not inside it) because that is where parse_json_config
# looks for it.
json_config = """
{
    "Grid Search": {
        "is_selected": true
    },
    "RandomForest": {
        "is_selected": true,
        "param_grid": {
            "n_estimators": [10, 50, 100],
            "max_depth": [null, 10, 20]
        },
        "cv": 5
    }
}
"""

# Run the demo: execute_pipeline parses json_config and, when an algorithm
# is selected, builds the pipeline, fits it and prints the results.
execute_pipeline(json_config)


JSONDecodeError: Expecting value: line 9 column 31 (char 217)