In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.impute import SimpleImputer
import numpy as np
import pickle

# Load the dataset
file_path = 'data.csv'
df = pd.read_csv(file_path)

# Separate the input feature 'css' and output targets
X = df[['CCS']]
y = df.drop('CCS', axis=1)

# Handle missing values in y
imputer = SimpleImputer(strategy='mean')
y = pd.DataFrame(imputer.fit_transform(y), columns=y.columns)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Scale the input data
def scaler_standard(X_train, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    with open("StandardScaler.pkl", "wb") as file:
        pickle.dump(scaler, file)
    
    return X_train_scaled, X_test_scaled

X_train_scaled, X_test_scaled = scaler_standard(X_train, X_test)

# Initialize the base regressor
base_regressor = RandomForestRegressor(random_state=0)

# Use MultiOutputRegressor for handling multiple outputs
multi_output_regressor = MultiOutputRegressor(base_regressor)

# Define parameter grid for GridSearchCV
parameters = {
    'estimator__n_estimators': [100, 200, 300],
    'estimator__max_depth': [None, 15, 30, 45],
    'estimator__min_samples_split': [2, 10, 20]
}

# Perform GridSearchCV
clf = GridSearchCV(multi_output_regressor, param_grid=parameters, scoring='neg_mean_squared_error', cv=3, verbose=1, n_jobs=-1)
clf.fit(X_train_scaled, y_train)

# Save the best model
with open('modelforprediction.pkl', 'wb') as file:
    pickle.dump(clf.best_estimator_, file)

# Predict on the test set
y_pred = clf.predict(X_test_scaled)

# Evaluate the model
mse = ((y_test - y_pred) ** 2).mean().mean()
print(f'Mean Squared Error: {mse}')

print("Model saved successfully.")


Fitting 3 folds for each of 36 candidates, totalling 108 fits
Mean Squared Error: 104983.45853413708
Model saved successfully.


In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.impute import SimpleImputer
import numpy as np
import pickle

# Load the dataset
file_path = 'data.csv'
df = pd.read_csv(file_path)

# Separate the input feature 'CCS' and output targets
X = df[['CCS']]
y = df.drop('CCS', axis=1)

# Handle missing values in y
imputer = SimpleImputer(strategy='mean')
y = pd.DataFrame(imputer.fit_transform(y), columns=y.columns)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Scale the input data
def scaler_standard(X_train, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    with open("StandardScaler.pkl", "wb") as file:
        pickle.dump(scaler, file)
    
    return X_train_scaled, X_test_scaled

X_train_scaled, X_test_scaled = scaler_standard(X_train, X_test)

# Initialize the base regressor
base_regressor = GradientBoostingRegressor(random_state=0)

# Use MultiOutputRegressor for handling multiple outputs
multi_output_regressor = MultiOutputRegressor(base_regressor)

# Define parameter grid for GridSearchCV
parameters = {
    'estimator__n_estimators': [100, 200, 300],
    'estimator__max_depth': [3, 5, 7],
    'estimator__learning_rate': [0.01, 0.1, 0.2],
    'estimator__min_samples_split': [2, 5, 10]
}

# Perform GridSearchCV
clf = GridSearchCV(multi_output_regressor, param_grid=parameters, scoring='neg_mean_squared_error', cv=3, verbose=1, n_jobs=-1)
clf.fit(X_train_scaled, y_train)

# Save the best model
with open('modelforprediction.pkl', 'wb') as file:
    pickle.dump(clf.best_estimator_, file)

# Predict on the test set
y_pred = clf.predict(X_test_scaled)

# Evaluate the model
mse = ((y_test - y_pred) ** 2).mean().mean()
print(f'Mean Squared Error: {mse}')

print("Model saved successfully.")


Fitting 3 folds for each of 81 candidates, totalling 243 fits


KeyboardInterrupt: 

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.impute import SimpleImputer
import pickle

# Load the dataset
file_path = 'data.csv'
df = pd.read_csv(file_path)

# Separate the input feature 'CCS' and output targets
X = df[['CCS']]
y = df.drop('CCS', axis=1)

# Handle missing values in y
imputer = SimpleImputer(strategy='mean')
y = pd.DataFrame(imputer.fit_transform(y), columns=y.columns)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Scale the input data
def scaler_standard(X_train, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    with open("StandardScaler.pkl", "wb") as file:
        pickle.dump(scaler, file)
    
    return X_train_scaled, X_test_scaled

X_train_scaled, X_test_scaled = scaler_standard(X_train, X_test)

# Initialize the base regressor
base_regressor = SVR()

# Use MultiOutputRegressor for handling multiple outputs
multi_output_regressor = MultiOutputRegressor(base_regressor)

# Define parameter grid for GridSearchCV
parameters = {
    'estimator__C': [0.1, 1, 10],
    'estimator__epsilon': [0.01, 0.1, 0.2],
    'estimator__kernel': ['linear', 'poly', 'rbf']
}

# Perform GridSearchCV
clf = GridSearchCV(multi_output_regressor, param_grid=parameters, scoring='neg_mean_squared_error', cv=3, verbose=1, n_jobs=-1)
clf.fit(X_train_scaled, y_train)

# Save the best model
with open('modelforprediction.pkl', 'wb') as file:
    pickle.dump(clf.best_estimator_, file)

# Predict on the test set
y_pred = clf.predict(X_test_scaled)

# Evaluate the model
mse = ((y_test - y_pred) ** 2).mean().mean()
print(f'Mean Squared Error: {mse}')

print("Model saved successfully.")


Fitting 3 folds for each of 27 candidates, totalling 81 fits
Mean Squared Error: 107066.94807731196
Model saved successfully.


In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pickle

# Load the dataset
file_path = 'data.csv'
df = pd.read_csv(file_path)

# Separate the input feature 'CCS' and output targets
X = df[['CCS']]
y = df.drop('CCS', axis=1)

# Handle missing values in y
imputer = SimpleImputer(strategy='mean')
y = pd.DataFrame(imputer.fit_transform(y), columns=y.columns)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Scale the input data
def scaler_standard(X_train, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    with open("StandardScaler.pkl", "wb") as file:
        pickle.dump(scaler, file)
    
    return X_train_scaled, X_test_scaled

X_train_scaled, X_test_scaled = scaler_standard(X_train, X_test)

# Define the base Gradient Boosting Regressor
base_regressor = GradientBoostingRegressor(random_state=0)

# Use MultiOutputRegressor for handling multiple outputs
multi_output_regressor = MultiOutputRegressor(base_regressor)

# Define parameter grid for GridSearchCV
parameters = {
    'estimator__n_estimators': [100, 200, 300],
    'estimator__max_depth': [3, 5, 7],
    'estimator__learning_rate': [0.01, 0.1, 0.2],
    'estimator__min_samples_split': [2, 5, 10]
}

# Perform GridSearchCV
clf = GridSearchCV(multi_output_regressor, param_grid=parameters, scoring='neg_mean_squared_error', cv=3, verbose=1, n_jobs=-1)
clf.fit(X_train_scaled, y_train)

# Save the best model
with open('gbr_model_for_prediction.pkl', 'wb') as file:
    pickle.dump(clf.best_estimator_, file)

# Predict on the test set
y_pred = clf.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'Root Mean Squared Error: {rmse}')
print(f'Mean Absolute Error: {mae}')
print(f'R-squared: {r2}')

print("Model saved successfully.")


Fitting 3 folds for each of 81 candidates, totalling 243 fits
Mean Squared Error: 100661.6284407742
Root Mean Squared Error: 317.27216776889554
Mean Absolute Error: 61.697817807248875
R-squared: 0.07576671016970488
Model saved successfully.
