In [2]:

import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import classification_report, confusion_matrix

df = pd.read_csv("/content/dataframe.csv")
# Load your data (Here, I'm using the Iris dataset as an ex

In [7]:
X = df[["distance", "area per pixel"]]
y = df["Bags used "]
X_test = pd.read_csv("/content/test_dataframe.csv")

In [12]:
# Feature scaling (important for SVMs)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test[["distance", "area per pixel"]])

# Define the parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100],            # Regularization parameter
    'gamma': [1, 0.1, 0.01, 0.001],    # Kernel coefficient for 'rbf', 'poly' and 'sigmoid'
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'] # Kernel type
}

# Create a GridSearchCV object
grid = GridSearchCV(SVR(), param_grid, refit=True, cv=10, verbose=2, n_jobs = -1)

# Fit the model using grid search
grid.fit(X_scaled, y)

# Display the best parameters found by grid search
print("Best Parameters found: ", grid.best_params_)



Fitting 10 folds for each of 64 candidates, totalling 640 fits
Best Parameters found:  {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}


In [19]:
model = grid.best_estimator_
predictions = model.predict(X_test_scaled)
X_test["predictions"] = predictions

In [17]:
X_test[["Pothole", "predictions"]].to_csv("submission3.csv", index = False)

In [None]:
X_test

In [18]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],         # Number of boosting stages
    'learning_rate': [0.01, 0.1, 0.2],       # Step size shrinkage used in updates
    'max_depth': [3, 4, 5],                  # Maximum depth of the individual estimators
    'min_samples_split': [2, 5, 10],         # Minimum number of samples required to split an internal node
    'min_samples_leaf': [1, 2, 4],           # Minimum number of samples required to be at a leaf node
    'subsample': [0.8, 0.9, 1.0]             # Fraction of samples used for fitting the individual base learners
}

# Create a GridSearchCV object
grid = GridSearchCV(GradientBoostingRegressor(), param_grid, refit=True, cv=5, verbose=2, n_jobs=-1)

# Fit the model using grid search
grid.fit(X_scaled, y)

# Display the best parameters found by grid search
print("Best Parameters found: ", grid.best_params_)


Fitting 5 folds for each of 729 candidates, totalling 3645 fits
Best Parameters found:  {'learning_rate': 0.01, 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100, 'subsample': 0.8}


In [20]:
model = grid.best_estimator_
predictions = model.predict(X_test_scaled)
X_test["predictions"] = predictions

In [23]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],       # Number of trees in the forest
    'max_depth': [None, 10, 20, 30],       # Maximum depth of the trees
    'min_samples_split': [2, 5, 10],       # Minimum number of samples required to split an internal node
    'min_samples_leaf': [1, 2, 4],         # Minimum number of samples required to be at a leaf node
    'bootstrap': [True, False]             # Whether bootstrap samples are used when building trees
}

# Create a GridSearchCV object
grid = GridSearchCV(RandomForestRegressor(), param_grid, refit=True, cv=10, verbose=2, n_jobs=-1)

# Fit the model using grid search
grid.fit(X_scaled, y)

# Display the best parameters found by grid search
print("Best Parameters found: ", grid.best_params_)


Fitting 10 folds for each of 216 candidates, totalling 2160 fits
Best Parameters found:  {'bootstrap': True, 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 200}


In [24]:
model = grid.best_estimator_
predictions = model.predict(X_test_scaled)
X_test["predictions"] = predictions

In [25]:
X_test[["Pothole", "predictions"]].to_csv("submission5.csv", index = False)

In [26]:
from catboost import CatBoostRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score

# Define the parameter grid
param_grid = {
    'iterations': [100, 200, 300],             # Number of boosting iterations
    'learning_rate': [0.01, 0.1, 0.2],         # Learning rate
    'depth': [3, 4, 5],                        # Depth of the trees
    'l2_leaf_reg': [1, 3, 5],                  # L2 regularization term on weights
    'border_count': [32, 64, 128],             # Number of splits for numerical features
}

# Initialize CatBoostRegressor
catboost_model = CatBoostRegressor(eval_metric='RMSE', verbose=0)

# Create a GridSearchCV object
grid = GridSearchCV(estimator=catboost_model, param_grid=param_grid, cv=10, n_jobs=-1, verbose=2)

# Fit the model using grid search
grid.fit(X_scaled, y)

# Display the best parameters found by grid search
print("Best Parameters found: ", grid.best_params_)

Fitting 10 folds for each of 243 candidates, totalling 2430 fits
Best Parameters found:  {'border_count': 128, 'depth': 3, 'iterations': 100, 'l2_leaf_reg': 3, 'learning_rate': 0.2}


In [27]:
model = grid.best_estimator_
predictions = model.predict(X_test_scaled)
X_test["predictions"] = predictions

In [None]:
X_test[["Pothole", "predictions"]].to_csv("submission6.csv", index = False)