## Importing the Libraries

In [None]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

## Importing the Dataset

In [None]:
# Load the dataset from a CSV file in the current working directory.
df = pd.read_csv(filepath_or_buffer='avalanche_data_clean.csv')

In [None]:
# Preview the first five rows (the default row count, spelled out).
df.head(n=5)

In [None]:
# Show the DataFrame's dimensions as a (rows, columns) tuple.
df.shape

## Splitting the features and label from the dataset

In [None]:
# The 'risk_index' column is the label; every other column is a feature.
y = df.loc[:, 'risk_index']
X = df.drop('risk_index', axis=1)

## Splitting the dataset into training set and test set

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the label
# proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    stratify=y,
)

## Feature Scaling

In [None]:
# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to both splits so that no
# test-set statistics influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## GridSearch on Logistic Regression

In [None]:
# Tune logistic regression on the standardized features.
# random_state pins the stochastic solvers in the grid (liblinear, sag, saga)
# so repeated runs select the same model, matching the fixed seed used for
# the train/test split. max_iter is raised above the default of 100 to give
# the sag/saga solvers more room to converge.
classifier = LogisticRegression(random_state=0, max_iter=1000)

# Candidate regularization strengths and optimizers to compare.
param_grid = {'C': [0.1, 1.0, 10.0], 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}

# Exhaustive search with 5-fold cross-validation on all available cores.
grid_search = GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Score the refitted best model once on the held-out test split.
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)

## GridSearch on Decision Tree Classifier

In [None]:
# Tune a decision tree on the unscaled features (tree splits do not depend
# on feature scale). random_state fixes the tree's internal feature
# permutation so the selected hyperparameters and reported accuracy are
# reproducible, consistent with the seeded train/test split.
classifier = DecisionTreeClassifier(random_state=0)

# Depth and minimum-sample limits that control how far the tree can grow.
param_grid = {'max_depth': [None, 5, 10, 15, 20],
              'min_samples_split': [2, 5, 10],
              'min_samples_leaf': [1, 2, 4]}

# Exhaustive search with 5-fold cross-validation on all available cores.
grid_search = GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Score the refitted best model once on the held-out test split.
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)

## GridSearch on Random Forest Classifier

In [None]:
# Tune a random forest on the unscaled features. random_state fixes the
# bootstrap sampling and per-split feature sampling so the selected
# hyperparameters and reported accuracy are reproducible, consistent with
# the seeded train/test split.
classifier = RandomForestClassifier(random_state=0)

# 4 * 4 * 3 * 3 * 2 = 288 candidate settings, each fitted once per fold.
param_grid = {'n_estimators': [10, 50, 100, 200],
              'max_depth': [None, 10, 20, 30],
              'min_samples_split': [2, 5, 10],
              'min_samples_leaf': [1, 2, 4],
              'bootstrap': [True, False]}

# Exhaustive search with 5-fold cross-validation on all available cores.
grid_search = GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Score the refitted best model once on the held-out test split.
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)

## GridSearch on SVC

In [None]:
# Tune a support vector classifier.
classifier = SVC()

# Define the hyperparameter grid for the classifier
param_grid = {'C': [0.1, 1.0, 10.0],
              'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
              'gamma': ['scale', 'auto']}

# Perform grid search with cross-validation.
# SVMs are sensitive to feature scale, so fit on the standardized features
# produced in the Feature Scaling step rather than the raw ones.
grid_search = GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Evaluate the best model on the test data, transformed by the same scaler
# that was fitted on the training split.
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

# Display the best hyperparameters and accuracy
print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)