## Importing the Libraries

In [40]:
import numpy as np
import pandas as pd

from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

## Importing the Dataset

In [41]:
# Load avalanche_data_clean.csv (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='avalanche_data_clean.csv')

In [42]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,snow_type_dry,snow_type_unknown,snow_type_wet,trigger_type_explosive,trigger_type_natural,trigger_type_unknown,max_elevation,min_elevation,aspect_degrees,length,width,perimeter,area,aval_size_class,weight_AAI,risk_index
0,1,0,0,0,1,0,2562.0,2484.0,42.0,101.0,123.0,355.0,8762.0,2,2,2
1,1,0,0,0,1,0,2494.0,2356.0,21.0,127.0,351.0,834.0,30522.0,3,3,2
2,1,0,0,0,0,0,2115.0,2017.0,200.0,166.0,85.0,454.0,7837.0,2,2,2
3,1,0,0,0,1,0,2085.0,1986.0,42.0,128.0,18.0,265.0,1522.0,2,2,2
4,1,0,0,0,1,0,2605.0,2529.0,240.0,127.0,63.0,324.0,5929.0,2,2,2


In [43]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(13608, 16)

## Splitting the features and label from the dataset

In [48]:
# The label is the 'risk_index' column; every remaining column is a feature.
y = df['risk_index']
X = df.drop('risk_index', axis=1)

## Splitting the dataset into training set and test set

In [49]:
# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions the same in both splits; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
    stratify=y,
)

## Feature Scaling

In [50]:
# Learn the per-feature mean and variance from the training split only, then
# apply that same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## GridSearch on Logistic Regression

In [58]:
# Candidate regularisation strengths and solvers for logistic regression.
param_grid = {
    'C': [0.1, 1.0, 10.0],
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
}
classifier = LogisticRegression()

# 5-fold cross-validated search over the grid on the standardised training
# features, using all available cores.
grid_search = GridSearchCV(estimator=classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Score the refitted best model on the standardised held-out split.
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)

Best Hyperparameters: {'C': 0.1, 'solver': 'newton-cg'}
Accuracy on Test Data: 0.5944158706833211


## GridSearch on Decision Tree Classifier

In [59]:
# Seed the tree so that tie-breaking between equally good splits is repeatable
# and the search gives the same result on every run (same seed as the
# train/test split).
classifier = DecisionTreeClassifier(random_state=0)

# Tree-size controls to search over; None leaves the depth unbounded.
param_grid = {'max_depth': [None, 5, 10, 15, 20],
              'min_samples_split': [2, 5, 10],
              'min_samples_leaf': [1, 2, 4]}

# 5-fold cross-validated search on the raw (unscaled) training features;
# tree splits do not depend on feature scale.
grid_search = GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Evaluate the refitted best tree on the held-out split.
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)

Best Hyperparameters: {'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 5}
Accuracy on Test Data: 0.5969875091844232


## GridSearch on Random Forest Classifier

In [60]:
# Seed the forest: bootstrap sampling and per-split feature sub-sampling are
# random, so without a seed the selected hyperparameters and the reported
# accuracy change from run to run (same seed as the train/test split).
classifier = RandomForestClassifier(random_state=0)

# Forest size, tree-size controls, and whether each tree sees a bootstrap sample.
param_grid = {'n_estimators': [10, 50, 100, 200],
              'max_depth': [None, 10, 20, 30],
              'min_samples_split': [2, 5, 10],
              'min_samples_leaf': [1, 2, 4],
              'bootstrap': [True, False]}

# 5-fold cross-validated search on the raw (unscaled) training features.
grid_search = GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Evaluate the refitted best forest on the held-out split.
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)

Best Hyperparameters: {'bootstrap': True, 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 200}
Accuracy on Test Data: 0.5984570168993387


## GridSearch on SVC

In [None]:
classifier = SVC()

# Define the hyperparameter grid for the classifier
param_grid = {'C': [0.1, 1.0, 10.0],
              'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
              'gamma': ['scale', 'auto']}

# Perform grid search with cross-validation.
# SVC kernels are not scale invariant, so fit on the standardised features
# produced by `scaler` (as the logistic-regression cell does) rather than on
# the raw columns, whose ranges differ by orders of magnitude.
grid_search = GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Evaluate the best model on the test data (same standardisation as training)
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

# Display the best hyperparameters and accuracy
print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)

## GridSearch on K-Nearest Neighbors

In [None]:
classifier = KNeighborsClassifier()

# Define the hyperparameter grid for the classifier
param_grid = {'n_neighbors': [3, 5, 7, 9],
              'weights': ['uniform', 'distance'],
              'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']}

# Perform grid search with cross-validation.
# Nearest-neighbour distances are dominated by the largest-valued columns when
# features are left unscaled, so fit on the standardised features produced by
# `scaler` (as the logistic-regression cell does).
grid_search = GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Evaluate the best model on the test data (same standardisation as training)
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

# Display the best hyperparameters and accuracy
print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)

## GridSearch on Gaussian Naive Bayes

In [None]:
# Gaussian Naive Bayes is evaluated with its default settings: the empty
# parameter grid gives GridSearchCV a single candidate, so the "search" only
# cross-validates (5 folds) and refits that one model.
classifier = GaussianNB()
grid_search = GridSearchCV(estimator=classifier, param_grid={}, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Held-out evaluation of the refitted model.
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the fitted model and its test accuracy.
print("Best Model:", best_classifier)
print("Accuracy on Test Data:", accuracy)

## GridSearch on XGBClassifier

In [None]:
# Create the estimator for this section. Without this assignment the search
# would reuse whatever `classifier` the previous cell left behind, and that
# estimator does not accept the XGBoost parameters below.
classifier = XGBClassifier()
# NOTE(review): recent XGBoost releases expect class labels encoded as
# 0..n_classes-1 -- confirm that `risk_index` satisfies this, or encode it first.

# Define the hyperparameter grid for the classifier
param_grid = {'n_estimators': [50, 100, 200],
              'max_depth': [3, 4, 5],
              'learning_rate': [0.01, 0.1, 0.2],
              'subsample': [0.8, 1.0],
              'colsample_bytree': [0.8, 1.0],
              'gamma': [0, 0.1, 0.2]}

# Perform grid search with cross-validation
grid_search = GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Evaluate the best model on the test data
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Display the best hyperparameters and accuracy
print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)

## GridSearch on CatBoost Classifier

In [None]:
# verbose=0 silences CatBoost's per-iteration training log, which would
# otherwise be printed for every fit of the search and for the final refit.
classifier = CatBoostClassifier(verbose=0)

# Define the hyperparameter grid for the classifier
param_grid = {'iterations': [100, 200, 300],
              'learning_rate': [0.01, 0.1, 0.2],
              'depth': [3, 4, 5],
              'l2_leaf_reg': [1, 3, 5]}

# Perform grid search with cross-validation
grid_search = GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Evaluate the best model on the test data
best_classifier = grid_search.best_estimator_
y_pred = best_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Display the best hyperparameters and accuracy
print("Best Hyperparameters:", grid_search.best_params_)
print("Accuracy on Test Data:", accuracy)

In [None]:
# GRU classifier on the tabular features (one fixed architecture; no
# hyperparameter search is run in this cell).


import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Input
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fix TensorFlow's seed so weight initialisation and shuffling are repeatable.
tf.random.set_seed(0)

# Step 1: Encode the labels as consecutive integers 0..num_classes-1.
# The softmax output is indexed by position, so the original label values are
# kept in `classes` to translate predictions back afterwards.
classes = np.unique(y_train)
num_classes = len(classes)
y_train_encoded = np.searchsorted(classes, y_train)

# Step 2: Shape the standardised features for a recurrent layer.
# The features are numeric columns, not token ids, so no tokenising, padding or
# embedding is applied: each sample becomes a sequence of `n_features`
# timesteps with one value per step. New names are used so that the
# `X_train` / `X_test` frames used by the other cells are left untouched.
n_features = X_train_scaled.shape[1]
X_train_seq = np.asarray(X_train_scaled, dtype='float32').reshape(-1, n_features, 1)
X_test_seq = np.asarray(X_test_scaled, dtype='float32').reshape(-1, n_features, 1)

# Step 3: Build the GRU-based model
model = Sequential()
model.add(Input(shape=(n_features, 1)))
model.add(GRU(units=128, activation='relu', return_sequences=True))
model.add(GRU(units=64, activation='relu'))
model.add(Dense(units=num_classes, activation='softmax'))

# Step 4: Compile. The sparse loss takes integer class indices directly;
# 'categorical_crossentropy' would require one-hot encoded labels.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Step 5: Train, holding back the last 20% of the training rows for validation
model.fit(X_train_seq, y_train_encoded, epochs=50, batch_size=32, validation_split=0.2)

# Step 6: Evaluate the model
y_pred = model.predict(X_test_seq)
# argmax gives positions in `classes`; map them back to the original labels
# before comparing with y_test.
y_pred_classes = classes[np.argmax(y_pred, axis=1)]
accuracy = accuracy_score(y_test, y_pred_classes)
print("Accuracy:", accuracy)
