In [1]:
#Code developed by Cassady Harraden, Mineral Deposit Research Unit, University of British Columbia, June 2024
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

#Load the CSV data
data = pd.read_csv('19FB-131_Counts_MinLabel.csv')

#Identify feature columns (excluding 'MinLabel', 'x', 'y', 'Video')
feature_columns = [col for col in data.columns if col not in ['MinLabel', 'x', 'y', 'Video']]

# Prepare features and target
features = data[feature_columns]
target = data['MinLabel']

# Handle missing values in features
features = features.fillna(features.mean())

#Separate rows with no MinLabel
missing_minlabel_indices = data[data['MinLabel'].isna()].index
non_missing_minlabel_indices = data.dropna(subset=['MinLabel']).index

# Create separate DataFrames
features_with_minlabel = features.loc[non_missing_minlabel_indices]
target_with_minlabel = target.loc[non_missing_minlabel_indices]

#Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features_with_minlabel, target_with_minlabel, test_size=0.2, random_state=42)

#Train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

#Predict for all rows
data['PredMin'] = rf_model.predict(features)

#Report model accuracy on MinLabel rows
y_pred_test = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_test)
report = classification_report(y_test, y_pred_test)
print(f'Model Accuracy: {accuracy}')
print('Classification Report:')
print(report)

# OPTIONAL: save the model for future use
joblib.dump(rf_model, '19FB-131_RFModel.pkl')

#Export Dataframe with predictions to a new CSV file
data.to_csv('19FB-131_RFPredMin.csv', index=False)

print("Model training, evaluation, and prediction completed successfully!")


  data = pd.read_csv('19FB-131_Counts_MinLabel.csv')


Model Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

  Andalusite       1.00      1.00      1.00        12
    Corundum       1.00      1.00      1.00        20
   Muscovite       1.00      1.00      1.00        14
      Pyrite       1.00      1.00      1.00        25
      Quartz       1.00      1.00      1.00        14

    accuracy                           1.00        85
   macro avg       1.00      1.00      1.00        85
weighted avg       1.00      1.00      1.00        85

Model training, evaluation, and prediction completed successfully!
