# Forest Cover Type Prediction

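This notebook demonstrates how to predict forest cover types with machine learning, using the custom `ForestCoverPredictor` class: it loads and explores the data, preprocesses it, trains a model, visualizes the results, makes a few sample predictions, and inspects feature importances.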

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import warnings
warnings.filterwarnings('ignore')

# Import our custom predictor class
from forest_cover_predictor import ForestCoverPredictor

%matplotlib inline

## 1. Load and Explore Data

In [None]:
# Create the predictor instance that every later cell reuses.
predictor = ForestCoverPredictor()

# Load the dataset through the predictor (per the original note, load_data
# creates sample data for demonstration), keeping the loaded result under
# its own name before it goes through exploration.
raw_data = predictor.load_data('forest_cover_data.csv')

# Run the exploration step; `data` is bound to whatever explore_data returns
# and is what the preprocessing cell consumes.
data = predictor.explore_data(raw_data)

## 2. Data Preprocessing

In [None]:
# Preprocess data: preprocess_data returns a pair that is unpacked into X and
# y (presumably the feature matrix and the target labels — the actual
# transformations live in ForestCoverPredictor.preprocess_data and are not
# visible in this notebook).
X, y = predictor.preprocess_data(data)
# Print the dimensions of X as a quick sanity check before training.
print(f"Features prepared with shape: {X.shape}")

## 3. Model Training

In [None]:
# Train model. train_model returns five values, unpacked here as the
# train/test portions of X and y plus y_pred (presumably the predictions for
# X_test — confirm against ForestCoverPredictor.train_model).
# X_test, y_test and y_pred are reused by the visualization and
# sample-prediction cells below.
X_train, X_test, y_train, y_test, y_pred = predictor.train_model(X, y)

## 4. Results Visualization

In [None]:
# Visualize results: pass the test labels (y_test) and the predictions
# (y_pred) from the training cell to the predictor. What is actually plotted
# is decided inside ForestCoverPredictor.visualize_results (not shown here).
predictor.visualize_results(y_test, y_pred)

## 5. Sample Predictions

In [None]:
# Run the predictor on the first five rows of the test features.
sample = X_test.iloc[:5]
predictions = predictor.predict_cover_type(sample)

# Reference table of class id -> cover type name (ids are 1-based).
# NOTE: the report loop below does not read this table; it prints the
# 'cover_type' label that each prediction already carries.
cover_type_names = dict(enumerate(
    (
        'Spruce/Fir',
        'Lodgepole Pine',
        'Ponderosa Pine',
        'Cottonwood/Willow',
        'Aspen',
        'Douglas-fir',
        'Krummholz',
    ),
    start=1,
))

print("\nSample Predictions:")
for i, pred in enumerate(predictions):
    print(f"\nSample {i+1}:")

    # Each prediction is indexed like a dict with 'cover_type',
    # 'predicted_class' and 'probabilities' keys.
    label = pred['cover_type']
    class_id = pred['predicted_class']
    print(f"  Predicted Cover Type: {label} (Class {class_id})")

    # Confidence is the highest probability among all classes.
    confidence = max(pred['probabilities'].values())
    print(f"  Confidence: {confidence:.4f}")

## 6. Feature Importance Analysis

In [None]:
# Display top important features.
#
# The guard compares against None and checks the length explicitly instead of
# relying on truthiness:
#   * scikit-learn ensembles define __len__ (the number of fitted
#     estimators), so `bool(model)` on an unfitted ensemble raises
#     AttributeError instead of evaluating to False;
#   * if feature_names is a pandas Index or NumPy array rather than a list
#     (its type is not visible in this notebook), `bool(...)` raises
#     ValueError because the truth value of a multi-element array is ambiguous.
if (
    predictor.model is not None
    and predictor.feature_names is not None
    and len(predictor.feature_names) > 0
):
    # Pair each feature name with its importance score, sort descending and
    # keep the 15 highest-ranked rows.
    feature_importance = pd.DataFrame({
        'feature': predictor.feature_names,
        'importance': predictor.model.feature_importances_
    }).sort_values('importance', ascending=False).head(15)

    print("Top 15 Most Important Features:")
    print(feature_importance)

    # Plot feature importance as a horizontal bar chart (one bar per feature).
    plt.figure(figsize=(10, 8))
    sns.barplot(data=feature_importance, x='importance', y='feature')
    plt.title('Top 15 Feature Importances')
    plt.xlabel('Importance')
    plt.tight_layout()
    plt.show()
else:
    # Explain why nothing was displayed instead of silently skipping the cell.
    print(
        "Feature importances unavailable: the predictor has no model or "
        "feature names yet (run the training cell first)."
    )