In [8]:
pip install geopandas

Collecting geopandas
  Obtaining dependency information for geopandas from https://files.pythonhosted.org/packages/c4/64/7d344cfcef5efddf9cf32f59af7f855828e9d74b5f862eddf5bfd9f25323/geopandas-1.0.1-py3-none-any.whl.metadata
  Downloading geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)
Collecting pyogrio>=0.7.2 (from geopandas)
  Obtaining dependency information for pyogrio>=0.7.2 from https://files.pythonhosted.org/packages/8d/2c/c761e6adeb81bd4029a137b3240e7214a8c9aaf225883356196afd6ef9d8/pyogrio-0.10.0-cp311-cp311-macosx_12_0_arm64.whl.metadata
  Downloading pyogrio-0.10.0-cp311-cp311-macosx_12_0_arm64.whl.metadata (5.5 kB)
Collecting pyproj>=3.3.0 (from geopandas)
  Obtaining dependency information for pyproj>=3.3.0 from https://files.pythonhosted.org/packages/2d/4d/610fe2a17de71b4fe210af69ce25f2d65379ba0a48299129894d0d0988ee/pyproj-3.7.0-cp311-cp311-macosx_14_0_arm64.whl.metadata
  Downloading pyproj-3.7.0-cp311-cp311-macosx_14_0_arm64.whl.metadata (31 kB)
Collecting shapely>=2.

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import geopandas as gpd
from sklearn.preprocessing import StandardScaler

# Load geospatial data
def load_data(file_path):
    """
    Read a geospatial dataset from disk with GeoPandas.

    Thin wrapper around ``gpd.read_file``; any error raised by the reader
    (for example when the file is missing) propagates to the caller.

    :param file_path: Location of the dataset, passed straight to ``gpd.read_file``
    :return: The object produced by ``gpd.read_file`` (a GeoDataFrame)
    """
    return gpd.read_file(file_path)

# Preprocess the data
def preprocess_data(data, target_column, drop_columns=None):
    """
    Turn a raw table into a feature matrix and a target vector.

    Steps, in order: optionally drop unwanted columns, discard every row that
    contains a missing value, split off the target, and one-hot encode the
    remaining categorical features. The input frame itself is not modified.

    :param data: GeoDataFrame (or DataFrame) holding features and target
    :param target_column: Name of the column to use as the target variable
    :param drop_columns: Optional list of column names to remove first;
        nothing is dropped when this is None or empty
    :return: Tuple ``(X, y)`` of encoded features and the target column
    """
    # Only call drop() when there is actually something to remove.
    frame = data.drop(columns=drop_columns) if drop_columns else data

    # Rows with any missing value are excluded from both X and y.
    complete_rows = frame.dropna()

    target = complete_rows[target_column]
    features = complete_rows.drop(columns=[target_column])

    # drop_first=True omits the first level of each categorical column.
    encoded_features = pd.get_dummies(features, drop_first=True)

    return encoded_features, target

# Split the data
def split_data(X, y, test_size=0.2, random_state=42, stratify=None):
    """
    Split data into training and testing sets.

    :param X: Feature matrix
    :param y: Target values aligned with ``X``
    :param test_size: Forwarded to ``train_test_split`` as ``test_size``
        (default 0.2)
    :param random_state: Seed forwarded to ``train_test_split`` so the split
        is repeatable (default 42)
    :param stratify: Optional labels forwarded to ``train_test_split`` as
        ``stratify``; the default ``None`` keeps the plain shuffled split
    :return: The sequence returned by ``train_test_split``, unpackable as
        ``X_train, X_test, y_train, y_test``
    """
    return train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=stratify,
    )

# Train a classification model
def train_model(X_train, y_train):
    """
    Fit a Random Forest classifier on the training data.

    The estimator is created with ``random_state=42`` and otherwise default
    settings.

    :param X_train: Training features
    :param y_train: Training target
    :return: The fitted ``RandomForestClassifier``
    """
    # fit() returns the estimator itself, so the fitted model can be
    # returned directly from the chained call.
    return RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Evaluate the model
def evaluate_model(model, X_test, y_test):
    """
    Print accuracy, classification report and confusion matrix for a model.

    Predictions are computed once from ``X_test`` and compared against
    ``y_test``; results are written to stdout and nothing is returned.

    :param model: Trained model exposing ``predict``
    :param X_test: Testing features
    :param y_test: Testing target
    """
    predictions = model.predict(X_test)

    # Each entry pairs the printed heading with the metric that follows it;
    # metrics are computed and printed one at a time, in this order.
    sections = (
        ("Accuracy:", accuracy_score),
        ("Classification Report:\n", classification_report),
        ("Confusion Matrix:\n", confusion_matrix),
    )
    for heading, metric in sections:
        print(heading, metric(y_test, predictions))

# Main function
# Main function: load -> preprocess -> split -> scale -> train -> evaluate
if __name__ == "__main__":
    # Path to the geospatial dataset.
    # NOTE: this is a machine-specific absolute path; the load step fails if
    # the file is not present at exactly this location.
    file_path = "/Users/dinesh/Documents/College/DATASET/wildfire.shp"  # Replace with your actual file path

    # Load the data
    geospatial_data = load_data(file_path)

    # Specify the target column and columns to drop
    target_column = "wildfire_risk"
    drop_columns = ["geometry"]  # Drop geometry for modeling (optional)

    # Preprocess the data
    X, y = preprocess_data(geospatial_data, target_column, drop_columns)

    # Split BEFORE scaling so the held-out rows never influence the scaler.
    X_train, X_test, y_train, y_test = split_data(X, y)

    # Scale features: learn mean/variance from the training partition only,
    # then apply that same transformation to the test partition. Fitting on
    # the full dataset would leak test-set statistics into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train the model
    model = train_model(X_train, y_train)

    # Evaluate the model
    evaluate_model(model, X_test, y_test)

    # Save the model if needed
    # import joblib
    # joblib.dump(model, "wildfire_risk_model.pkl")

DataSourceError: /Users/dinesh/Documents/College/DATASET/wildfire.shp: No such file or directory