In [3]:
from sklearn.datasets import fetch_covtype
import numpy as np
import pandas as pd

In [4]:
# Configuration for the Covertype dataset.

# Class id used as the "normal" class (2 maps to "LodgepolePine" below).
NORMAL_TARGET = 2
# Name of the label column in the combined DataFrame.
TARGET_COLUMN = 'Cover_Type'

# Class id -> class name, with ids starting at 1.
TARGET_DICT = dict(enumerate(
    [
        "Spruce-Fir",
        "LodgepolePine",
        "PonderosaPine",
        "CottonwoodWillow",
        "Aspen",
        "DouglasFir",
        "Krummholz",
    ],
    start=1,
))

# Class name -> class id; derived so it cannot drift from TARGET_DICT.
INV_TARGET_DICT = {name: class_id for class_id, name in TARGET_DICT.items()}

# Numerical columns (intended to be standardized).
NUMERICAL_COLUMNS = [
    'Elevation',
    'Aspect',
    'Slope',
    'Horizontal_Distance_To_Hydrology',
    'Vertical_Distance_To_Hydrology',
    'Horizontal_Distance_To_Roadways',
    'Hillshade_9am',
    'Hillshade_Noon',
    'Hillshade_3pm',
    'Horizontal_Distance_To_Fire_Points',
]
# Categorical columns (intended to be one-hot encoded); none are listed.
CATEGORICAL_COLUMNS = list()
# Ordinal columns (intended to be label encoded); none are listed.
ORDINAL_COLUMNS = list()

In [6]:
# Execute the shared notebook as an IPython script, so the names it defines
# become available in this notebook's namespace.
# NOTE(review): common.ipynb is not visible here - presumably it provides
# shared helpers and may read the dataset constants defined earlier; confirm.
%run common.ipynb

In [7]:
def get_covtype_df():
    """Load the Covertype dataset as one DataFrame of features plus target.

    Calls ``fetch_covtype(as_frame=True)`` (scikit-learn may download the
    data the first time it is requested), joins the feature frame and the
    target column side by side, and prints the most frequent class label and
    the feature names as a quick sanity check.

    Returns:
        tuple: ``(all_df, main_labels)`` where ``all_df`` contains every
        feature column followed by the ``TARGET_COLUMN`` column, and
        ``main_labels`` is the list of feature names with ``TARGET_COLUMN``
        appended.
    """
    data = fetch_covtype(as_frame=True)  # as_frame=True yields pandas objects
    features = data['data']
    # Name the target explicitly so the TARGET_COLUMN lookup below does not
    # depend on whatever name scikit-learn assigns to the target Series.
    target = data['target'].rename(TARGET_COLUMN)

    # Combine features and target into one DataFrame (target is the last column).
    all_df = pd.concat([features, target], axis=1)

    # Column headers: a new list, so data.feature_names is never mutated.
    main_labels = [*data.feature_names, TARGET_COLUMN]

    # mode() returns a Series; print only the label itself rather than the
    # Series repr (index, name and dtype). With ties, the first mode is shown.
    normal_class = all_df[TARGET_COLUMN].mode().iat[0]
    print('Normal class: ', normal_class)
    print('Feature names: ', data.feature_names)
    return (all_df, main_labels)

In [8]:
# Load the dataset once; keep both the combined frame and its column labels.
all_df, main_labels = get_covtype_df()
# Last expression of the cell: display the first rows as a quick visual check.
all_df.head()

Normal class:  0    2
Name: Cover_Type, dtype: int32
Feature names:  ['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology', 'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways', 'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm', 'Horizontal_Distance_To_Fire_Points', 'Wilderness_Area_0', 'Wilderness_Area_1', 'Wilderness_Area_2', 'Wilderness_Area_3', 'Soil_Type_0', 'Soil_Type_1', 'Soil_Type_2', 'Soil_Type_3', 'Soil_Type_4', 'Soil_Type_5', 'Soil_Type_6', 'Soil_Type_7', 'Soil_Type_8', 'Soil_Type_9', 'Soil_Type_10', 'Soil_Type_11', 'Soil_Type_12', 'Soil_Type_13', 'Soil_Type_14', 'Soil_Type_15', 'Soil_Type_16', 'Soil_Type_17', 'Soil_Type_18', 'Soil_Type_19', 'Soil_Type_20', 'Soil_Type_21', 'Soil_Type_22', 'Soil_Type_23', 'Soil_Type_24', 'Soil_Type_25', 'Soil_Type_26', 'Soil_Type_27', 'Soil_Type_28', 'Soil_Type_29', 'Soil_Type_30', 'Soil_Type_31', 'Soil_Type_32', 'Soil_Type_33', 'Soil_Type_34', 'Soil_Type_35', 'Soil_Type_36', 'Soil_Type_37', 'Soil_Type_38', 'Soil

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,...,Soil_Type_31,Soil_Type_32,Soil_Type_33,Soil_Type_34,Soil_Type_35,Soil_Type_36,Soil_Type_37,Soil_Type_38,Soil_Type_39,Cover_Type
0,2596.0,51.0,3.0,258.0,0.0,510.0,221.0,232.0,148.0,6279.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
1,2590.0,56.0,2.0,212.0,-6.0,390.0,220.0,235.0,151.0,6225.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
2,2804.0,139.0,9.0,268.0,65.0,3180.0,234.0,238.0,135.0,6121.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3,2785.0,155.0,18.0,242.0,118.0,3090.0,238.0,238.0,122.0,6211.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4,2595.0,45.0,2.0,153.0,-1.0,391.0,220.0,234.0,150.0,6172.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
