In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Continuous features that are passed through sklearn's `normalize`
# when the train and test frames are loaded.
columns_for_normalization = [
    'Aspect',
    'Slope',
    'Horizontal_Distance_To_Hydrology',
    'Vertical_Distance_To_Hydrology',
    'Hillshade_9am',
    'Hillshade_Noon',
    'Hillshade_3pm',
    'Horizontal_Distance_To_Fire_Points',
]

# Features earmarked for binning; not referenced by the cells visible in this notebook.
columns_for_bins = [
    'Elevation',
    'Horizontal_Distance_To_Roadways',
]

**_Get data_**

In [3]:
# Read the labelled training rows from disk.
dataset = pd.read_csv('train.csv')

# Rescale the selected continuous columns.
# NOTE(review): sklearn's `normalize` defaults to axis=1 / L2, i.e. each ROW of
# the selected columns is scaled to unit norm (not each feature) - confirm this
# is the intended preprocessing.
normalized_train_values = normalize(dataset[columns_for_normalization])
dataset[columns_for_normalization] = normalized_train_values

# The row identifier is not a feature, so remove it before modelling.
dataset = dataset.drop(columns=["Id"])

**_Get training data_**

In [4]:
# Target: the cover-type label column.
y = dataset["Cover_Type"]
# Features: every remaining column of the training frame.
X = dataset.drop(columns=["Cover_Type"])

In [5]:
# Show the feature column names left in X after removing the "Cover_Type" target.
X.columns

Index(['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology',
       'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways',
       'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
       'Horizontal_Distance_To_Fire_Points', 'Wilderness_Area1',
       'Wilderness_Area2', 'Wilderness_Area3', 'Wilderness_Area4',
       'Soil_Type1', 'Soil_Type2', 'Soil_Type3', 'Soil_Type4', 'Soil_Type5',
       'Soil_Type6', 'Soil_Type7', 'Soil_Type8', 'Soil_Type9', 'Soil_Type10',
       'Soil_Type11', 'Soil_Type12', 'Soil_Type13', 'Soil_Type14',
       'Soil_Type15', 'Soil_Type16', 'Soil_Type17', 'Soil_Type18',
       'Soil_Type19', 'Soil_Type20', 'Soil_Type21', 'Soil_Type22',
       'Soil_Type23', 'Soil_Type24', 'Soil_Type25', 'Soil_Type26',
       'Soil_Type27', 'Soil_Type28', 'Soil_Type29', 'Soil_Type30',
       'Soil_Type31', 'Soil_Type32', 'Soil_Type33', 'Soil_Type34',
       'Soil_Type35', 'Soil_Type36', 'Soil_Type37', 'Soil_Type38',
       'Soil_Type39', 'Soil_Type40'],
      dtype='object')

**_Get test data_**

In [6]:
# Read the unlabelled rows that need a prediction.
test_data = pd.read_csv('test.csv')

# Apply the same `normalize` call that was used on the training frame.
normalized_test_values = normalize(test_data[columns_for_normalization])
test_data[columns_for_normalization] = normalized_test_values

# Keep an independent copy of the ids (used later as the index of the saved
# predictions), then remove the column so the frame has only model features.
id_column = test_data["Id"].copy()
test_data = test_data.drop(columns=["Id"])

**_Train model_**

In [7]:
# Build the classifier; the fixed random_state keeps the forest reproducible
# and n_jobs=-1 uses all available cores.
model = RandomForestClassifier(n_jobs=-1, random_state=42)

# Fit exactly once. The original cell chained `.fit(X, y)` onto the constructor
# and then called `model.fit(X, y)` again, which rebuilt the identical forest
# and doubled the training time.
model.fit(X, y)

# Predict a cover type for every test row and index the result by the saved
# ids so it can be written out as an (Id, Cover_Type) table.
y_predicted = model.predict(test_data)
df_for_saving = pd.DataFrame(y_predicted, index=id_column, columns=["Cover_Type"])

**_Save predictions_**

In [9]:
# Write the predictions to disk; the index (the test-row ids) becomes the
# first CSV column under the header "Id".
df_for_saving.to_csv(
    path_or_buf='cover_type.csv',
    index_label="Id",
)

**_Cover type labels (values of `Cover_Type`)_**

1 - Spruce/Fir

2 - Lodgepole Pine

3 - Ponderosa Pine

4 - Cottonwood/Willow

5 - Aspen

6 - Douglas-fir

7 - Krummholz

In [None]:
# Use a wide default figure size for the bar charts.
plt.rcParams["figure.figsize"] = (16,8)

# Count training rows per (Wilderness_Area1 flag, Cover_Type) pair and pivot the
# cover types into columns. The original cell read from `data`, which is never
# defined in this notebook; the training frame loaded above is `dataset`.
area1_cover_counts = dataset.groupby(['Wilderness_Area1', 'Cover_Type'])['Wilderness_Area1'].count().unstack('Cover_Type')

# Stacked bars: one bar per flag value, one segment per cover type 1-7.
ax = area1_cover_counts[[1, 2, 3, 4, 5, 6, 7]].plot(kind='bar', stacked=True)
ax.set(title='Cover types by Wilderness_Area1', ylabel='Number of samples');

In [None]:
# Count training rows per (Wilderness_Area2 flag, Cover_Type) pair and pivot the
# cover types into columns. The original cell read from `data`, which is never
# defined in this notebook; the training frame loaded above is `dataset`.
area2_cover_counts = dataset.groupby(['Wilderness_Area2', 'Cover_Type'])['Wilderness_Area2'].count().unstack('Cover_Type')

# Stacked bars: one bar per flag value, one segment per cover type 1-7.
ax = area2_cover_counts[[1, 2, 3, 4, 5, 6, 7]].plot(kind='bar', stacked=True)
ax.set(title='Cover types by Wilderness_Area2', ylabel='Number of samples');

In [None]:
# Count training rows per (Wilderness_Area3 flag, Cover_Type) pair and pivot the
# cover types into columns. The original cell read from `data`, which is never
# defined in this notebook; the training frame loaded above is `dataset`.
area3_cover_counts = dataset.groupby(['Wilderness_Area3', 'Cover_Type'])['Wilderness_Area3'].count().unstack('Cover_Type')

# Stacked bars: one bar per flag value, one segment per cover type 1-7.
ax = area3_cover_counts[[1, 2, 3, 4, 5, 6, 7]].plot(kind='bar', stacked=True)
ax.set(title='Cover types by Wilderness_Area3', ylabel='Number of samples');

In [None]:
# Count training rows per (Wilderness_Area4 flag, Cover_Type) pair and pivot the
# cover types into columns. The original cell read from `data`, which is never
# defined in this notebook; the training frame loaded above is `dataset`.
area4_cover_counts = dataset.groupby(['Wilderness_Area4', 'Cover_Type'])['Wilderness_Area4'].count().unstack('Cover_Type')

# Stacked bars: one bar per flag value, one segment per cover type 1-7.
ax = area4_cover_counts[[1, 2, 3, 4, 5, 6, 7]].plot(kind='bar', stacked=True)
ax.set(title='Cover types by Wilderness_Area4', ylabel='Number of samples');