   # Forest Cover Type Analysis and Prediction

# Import Libraries

In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

#Some Styling
import plotly.io as pio
pio.templates.default = "plotly_dark"
sns.set_style("darkgrid")


#displaying markdown
from IPython.display import Markdown
def bold(string):
    """Render ``string`` as Markdown in the notebook output.

    The text is passed to ``Markdown`` unchanged, so no emphasis is added
    here; include Markdown markup (for example ``**text**``) in ``string``
    when bold output is wanted.

    Args:
        string: Markdown source to render.
    """
    # Imported explicitly so the helper does not depend on IPython having
    # injected ``display`` into the interactive namespace.
    from IPython.display import display

    display(Markdown(string))
    

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))
        
# Silence every warning raised from here on to keep the notebook output clean.
# NOTE(review): this also hides deprecation warnings from pandas/plotly.
import warnings
warnings.filterwarnings('ignore')

# Load Dataset

In [None]:
# Only the training split of the competition data is analysed here.
forest = pd.read_csv(
    "/kaggle/input/forest-cover-type-prediction/train.csv"
)

In [None]:
# Preview the first rows of the loaded frame.
forest.head()

# Dataset Info

The dataset includes four wilderness areas located in the Roosevelt National Forest of northern Colorado.

The Roosevelt National Forest is a National Forest located in north central Colorado.

Each observation is a 30m x 30m patch.

* Elevation - Elevation in meters
* Aspect - Aspect in degrees azimuth

To learn how aspect works, please refer to the following page:
https://pro.arcgis.com/en/pro-app/tool-reference/3d-analyst/how-aspect-works.htm

To learn how slope works, please refer to the following page:
https://pro.arcgis.com/en/pro-app/tool-reference/3d-analyst/slope.htm

* Slope - Slope in degrees
* Horizontal_Distance_To_Hydrology - Horz Dist to nearest surface water features
* Vertical_Distance_To_Hydrology - Vert Dist to nearest surface water features
* Horizontal_Distance_To_Roadways - Horz Dist to nearest roadway
* Hillshade_9am (0 to 255 index) - Hillshade index at 9am, summer solstice

To learn how hillshade works, please refer to the following page:
https://pro.arcgis.com/en/pro-app/tool-reference/3d-analyst/hillshade.htm

* Hillshade_Noon (0 to 255 index) - Hillshade index at noon, summer solstice
* Hillshade_3pm (0 to 255 index) - Hillshade index at 3pm, summer solstice
* Horizontal_Distance_To_Fire_Points - Horz Dist to nearest wildfire ignition points
* Wilderness_Area (4 binary columns, 0 = absence or 1 = presence) - Wilderness area designation
* Soil_Type (40 binary columns, 0 = absence or 1 = presence) - Soil Type designation
* Cover_Type (7 types, integers 1 to 7) - Forest Cover Type designation

#### Seven Types of Forest Cover
1 - Spruce/Fir

2 - Lodgepole Pine

3 - Ponderosa Pine

4 - Cottonwood/Willow

5 - Aspen

6 - Douglas-fir

7 - Krummholz

#### Wilderness Areas

1 - Rawah Wilderness Area

2 - Neota Wilderness Area

3 - Comanche Peak Wilderness Area

4 - Cache la Poudre Wilderness Area

# Descriptive Statistics

In [None]:
# (number of rows, number of columns) of the loaded frame.
forest.shape

Our Dataset consists of 15120 rows and 56 columns.

In [None]:
# Column names, dtypes and non-null counts.
forest.info()

## Description

#### The following tables give us detailed statistical description of each feature.

In [None]:
# Summary statistics for the columns at positions 1-16 (position 0 is skipped).
forest.iloc[:,1:17].describe()

In [None]:
# Summary statistics for the columns at positions 17-36.
forest.iloc[:,17:37].describe()

In [None]:
# Summary statistics for the remaining columns, from position 37 onwards.
forest.iloc[:,37:].describe()

# Skewness

#### Skewness is a measure of symmetry in a distribution.
* If the skewness is equal to zero, the data is perfectly symmetrical.
* If the skewness is positive, the data is positively skewed (skewed right).
* If the skewness is negative, the data is negatively skewed (skewed left).

In [None]:
# Skewness of every column of the frame.
forest.skew()

We can see that many are positively skewed and Soil_Type8 and Soil_Type25 are highly positively skewed.

Hillshade data are negatively skewed.

* And we'll have a better look at the skewness with the help of boxplots.

# Missing Values

In [None]:
# Number of missing values in each column.
forest.isna().sum()

### As we can see , there are no missing values in this dataset.

### We are good to go.

The target variable of our data, i.e. Cover_Type, is encoded as integers from 1 to 7.

We replace these numeric values with the original cover type names for better analysis.

In [None]:
# Map the integer class codes to readable cover-type names.
cover_type_names = {
    1: 'Spruce/Fir',
    2: 'Lodgepole Pine',
    3: 'Ponderosa Pine',
    4: 'Cottonwood/Willow',
    5: 'Aspen',
    6: 'Douglas-fir',
    7: 'Krummholz',
}
# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: the chained in-place form does not update ``forest`` when
# pandas Copy-on-Write is active.
forest['Cover_Type'] = forest['Cover_Type'].replace(cover_type_names)

# Shorter column names keep plot axes and tables readable.
forest = forest.rename(columns={
    "Wilderness_Area1": "Rawah_WA",
    "Wilderness_Area2": "Neota_WA",
    "Wilderness_Area3": "Comanche_Peak_WA",
    "Wilderness_Area4": "Cache_la_Poudre_WA",
    "Horizontal_Distance_To_Hydrology": "HD_Hydrology",
    "Vertical_Distance_To_Hydrology": "VD_Hydrology",
    "Horizontal_Distance_To_Roadways": "HD_Roadways",
    "Horizontal_Distance_To_Fire_Points": "HD_Fire_Points",
})

In [None]:
# Show the column labels after the rename.

forest.columns

In [None]:
# Collapse the one-hot Wilderness-area and Soil-type indicator columns into a
# single categorical column each, which is easier to group and plot by.
# Columns are selected by name rather than by position so the cell does not
# silently break if the column order changes.
wilderness_cols = ['Rawah_WA', 'Neota_WA', 'Comanche_Peak_WA', 'Cache_la_Poudre_WA']
soil_cols = [f'Soil_Type{i}' for i in range(1, 41)]

# idxmax(axis=1) yields, per row, the label of the first column that equals 1.
forest['Wild Areas'] = (forest[wilderness_cols] == 1).idxmax(axis=1)

# Shorten "Soil_TypeN" to "STN" so the labels stay readable in the plots.
forest['Soil types'] = (
    (forest[soil_cols] == 1)
    .idxmax(axis=1)
    .str.replace('Soil_Type', 'ST', regex=False)
)

# The row identifier and the indicator columns are no longer needed.
forest = forest.drop(columns=['Id', *wilderness_cols, *soil_cols])

# Feature Analysis..

# Categorical Features

##### Let's Look at the categorical features first and then we can compare all the numerical features with them.

## Cover Type

#### Cover Type feature consists of seven forest cover types as seen in the description.

In [None]:
# Number of samples per forest cover type, one coloured bar per class.
fig = px.histogram(
    forest,
    x="Cover_Type",
    color="Cover_Type",
    width=800,
    height=400,
)
fig.show()

* We can see that we have the same amount of data for every forest cover type.
* Does that mean the same amount of data was taken from each of the four Wild Areas?
* Let's have a look.

### Wilderness Areas

#### The dataset consists of four wilderness areas located in the Roosevelt National Forest of northern Colorado.

In [None]:
# Share of observations recorded in each wilderness area.
fig = px.pie(forest, names="Wild Areas", width=800, height=300)
fig.show()

# Cover-type counts inside each wilderness area, drawn side by side.
fig = px.histogram(
    forest,
    x="Wild Areas",
    color="Cover_Type",
    barmode="group",
    width=800,
    height=400,
)
fig.show()

* Now we can clearly see the difference in the above plots.


##### Rawah_WA
* Krummholz
* Spruce/Fir
* Lodgepole Pine
* Aspen

##### Comanche_Peak_WA
* Krummholz
* Spruce/Fir
* Lodgepole Pine
* Aspen
* Douglas-fir
* Ponderosa Pine

##### Cache_La_Poudre
* Cottonwood/Willow
* Douglas-fir
* Ponderosa Pine

##### Neota_WA
* Krummholz
* Spruce/Fir
* Lodgepole Pine


Cache_La_Poudre and Neota Wilderness Areas have same number of forest covers.

But Neota Wild Area has least number of forests compared to other wild areas.

Comanche_Peak accounts for 42% of the total Wild Areas data.

The least data is for Neota Wild Area i.e 3.3% in our data.

We can also see that none of the wild areas have all the types of forest covers.

### Soil Types

### There are a total of 40 Soil Types in our data.

In [None]:
# Cover-type counts for every soil type.
fig = px.histogram(
    forest,
    x="Soil types",
    color="Cover_Type",
    width=850,
    height=400,
)
fig.show()

# Share of each soil type; labels are drawn inside the slices.
fig = px.pie(forest, names="Soil types", width=850, height=400)
fig.update_traces(textposition='inside')
fig.show()

##### Pie chart gives us a proper view 

* We have most of the data for soil type 10 and 29.
* Many soil types are rare in our data

You can just move the cursor over the pie chart for the soil types with low percentage.

## Soil Types in each Wild Area.

#### The below histogram plot tells us types of soil in each wild area

In [None]:
# Draw one grouped soil-type histogram per wilderness area. The four plots
# differ only in the area filter and the title, so they are produced in a loop
# instead of four copy-pasted stanzas.
wild_area_titles = {
    "Rawah_WA": "Rawah Wild Area",
    "Comanche_Peak_WA": "Comanche Peak Area",
    "Cache_la_Poudre_WA": "Cache la Poudre Wild Area",
    "Neota_WA": "Neota Wild Area",
}

for area_name, plot_title in wild_area_titles.items():
    # Keep only the rows of the current area and the columns that are plotted.
    temp = forest.loc[
        forest['Wild Areas'] == area_name,
        ['Wild Areas', 'Soil types', "Cover_Type"],
    ]
    fig = px.histogram(
        temp,
        x="Soil types",
        color="Cover_Type",
        height=500,
        width=1000,
        title=plot_title,
        barmode="group",
    )
    fig.show()

# Numerical Features

#### Let's start with the numerical features now

## Elevation

In [None]:
# Elevation distribution per cover type, with a rug plot in the margin.
fig = px.histogram(
    forest,
    x="Elevation",
    color="Cover_Type",
    marginal='rug',
    title="Elevation Histogram",
    width=800,
    height=500,
)
fig.show()

* Elevation Histogram :- 

    All the forest cover types are spread over a wide range of elevations.
    The above plot shows us that Cottonwood trees grow in lower-elevation regions, whereas Krummholz,
    Spruce and Lodgepole trees grow at higher elevations.
    
* And by looking at the rug plot above the histogram, we can see similarities between Cottonwood, Douglas-fir and Ponderosa Pine trees.
* The same holds for Aspen, Lodgepole Pine and Spruce forest covers.

In [None]:
# Box plot of elevation for each cover type.
fig = px.box(
    forest,
    x="Cover_Type",
    y="Elevation",
    color="Cover_Type",
    width=900,
    height=400,
)
fig.update_layout(title={'text': "Elevation Box Plot"})
fig.show()

In [None]:
# Median elevation per cover type, shown highest first with a red gradient.
temp = (
    forest
    .groupby(['Cover_Type'], as_index=False)[["Elevation"]]
    .median()
)
temp.sort_values(by="Elevation", ascending=False).style.background_gradient(cmap="Reds")

The above table is calculated by taking the median elevation for each forest cover type.
We see that, by median, Krummholz, Spruce and Lodgepole trees grow at higher elevations and Douglas-fir, Ponderosa and Cottonwood at lower elevations.

* Now let's look at the elevation for each forest cover in each wild area.

In [None]:
# Median elevation of every cover type within every wilderness area.
temp = (
    forest
    .groupby(['Wild Areas', 'Cover_Type'], as_index=False)[['Elevation']]
    .median()
)

# Grouped bars: compare the cover types side by side inside each area.
fig = px.bar(
    temp,
    x="Wild Areas",
    y="Elevation",
    color='Cover_Type',
    barmode='group',
    width=900,
    height=400,
)
fig.show()

# Treemap: the same medians shown as nested rectangles (area -> cover type).
fig = px.treemap(
    temp,
    path=['Wild Areas', 'Cover_Type'],
    values='Elevation',
    width=900,
    height=400,
)
fig.show()

temp.style.background_gradient(cmap='plasma')

* We can see that Krummholz and Spruce,Fir trees are located at higher elevation in all the four wild areas
compared to other trees.

## Aspect

According to the information found on the website, aspect identifies the direction of the downhill slope faces. 

It is measured clockwise in degrees from 0 (due north) to 360 (again due north), coming full circle. 

Flat areas having no downslope direction are given a value of -1.

In [None]:
# Aspect distribution per cover type, with a rug plot in the margin.
fig = px.histogram(
    forest,
    x="Aspect",
    color="Cover_Type",
    marginal='rug',
    title="Aspect Histogram",
    width=900,
    height=500,
)
fig.show()

The above histogram plot doesn't help us much.
* Aspect :- I don't see any difference between cover types in their aspect values. As I don't have much knowledge of how aspect works, let's just look at how it relates to the wild areas.


We can use the below table for better understanding.

In [None]:
# Median aspect of every cover type within every wilderness area.
temp = (
    forest
    .groupby(['Wild Areas', 'Cover_Type'], as_index=False)[['Aspect']]
    .median()
)

fig = px.bar(
    temp,
    x="Wild Areas",
    y="Aspect",
    color='Cover_Type',
    barmode='group',
    width=900,
    height=400,
)
fig.show()

fig = px.treemap(
    temp,
    path=['Wild Areas', 'Cover_Type'],
    values='Aspect',
    width=900,
    height=400,
)
fig.show()

temp.style.background_gradient(cmap='YlGnBu')

* From what we know, we can say that trees in the Rawah Wild Area have aspect values in the range 80-105 degrees.
* So these trees are likely to be found on sites whose downhill direction faces east.
* And Douglas-fir and Lodgepole trees in the Cache la Poudre wild area are found on sites whose downhill direction faces west.

## Slope

In [None]:
# Slope distribution per cover type, with box plots in the margin.
fig = px.histogram(
    forest,
    x="Slope",
    color="Cover_Type",
    marginal='box',
    title="Slope Histogram",
    width=800,
    height=500,
)
fig.show()

This plot also doesn't help us much

* Slope :- The above histogram looks somewhat uniform, with a right-skewed tail.

* Some trees are found on steeper slopes, such as Ponderosa Pine and Douglas-fir in the Cache la Poudre wild area and Aspen in the Rawah wild area.

* All the trees have their median slope in the range 10 - 20, as seen in the bar plot below.

In [None]:
# Median slope of every cover type within every wilderness area.
temp = (
    forest
    .groupby(['Wild Areas', 'Cover_Type'], as_index=False)[['Slope']]
    .median()
)

fig = px.bar(
    temp,
    x="Wild Areas",
    y="Slope",
    color='Cover_Type',
    barmode='group',
    width=900,
    height=400,
)
fig.show()

## Distances to nearest surface water

## Horizontal Distance to Hydrology

In [None]:
# Horizontal distance to surface water per cover type, with a marginal rug.
fig = px.histogram(
    forest,
    x="HD_Hydrology",
    color="Cover_Type",
    marginal='rug',
    title="HD_Hydrology Histogram",
    width=800,
    height=500,
)
fig.show()

* Horizontal Distance to Hydrology :- We can see a right-skewed histogram, which tells us that some of the forest covers are located at a larger distance from any surface water.

* The best way to check for outliers and compare the medians is a box plot.

In [None]:
# Box plot of horizontal distance to surface water for each cover type.
fig = px.box(
    forest,
    x="Cover_Type",
    y="HD_Hydrology",
    color="Cover_Type",
    width=800,
    height=500,
)
fig.update_layout(title={'text': "Horizontal Dis to Hydrology Box Plot"})
fig.show()

* With this boxplot, we can see a lot of outliers in Aspen, Lodgepole Pine, Spruce and other trees.
* Many of the Lodgepole and Krummholz trees are located close to surface water, while some are located at a distance of more than 1500.
* But we are not able to compare the medians easily.
* We'll look at a bar plot that uses the median as the estimator.

In [None]:
# Median horizontal distance to surface water per wilderness area and cover type.
temp = (
    forest
    .groupby(['Wild Areas', 'Cover_Type'], as_index=False)[['HD_Hydrology']]
    .median()
)

fig = px.bar(
    temp,
    x="Wild Areas",
    y="HD_Hydrology",
    color='Cover_Type',
    barmode='group',
    width=900,
    height=400,
)
fig.show()

temp.style.background_gradient(cmap="Blues")


* Cottonwood trees are fast-growing trees found along streams, rivers and lowland areas, and hence have the lowest values in the bar plot and table.
* Krummholz trees grow in rugged environments such as cliffs, mountaintops and canyon walls, so a larger distance to surface water is plausible, as shown above.
* Similarly, Lodgepole trees are commonly located near the ocean shore and in dry mountain forests. We can see many trees near surface water and some far away.
* Aspen trees generally grow in high-altitude areas above 5,000 feet but also exist at sea level where climate conditions are ideal. Some of the aspen trees that appear as outliers in the box plot probably grow at high altitude, far away from surface water, while most of them are located near surface water.

## Vertical Distance to Hydrology

In [None]:
# Vertical distance to surface water per cover type, with a marginal rug.
fig = px.histogram(
    forest,
    x="VD_Hydrology",
    color="Cover_Type",
    marginal='rug',
    title="VD_Hydrology Histogram",
    width=800,
    height=500,
)
fig.show()

* Vertical Distance to Hydrology:- Here also we have a right-skewed histogram, which tells us that the vertical distance to nearby surface water is small for most observations.

In [None]:
# Median vertical distance to surface water per wilderness area and cover type.
temp = (
    forest
    .groupby(['Wild Areas', 'Cover_Type'], as_index=False)[['VD_Hydrology']]
    .median()
)

fig = px.bar(
    temp,
    x="Wild Areas",
    y="VD_Hydrology",
    color='Cover_Type',
    barmode='group',
    width=900,
    height=400,
)
fig.show()

# The treemap gives a second view of the same medians.
fig = px.treemap(
    temp,
    path=['Wild Areas', 'Cover_Type'],
    values='VD_Hydrology',
    width=800,
    height=400,
)
fig.show()

temp.style.background_gradient(cmap="BuPu")



* Here also, the median vertical distance from cottonwood trees to the nearest surface water is smaller than for the other trees.
* And the vertical distance for Ponderosa and Krummholz is larger than for the other trees.

## Horizontal Distance to Roadways

In [None]:
# Horizontal distance to the nearest roadway per cover type, with a marginal rug.
fig = px.histogram(
    forest,
    x="HD_Roadways",
    color="Cover_Type",
    marginal='rug',
    title="HD_Roadways Histogram",
    width=800,
    height=500,
)
fig.show()

* HD Roadways:- Here also we have a right-skewed histogram. 
    Looking at the histograms of the Cottonwood, Douglas-fir and Ponderosa Pine forest covers, they are the closest to roadways.
    For the rest, we can see that most of the trees are near roadways and some are far from them.

* The bar plot below supports the above statement.

In [None]:
# Median distance to roadways for each cover type, drawn as horizontal bars.
temp = (
    forest
    .groupby(['Cover_Type'], as_index=False)[['HD_Roadways']]
    .median()
)

fig = px.bar(
    temp.sort_values(by="HD_Roadways", ascending=False),
    x="HD_Roadways",
    y="Cover_Type",
    color='Cover_Type',
    orientation='h',
    width=900,
    height=300,
)
fig.show()

In [None]:
# Median distance to roadways per wilderness area and cover type.
temp = (
    forest
    .groupby(['Wild Areas', 'Cover_Type'], as_index=False)[['HD_Roadways']]
    .median()
)

fig = px.bar(
    temp,
    x="Wild Areas",
    y="HD_Roadways",
    color='Cover_Type',
    barmode='group',
    width=900,
    height=400,
)
fig.show()

fig = px.treemap(
    temp,
    path=['Wild Areas', 'Cover_Type'],
    values='HD_Roadways',
    width=800,
    height=400,
)
fig.show()

temp.style.background_gradient(cmap="Greys")

* As we saw with distance to hydrology, here also Krummholz is located far away from roadways.
* Similarly, Spruce and Lodgepole trees are also located far away from roadways and surface waters.

* The reason for Cottonwood, Douglas-fir and Ponderosa being close to roadways may be their uses.
* Cottonwood trees are widely grown for timber production along wet river banks, where their exceptional growth rate provides a large crop of wood within just 10–30 years.
* Douglas-fir trees are one of the best timber trees and are popular as Christmas trees.
* Ponderosa trees are also important for timber production, in particular for ponderosa pine lumber.
* All three trees are used for timber production and are closest to river banks and roadways for production.

## Horizontal distance to fire points..

In [None]:
# Horizontal distance to wildfire ignition points per cover type.
fig = px.histogram(
    forest,
    x="HD_Fire_Points",
    color="Cover_Type",
    marginal='rug',
    title="HD Fire Points Histogram",
    width=800,
    height=500,
)
fig.show()

In [None]:
# Box plot of the distance to fire points for each cover type.
fig = px.box(
    forest,
    x="Cover_Type",
    y="HD_Fire_Points",
    color="Cover_Type",
    width=800,
    height=500,
)
fig.update_layout(title={'text': "Horizontal Dis to Fire points Box Plot"})
fig.show()

* Some Aspen trees are much farther from fire points than the other Aspen trees, as seen in the above box plot and histogram; most are in the range 
850 to 1934. Fire is a natural feature in much of the aspen ecosystem and is responsible for the abundance of aspen in the West and for the even-aged structure of most stands.
Aspen forest does not readily burn, and fire reduces the overstory, stimulates shoots to sprout, and kills invading conifers growing in the aspen clone.
This may be the reason for it to be closer to fire points.
 


In [None]:
# Median distance to fire points per wilderness area and cover type.
temp = (
    forest
    .groupby(['Wild Areas', 'Cover_Type'], as_index=False)[['HD_Fire_Points']]
    .median()
)

fig = px.bar(
    temp,
    x="Wild Areas",
    y="HD_Fire_Points",
    color='Cover_Type',
    barmode='group',
    width=900,
    height=400,
)
fig.show()

fig = px.treemap(
    temp,
    path=['Wild Areas', 'Cover_Type'],
    values='HD_Fire_Points',
    width=800,
    height=400,
)
fig.show()

temp.style.background_gradient(cmap='YlOrRd')


* Cottonwood tree seeds are highly flammable, 
particularly when they accumulate in piles or drifts, and can carry fire through a yard; the best way to control
cottonwood trees is by preventing the cottonwood blossoms from forming seeds.
* Douglas-fir and Ponderosa trees are both resistant to fire.
Douglas-fir and ponderosa pine are known for their fire tolerance, in large part due to thick insulating bark that develops 
with age and protects the inner cambium from heat injury. 
* In the case of Lodgepole pine, the trees require the heat of fire to release their seeds. 
Fire also produces favourable conditions for the seeds of these pines to germinate. 
Lodgepole pine depends on fire to regenerate.
But in our data, as seen above in the histogram and box plot, most of the trees are near fire points; the
trees that appear as outliers are far away from these points, and they are many in number.
* Similar is the case with Krummholz trees.

* All seven tree types are close to fire points, but due to many outliers in Lodgepole, Spruce and Krummholz trees,
we see them at the top of the table and higher in the bar plot.

* By looking at the plots, in the Cache_la_Poudre wild area all trees are closer to fire points.
* And in Rawah, almost all trees, including Aspen, are far away from fire points.
* The same thing can be noticed here: Lodgepole pine is present in all the wild areas, but it is closest in Cache_la_Poudre,
then Comanche, then Neota, and farthest in the Rawah Wild Area.

## Hillshade at 9am..

In [None]:
# Hillshade index at 9am per cover type, with box plots in the margin.
fig = px.histogram(
    forest,
    x="Hillshade_9am",
    color="Cover_Type",
    marginal='box',
    title="Hillshade at 9am Histogram",
    width=800,
    height=500,
)
fig.show()

* Here we see a left skewed histogram with all the trees having almost highest shade index at 9am.


In [None]:
# Median hillshade index at 9am per wilderness area and cover type.
temp = (
    forest
    .groupby(['Wild Areas', 'Cover_Type'], as_index=False)[['Hillshade_9am']]
    .median()
)

fig = px.bar(
    temp,
    x="Wild Areas",
    y="Hillshade_9am",
    color='Cover_Type',
    barmode='group',
    width=900,
    height=400,
)
fig.show()
# A treemap is skipped here: the bars show no visible difference to highlight.

* I don't see any difference as such in the trees with hillshade at 9am by looking at the histogram.

* But the bar plot tells us that , all trees grow at places where hillshade index at 9am is more.

## Hillshade at Noon

In [None]:
# Hillshade index at noon per cover type, with box plots in the margin.
fig = px.histogram(
    forest,
    x="Hillshade_Noon",
    color="Cover_Type",
    marginal='box',
    title="Hillshade at Noon Histogram",
    width=800,
    height=500,
)
fig.show()

In [None]:
# Median hillshade index at noon per wilderness area and cover type.
temp = (
    forest
    .groupby(['Wild Areas', 'Cover_Type'], as_index=False)[['Hillshade_Noon']]
    .median()
)

fig = px.bar(
    temp,
    x="Wild Areas",
    y="Hillshade_Noon",
    color='Cover_Type',
    barmode='group',
    width=900,
    height=400,
)
fig.show()


* If we see the median value ,every forest cover in all the wild areas have same shade index at Noon.

## Hillshade at 3pm

In [None]:
# Hillshade index at 3pm per cover type, with box plots in the margin.
fig = px.histogram(
    forest,
    x="Hillshade_3pm",
    color="Cover_Type",
    marginal='box',
    title="Hillshade at 3pm Histogram",
    width=800,
    height=500,
)
fig.show()

In [None]:
# Median hillshade index at 3pm per wilderness area and cover type.
temp = (
    forest
    .groupby(['Wild Areas', 'Cover_Type'], as_index=False)[['Hillshade_3pm']]
    .median()
)

fig = px.bar(
    temp,
    x="Wild Areas",
    y="Hillshade_3pm",
    color='Cover_Type',
    barmode='group',
    width=900,
    height=400,
)
fig.show()

temp.style.background_gradient(cmap="cividis")

* At 3pm, all the trees in each wild area have higher hillshade index except Aspen in Rawah Wild area.

* The highest shade at 3pm can be seen for Douglas fir and Lodgepole forest covers located in Cache la wild area
* And Aspen in Rawah wild area.

#### So , we are done with both numerical and categorical features of our data.

#### Now , let's take a look at the correlation of our features among each other..

# Correlation

In [None]:
# Pairwise correlation of the numeric features only. By this point the frame
# also holds string columns (Cover_Type, 'Wild Areas', 'Soil types'), and
# DataFrame.corr() raises on non-numeric data in pandas >= 2.0, so they are
# filtered out explicitly.
forest_corr = forest.select_dtypes(include="number").corr()
forest_corr.style.background_gradient(cmap="cool")

We used styling here for better view on the correlation of the variables.

But a heatmap will give us more information on the correlation.

# Heatmap --> forest features

In [None]:
# Annotated heatmap of the correlation matrix held in `forest_corr`.
fig = plt.figure(figsize=(12, 10))
sns.heatmap(
    forest_corr,
    cmap='YlOrBr',
    annot=True,
    linewidths=.3,
)

### Correlated features

* Elevation and Horizontal Distance to Roadways.
* Aspect and Hillshade at 9am
* Aspect and Hillshade at 3pm
* Slope and Hillshade at Noon
* Horizontal Distance to hydrology and Vertical Distance to Hydrology
* Hillshade at 9am and Hillshade at 3pm
* Hillshade at 3pm and Hillshade at Noon


# Scatterplots

### Scatterplots are important in statistics as they show the extent of correlation.

### We'll use scatterplots here for the above correlated features.

## Elevation --- HD Roadways

In [None]:
# Elevation against distance to roadways, coloured by cover type.
fig = px.scatter(
    forest,
    x='Elevation',
    y='HD_Roadways',
    color='Cover_Type',
    height=400,
    width=800,
)
fig.show()

#### We can see a positive correlation between Elevation and Distance to Roadways.

* Cottonwood trees have the lowest Elevation and distance to roadways compared to other forest covers.
* Douglas trees grow on lower elevated areas but higher than compared to cottonwood trees.
* All Krummholz trees grow on higher elevated areas but we can see a variation in their distance to roadways value.


## Aspect --- Hillshade 3pm

In [None]:
# Aspect against the 3pm hillshade index, coloured by cover type.
fig = px.scatter(
    forest,
    x='Aspect',
    y='Hillshade_3pm',
    color='Cover_Type',
    height=400,
    width=800,
)
fig.show()

## Horizontal distance --- Vertical distance to Hydrology.

In [None]:
# Horizontal against vertical distance to surface water, coloured by cover type.
fig = px.scatter(
    forest,
    x='HD_Hydrology',
    y='VD_Hydrology',
    color='Cover_Type',
    height=400,
    width=800,
)
fig.show()

#### We can see  positive correlation between both the distances to hydrology

## Hillshade at Noon --- 3pm

In [None]:
# Noon against 3pm hillshade index, coloured by cover type.
fig = px.scatter(
    forest,
    x='Hillshade_Noon',
    y='Hillshade_3pm',
    color='Cover_Type',
    height=400,
    width=800,
)
fig.show()

## Aspect --- Hillshade at 9am

In [None]:
# Aspect against the 9am hillshade index, coloured by cover type.
fig = px.scatter(
    forest,
    x='Aspect',
    y='Hillshade_9am',
    color='Cover_Type',
    height=400,
    width=800,
)
fig.show()

## Hillshade at 9am --- 3pm

In [None]:
# 9am against 3pm hillshade index, coloured by cover type.
fig = px.scatter(
    forest,
    x='Hillshade_9am',
    y='Hillshade_3pm',
    color='Cover_Type',
    height=400,
    width=800,
)
fig.show()

#### This graph is obvious as the shade at 9am will be opposite to the shade at 3pm on the area.

#### We can see a negative correlation between hillshade index at 9am and 3pm.

## Slope --- Hillshade at Noon

In [None]:
# Slope against the noon hillshade index, coloured by cover type.
fig = px.scatter(
    forest,
    x='Slope',
    y='Hillshade_Noon',
    color='Cover_Type',
    height=400,
    width=800,
)
fig.show()

## Ok , We are done here.

## After a long Analysis.

## Hope u love my notebook

## And please upvote. :):)