## Forest resources data visualization and preparation for Machine Learning development
This project utilizes open forest resource data (FRD), which is available in Finland through https://www.metsaan.fi/yleistietoa-avoimesta-metsatiedosta. The data source is currently available only in the Finnish language.

Parts of the FRD data loaded in this notebook have been preprocessed in another notebook.

The purpose of this notebook is to visualize the FRD data on a map and to preprocess it further for use in Machine Learning development.

In [1]:
%matplotlib inline
import geopandas #needed to open gpkg source date file 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#import fiona
import seaborn as sns
import folium
from shapely import wkt
# Truncate printed DataFrames to 10 rows so printed frames stay short
pd.set_option('display.max_rows', 10)

In [2]:
# Sample plot ("koeala") data: growing-stock quantities of each sample plot, based on
# on-site field measurements. Per the original author's note, the data contains the
# true "volume" column, which can be used as labelled data in Machine Learning.
pd.set_option("display.max_columns", 50)  # print up to 50 columns of the source data

koeala_gpkg = 'Koeala_Mie.gpkg'

# Layer with one row per sample plot, including its geometry
gdf1 = geopandas.read_file(koeala_gpkg, layer='sampleplot')

# Summary layer; its 'geometry' column is dropped straight away (original note: it is a
# duplicate column that does not carry any information)
gdf3 = geopandas.read_file(koeala_gpkg, layer='summary').drop(columns=['geometry'])

for layer_frame in (gdf1, gdf3):
    print(layer_frame)

     sampleplotid  clusternumber  sampleplotnumber  sampleplottype  maingroup  \
0          222986             21                 5               1          1   
1          222985             21                 3               1          1   
2          270025           2015                 4               1          1   
3          270026           2015                 5               1          1   
4          270023           2015                 2               1          1   
..            ...            ...               ...             ...        ...   
338        223405            420                 4               1          1   
339        223406            420                 5               1          1   
340        282195            855                 4               1          1   
341        282193            855                 2               1          1   
342        282196            855                 5               1          1   

     subgroup  fertilitycla

In [3]:
# Combine the sample-plot layer and the summary layer into one frame.
# The outer join on 'sampleplotid' keeps plots that appear in only one of the layers.
merged_layers = pd.merge(left=gdf1, right=gdf3, how='outer', on='sampleplotid')

# Wrap the merge result as a GeoDataFrame with 'geometry' as the active geometry column
gdf_koe = geopandas.GeoDataFrame(merged_layers, geometry='geometry')

print("gdf_koe", "\n", gdf_koe)


gdf_koe 
      sampleplotid  clusternumber  sampleplotnumber  sampleplottype  maingroup  \
0          222986             21                 5               1          1   
1          222985             21                 3               1          1   
2          270025           2015                 4               1          1   
3          270026           2015                 5               1          1   
4          270023           2015                 2               1          1   
..            ...            ...               ...             ...        ...   
338        223405            420                 4               1          1   
339        223406            420                 5               1          1   
340        282195            855                 4               1          1   
341        282193            855                 2               1          1   
342        282196            855                 5               1          1   

     subgroup  fe

In [4]:
# Grid ("hila") data: collected by laser scanning, i.e. remote sensing, and stored in 16 m x 16 m cells.
# Per the original author's notes the grid contains e.g. laser height and laser density
# information; in Machine Learning it could be used as feature vector data.

path = "./Hila_mie.csv"  # Define the file path
df_hila = pd.read_csv(path)  # Load the data as a Pandas DataFrame
df_hila['WKT'] = df_hila['WKT'].apply(wkt.loads)  # Parse the WKT text into shapely geometry objects (in place on df_hila)
gdf_hila = geopandas.GeoDataFrame(df_hila, geometry='WKT')  # Use the parsed 'WKT' column as the geometry column
gdf_hila.crs = "EPSG:3067"  # Declare the CRS of the coordinates; no reprojection happens here
gdf_hila['index_hila'] = gdf_hila.index  # Keep the row index from the CSV load as an ordinary column
print(gdf_hila)

                                                       WKT  gridcellid  \
0        POLYGON ((538048.000 6738112.000, 538064.000 6...    19569383   
1        POLYGON ((538080.000 6738128.000, 538096.000 6...    19563850   
2        POLYGON ((538096.000 6738128.000, 538112.000 6...    19563854   
3        POLYGON ((538048.000 6738128.000, 538064.000 6...    19569344   
4        POLYGON ((538064.000 6738128.000, 538080.000 6...    19569354   
...                                                    ...         ...   
1612351  POLYGON ((524896.000 6722832.000, 524912.000 6...     9739217   
1612352  POLYGON ((524928.000 6722832.000, 524944.000 6...     9739221   
1612353  POLYGON ((524912.000 6722832.000, 524928.000 6...     9739213   
1612354  POLYGON ((524992.000 6722736.000, 525008.000 6...     9739010   
1612355  POLYGON ((524976.000 6722752.000, 524992.000 6...     9739018   

         gridcellnumber  parcelid  realestateid  maingroup  subgroup  \
0             863230503       NaN      

In [5]:
# Load the separately prepared table that pairs each sample plot (koeala) with its
# surrounding grid cell (hila). The matching itself is done in another notebook.
path2 = "./mie_match.csv"
df_match = pd.read_csv(path2, header = None)  # single-column CSV without a header row

# The file alternates entries: even rows (0, 2, 4, ...) are treated as koeala values,
# odd rows (1, 3, 5, ...) as hila values. Re-number both halves from 0 so that
# entry i of one half lines up with entry i of the other.
koeala_match = df_match.iloc[0::2].reset_index(drop=True)
hila_match = df_match.iloc[1::2].reset_index(drop=True)

koeala_match.columns = ['koeala']
hila_match.columns = ['hila']

# Side-by-side table, indexed by the koeala value
match = pd.concat([koeala_match, hila_match], axis=1).set_index('koeala')

In [6]:
# The matching process is not perfect and can list the same koeala more than once;
# keep only the first row for each koeala index value.
is_repeated_koeala = match.index.duplicated(keep="first")
match_nd = match.loc[~is_repeated_koeala]

In [7]:
# Keep only the hila (grid) rows that were matched to a koeala (sample plot).
# The 'hila' values are used as row positions into gdf_hila, in match_nd order.
# NOTE: gdf_hila is overwritten here, so re-running this cell without reloading the
# grid data would select from the already-filtered frame.
hila_match_list = match_nd['hila'].values.tolist()
gdf_hila = gdf_hila.iloc[hila_match_list].reset_index(drop=True)

In [8]:
# Keep only the koeala (sample plot) rows that have a matching hila (grid cell).
# The index values of match_nd are used as row positions into gdf_koe.
koe_match_list = match_nd.index.tolist()

# Remember each row's pre-filter index as a column before the index is reset
gdf_koe['index_koe'] = gdf_koe.index

# NOTE: gdf_koe is overwritten here, so re-running this cell without rebuilding
# gdf_koe would select from the already-filtered frame.
gdf_koe = gdf_koe.iloc[koe_match_list].reset_index(drop=True)

In [9]:
# Prepare the pop-up field lists for the Folium map.
# Both lists are derived from the GeoDataFrames that are actually drawn on the map
# (gdf_koe / gdf_hila), so the pop-ups show exactly the attribute columns of the
# rendered features, including the 'index_koe' / 'index_hila' helper columns.
# The geometry column of each frame is left out: it is used as the geometry data in
# the Folium map, and listing it as a pop-up field causes an error.
popup_koe_fields = [col for col in gdf_koe.columns if col != 'geometry']  # pop-up for koeala points
popup_hila_fields = [col for col in gdf_hila.columns if col != 'WKT']  # pop-up for hila polygons

In [10]:
# Show the sample plots (koeala) and their surrounding grid cells (hila, 16 m x 16 m)
# on an interactive map. Pop-up information is available for both layers.

# Base map
f = folium.Figure(width=980, height=800)

hauhia_coords = [60.80140, 27.43742]  # initial map centre, passed to folium as `location`
mapa = folium.Map(location = hauhia_coords, zoom_start = 10).add_to(f)

# --- Koeala points ---
gdf_koe = geopandas.GeoDataFrame(gdf_koe, geometry='geometry')
# Declare the original coordinate reference system. set_crs(..., allow_override=True)
# is used instead of assigning to `.crs`, because the frame may already carry a CRS
# and overriding it through the attribute is deprecated in GeoPandas.
gdf_koe = gdf_koe.set_crs("EPSG:3067", allow_override=True)

# Reproject to EPSG:4326 and serialise to GeoJSON for Folium
gdf_koe_gjson = gdf_koe.to_crs(epsg=4326).to_json()
koeala_points = folium.GeoJson(gdf_koe_gjson, popup=folium.GeoJsonPopup(fields=popup_koe_fields))
mapa.add_child(koeala_points)

# --- Hila polygons ---
gdf_hila_gjson = gdf_hila.to_crs(epsg=4326).to_json()
hila_points = folium.GeoJson(gdf_hila_gjson, popup=folium.GeoJsonPopup(fields=popup_hila_fields))
mapa.add_child(hila_points)

# Last expression of the cell: renders the map in the notebook
mapa