Geopandas uses fiona.open() when reading in data

In [None]:
#error with rtree installation when importing geopandas
#https://github.com/geopandas/geopandas/issues/1812
import geopandas as gpd

# List the file-format drivers available for reading/writing, looked up on the
# fiona module that is referenced from geopandas' io.file module.
# NOTE(review): this attribute chain goes through geopandas internals --
# confirm it still resolves with the installed geopandas version.
gpd.io.file.fiona.drvsupport.supported_drivers

# Equivalent lookup when importing fiona directly:
#import fiona
#fiona.supported_drivers

Read / write GeoJSON 

# Input and output locations for the GeoJSON round trip
fp = "data/finland_municipalities.gjson"
outfp = "temp/finland_municipalities.gjson"

# Load the GeoJSON file into a GeoDataFrame
data = gpd.read_file(fp, driver="GeoJSON")

# Write the same data back out as GeoJSON (i.e. just make a copy)
data.to_file(outfp, driver="GeoJSON")

Managing Filepaths

In [None]:
import os

# Path to the input folder. Relative paths are resolved against the current
# working directory, so the working directory must contain the "L2" folder.
# The path is assembled from its components with os.path.join so that the
# correct separator is used on every operating system (a hard-coded
# backslash-separated string only works on Windows).
input_folder = os.path.join(
    "L2", "L2_data", "NLS", "2018", "L4", "L41", "L4132R_shp"
)

# Join the folder path and the file name
fp = os.path.join(input_folder, "m_L4132R_p.shp")

# Print out the full file path
print(fp)

Reading a shapefile

In [None]:
# Read the file at `fp` into `data` using gpd.read_file()
data = gpd.read_file(fp)

# Show the type of the object that read_file() returned
type(data)

In [None]:
# Preview the first 5 rows of the GeoDataFrame (row count given explicitly)
data.head(5)

In [None]:
# List all column names of `data` as an array of values
data.columns.values

In [None]:
# Mapping from the original column names to the new ones, as a dictionary
colnames = dict(RYHMA="GROUP", LUOKKA="CLASS")

# Apply the renaming directly on `data` (in place, nothing is returned)
data.rename(columns=colnames, inplace=True)

# Preview the table to confirm the new column names
data.head(5)

Identify the number of rows, classes, and groups.

In [None]:
# Total number of rows, taken from the size of the CLASS column
print("Number of rows", data["CLASS"].size)

# Number of distinct values in the CLASS and GROUP columns
print("Number of classes", data["CLASS"].nunique())
print("Number of groups", data["GROUP"].nunique())

Creating a simple map from a GeoDataFrame is really easy: you can use the .plot() function from geopandas, which creates a map based on the geometries of the data.

In [None]:
# Draw a map of the geometries stored in `data`
data.plot()

It is easy to produce a map out of your Shapefile with geopandas. Geopandas automatically positions the map so that it covers the whole extent of your data.

Geopandas stores geometries as Shapely's geometric objects.
The geometries are stored in a column called geometry, which is the default column name for storing geometric information in geopandas.

In [None]:
# Show the first 5 entries of the geometry column (shapely Polygon objects)
data["geometry"].head(5)

In [None]:
# Look up the geometry stored on the row labelled 0
data.loc[0, "geometry"]

In [None]:
# Report the area of the geometry on the row labelled 0, rounded to 0 decimals
print(
    "Area:",
    round(data.loc[0, "geometry"].area, 0),
    "square meters",
)

In [None]:
# Geopandas is an extension of pandas, so the usual pandas iteration tools work.

# Walk over the first five geometries (index label + shapely object) and
# print the area of each polygon
for idx, geom in data["geometry"].iloc[0:5].items():
    print("Polygon area at index {index} is: {area:.0f} square meters".format(index=idx, area=geom.area))

GeoDataFrames and GeoSeries have an attribute called area, which we can use to access the area of every feature at once:

In [None]:
# Get the area of every feature at once via the `area` attribute
# (the original note mentions 4311 rows in this dataset)
data.area

In [None]:
# Store the area of each feature's geometry in a new 'area' column
data["area"] = data.geometry.area

In [None]:
# Display the newly added 'area' column of the data table
data['area']

In [None]:
# Largest area in the table, rounded to two decimals
data["area"].max().round(2)

In [None]:
# Smallest area in the table, rounded to two decimals
data["area"].min().round(2)

In [None]:
# Mean area over all rows, rounded to two decimals
data["area"].mean().round(2)

Export GeoDataFrames into various data formats using the to_file() method. 

In our case, we want to export subsets of the data into Shapefiles (one file for each feature class).

Let’s first select one class (class number 36200, “Lake water”) from the data as a new GeoDataFrame:

In [None]:
# Keep only the rows whose CLASS value equals 36200 (boolean-mask indexing)
selection = data[data["CLASS"] == 36200]

In [None]:
# Plot the selected rows for a visual check of the selection
selection.plot()

In [None]:
# Build the output path for the new shapefile (the file itself is written
# in a later step). The folder is assembled from its components with
# os.path.join so the correct separator is used on every operating system;
# a hard-coded backslash-separated string only works on Windows.
output_folder = os.path.join("L2", "L2_data")
output_fp = os.path.join(output_folder, "Class_36200.shp")
print(output_fp)

In [None]:
# Write the selected rows to the file at output_fp. No driver is passed, so
# the output format is whatever to_file() defaults to (Shapefile according to
# the original note -- confirm for the installed geopandas version).
selection.to_file(output_fp)

Read the newly created Shapefile into a new GeoDataFrame and verify that the data looks good.

In [None]:
# Read the file at output_fp back into a separate variable for verification
temp = gpd.read_file(output_fp)

In [None]:
# Preview the first 5 rows of the re-read data
temp.head(5)

In [None]:
# Plot the re-read data for a visual inspection
temp.plot()

The CLASS column in the data contains information about different land use types. With the .unique() function we can quickly see all the different values in that column:

In [None]:
# Show all unique values found in the CLASS column
data['CLASS'].unique()

In [None]:
# Group the rows of `data` by their CLASS value
grouped = data.groupby(by="CLASS")

# The result is a DataFrameGroupBy object (conceptually similar to the
# key-value pairs of a dictionary)
grouped

In [None]:
# List the group keys, i.e. the unique CLASS values the data was grouped by
grouped.groups.keys()

In [None]:
# Report the number of rows in each CLASS group

for key, group in grouped:
    # `key` is the CLASS value; `group` holds all rows belonging to that class
    print('Terrain Class: ', key)
    print('Number of rows: ', group.shape[0])

    # Uncomment to dump the rows of the current group as well:
    #print('current group: ', group, "\n")

In [None]:
# `group` still refers to the last group visited by the for loop above;
# preview its first 5 rows
group.head(5)

In [None]:
# Check the data type of `group` (the group left over from the loop)
type(group)

# As the output shows, each set of data is now grouped into a separate
# GeoDataFrame, and we can save them into separate files.

Let’s export each class into a separate Shapefile. While doing this, we also want to create unique filenames for each class.

In [None]:
# Ingredients of the output file name: a fixed prefix and the class id
key = 36200
basename = "terrain"

# Option 1: join the pieces with the '+' operator (the number needs str())
out_fp = basename + "_" + str(key) + ".shp"

# Option 2: printf-style positional formatting with the '%' operator
out_fp2 = "%s_%s.shp" % (basename, key)

# Option 3: positional formatting with str.format()
out_fp3 = "{}_{}.shp".format(basename, key)

In [None]:
# Output directory, assembled from its components so the correct path
# separator is used on every operating system
output_folder = os.path.join("L2", "L2_data")

# The results go into a sub-folder called Results
result_folder = os.path.join(output_folder, 'Results')

# Create the folder only when it is not there yet
if os.path.isdir(result_folder):
    print("Results folder already exists.")
else:
    print("Creating a folder for the results...")

    # os.makedirs also creates any missing parent folders (os.mkdir would
    # raise FileNotFoundError when L2/L2_data does not exist yet), and
    # exist_ok=True avoids an error if the folder appears between the check
    # above and this call.
    os.makedirs(result_folder, exist_ok=True)

In [None]:
# Export every CLASS group into its own Shapefile, named after the class id

for key, group in grouped:

    # File name for this class and its full path inside the results folder
    output_name = "terrain_{}.shp".format(key)
    outpath = os.path.join(result_folder, output_name)

    # Progress message
    print("Saving file", os.path.basename(output_name))

    # Write the rows of this group to the output path
    group.to_file(outpath)

In [None]:
# Total of the 'area' column within each CLASS group, rounded to whole units
area_info = grouped["area"].sum().round()

area_info

In [None]:
# Save the per-class area totals as a CSV file (pandas to_csv) in the
# results folder, including the header row
area_csv_path = os.path.join(result_folder, "terrain_class_areas.csv")
area_info.to_csv(area_csv_path, header=True)