# Chapter 01: Geographic data in Python

## Load libraries

In [None]:
import pandas as pd
import geopandas as gpd
import shapely

# Cap the number of rows pandas prints when a table is displayed, so the
# cell outputs in this notebook stay short.
pd.set_option('display.max_rows', 6)

## 1.2.1 Vector data classes

The main building blocks for working with geographic vector data in Python are:
- `GeoDataFrame`: a pandas DataFrame with a geometry column
- `GeoSeries`: a pandas Series whose values are geometries (the type of a `GeoDataFrame`'s geometry column)
- `shapely`: a library for creating and manipulating geometric objects

The main classes in the `shapely` library are:
- `Geometry`: a base class for all geometric objects
- `Point`: a class for representing points in space
- `LineString`: a class for representing lines in space
- `Polygon`: a class for representing polygons in space
- `MultiPoint`: a class for representing multiple points in space
- `MultiLineString`: a class for representing multiple lines in space
- `MultiPolygon`: a class for representing multiple polygons in space
- `GeometryCollection`: a class for representing a collection of geometries


## Import data

In [None]:
# Read the `world.gpkg` GeoPackage into `gdf`, the layer used throughout the
# vector sections below.
# NOTE(review): this is an absolute, machine-specific Windows path — point it
# at your own copy of the data folder before running.
gdf = gpd.read_file(r"F:\books\geocompy-main\data\world.gpkg")

## 1.2.2 Vector layer

In [None]:
# Check which class `read_file` returned.
type(gdf)

In [None]:
# Dimensions of the table as (rows, columns).
gdf.shape

In [None]:
# Keep only the name column and the geometry column. `gdf` is rebound here,
# so every later cell works with this two-column subset.
gdf = gdf[['name_long', 'geometry']]
gdf

In [None]:
# Boolean-mask filter: the row(s) whose `name_long` equals 'Egypt'.
gdf[gdf.name_long == 'Egypt']

In [None]:
# Static plot of the whole layer.
gdf.plot()

In [None]:
# Interactive map of the whole layer.
gdf.explore()

In [None]:
# Interactive map restricted to the row(s) named 'Egypt'.
gdf[gdf.name_long == 'Egypt'].explore()


## 1.2.3 Geometry columns

In [None]:
# The active geometry column of the layer.
gdf.geometry

In [None]:
# Coordinate reference system attached to the geometry column.
gdf.geometry.crs

In [None]:
# Envelope (bounding rectangle) of each row's geometry.
gdf.envelope

In [None]:
# The same property, accessed through the geometry column instead of the
# layer itself.
gdf.geometry.envelope

In [None]:
# Work on a copy so `gdf` keeps its original geometries, then replace the
# copy's geometry column with the envelopes.
gdf2 = gdf.copy()
gdf2.geometry = gdf.envelope
gdf2

In [None]:
# Geometry type name of each row.
gdf.geometry.type

In [None]:
# Number of rows per geometry type.
gdf.geometry.type.value_counts()

In [None]:
# A layer can carry more than one geometry column. Add the envelopes as
# 'bbox' and a second reference to the current geometries as 'polygon'
# (previously misspelled 'polgon'). Only one column is the active geometry
# at a time; the others are carried along as ordinary columns.
gdf['bbox'] = gdf.envelope
gdf['polygon'] = gdf.geometry
gdf

In [None]:
# Make 'bbox' the active geometry column, so the interactive map draws the
# envelopes instead of the original shapes. `gdf` is rebound to the result.
gdf = gdf.set_geometry('bbox')
gdf.explore()

## 1.2.5 Geometries

In [None]:
# Switch the active geometry back to the original 'geometry' column, then
# pull out one geometry object by position (the fourth row).
gdf = gdf.set_geometry("geometry")
gdf.geometry.iloc[3]

In [None]:
# First geometry among the row(s) named 'Egypt'.
gdf[gdf.name_long == 'Egypt'].geometry.iloc[0]

### Point

In [None]:
# A single point from an (x, y) coordinate pair.
point = shapely.Point(5, 2)
point

### LineString

In [None]:
# A line built from an ordered sequence of (x, y) vertices.
linestring = shapely.LineString([(1,5), (4,4), (4,1), (2,2), (3,2)])
linestring

### Polygon

In [None]:
# A polygon takes an exterior ring plus a list of interior rings (holes).
# Each ring here repeats its first coordinate at the end to close it.
polygon = shapely.Polygon(
    [(1,5), (2,2), (4,1), (4,4), (1,5)],  ## Exterior
    [[(2,4), (3,4), (3,3), (2,3), (2,4)]]  ## Hole(s)
)
polygon

### MultiPoint

In [None]:
# Several points stored as one geometry.
multipoint = shapely.MultiPoint([(5,2), (1,3), (3,4), (3,2)])
multipoint

### MultiLineString

In [None]:
# Several lines stored as one geometry: one coordinate sequence per line.
multilinestring = shapely.MultiLineString([
    [(1,5), (4,4), (4,1), (2,2), (3,2)],  ## 1st sequence
    [(1,2), (2,4)]  ## 2nd sequence, etc.
])
multilinestring

### MultiPolygon

In [None]:
# Several polygons stored as one geometry. Each entry is an
# [exterior, holes] pair; the empty list means the polygon has no holes.
multipolygon = shapely.MultiPolygon([
    [[(1,5), (2,2), (4,1), (4,4), (1,5)], []],  ## Polygon 1 
    [[(0,2), (1,2), (1,3), (0,3), (0,2)], []]   ## Polygon 2, etc.
])
multipolygon

### GeometryCollection

In [None]:
# A collection may mix geometry types; here it combines the multipoint and
# the multilinestring created above.
geometrycollection = shapely.GeometryCollection([multipoint, multilinestring])
geometrycollection

In [None]:
# Buffer the multipolygon by 0.2 and subtract the original shape, which
# leaves only the band added around it.
multipolygon.buffer(0.2).difference(multipolygon)

## 1.2.6 Vector layer from scratch

First we construct a point geometry using the `Point` class from the `shapely` library. Then we wrap it in a `GeoSeries`, setting the coordinate reference system (CRS) to WGS 84 (EPSG:4326) through the `crs` argument. Finally, we combine the geometry with the attribute columns (name, temperature, date) in a dictionary and pass it to `GeoDataFrame` to create a layer with a single row.


In [None]:
# Step 1: the raw geometry, a point given as (x, y) = (0.1, 51.5).
lnd_point = shapely.Point(0.1, 51.5)
lnd_point

In [None]:
# Step 2: wrap the point in a GeoSeries and attach the CRS (EPSG:4326).
lnd_geo = gpd.GeoSeries(lnd_point, crs="EPSG:4326")
lnd_geo

In [None]:
# Step 3: collect the attribute values and the geometry column in a dict,
# one key per column.
lnd_data = {
    'name': ['London'],
    'temperature': [25],
    'date': ['2023-06-21'],
    'geometry': lnd_geo
}

lnd_data

In [None]:
# Step 4: build the layer from the dict.
lnd_layer = gpd.GeoDataFrame(lnd_data)
lnd_layer

In [None]:
# Same workflow with two features: one point per town, combined in a single
# GeoSeries. The CRS is passed as the integer code 4326 this time.
lnd_point = shapely.Point(0.1, 51.5)
paris_point = shapely.Point(2.3, 48.9)
towns_geom = gpd.GeoSeries([lnd_point, paris_point], crs=4326)
towns_data = {
  'name': ['London', 'Paris'],
  'temperature': [25, 27],
  'date': ['2013-06-21', '2013-06-21'],
  'geometry': towns_geom
}
towns_layer = gpd.GeoDataFrame(towns_data)
towns_layer

In [None]:
# Interactive map of the two-town layer.
towns_layer.explore()

A `GeoDataFrame` can also be constructed from a `DataFrame` object that contains columns with coordinates.

In [None]:
# Plain (non-spatial) table: attribute columns plus separate 'x' and 'y'
# coordinate columns.
towns_table = pd.DataFrame(
    {
        'name': ['London', 'Paris'],
        'temperature': [25, 27],
        'date': ['2017-06-21', '2017-06-21'],
        'x': [0.1, 2.3],
        'y': [51.5, 48.9]
    }
)
# Turn the coordinate columns into point geometries with CRS 4326, then
# attach them to the table as its geometry. This rebinds `towns_geom` and
# `towns_layer`.
towns_geom = gpd.points_from_xy(towns_table.x, towns_table.y, crs=4326)
towns_layer = gpd.GeoDataFrame(towns_table, geometry=towns_geom)
towns_layer.explore()

## 1.2.7 Derived numerical properties

In [None]:
# Display the line created in the LineString example again.
linestring

In [None]:
# Length of the line, in the units of its coordinates.
linestring.length

In [None]:
# Total area of the multipolygon, in squared coordinate units.
multipolygon.area

In [None]:
# `.area` on a GeoSeries is computed per element; the point and line
# geometries in this mixed series have no area.
gpd.GeoSeries([point, linestring, polygon, multipoint, multilinestring, multipolygon]).area

In [None]:
# Reproject the Slovenia row to EPSG:32633 before measuring its area.
# NOTE(review): presumably done so the result is in square metres (a
# projected CRS) rather than in the layer's original units — confirm
# against the output of `gdf.geometry.crs`.
gdf[gdf.name_long == 'Slovenia'].to_crs(32633).area

## 1.3 Raster data

### 1.3.1 Using rasterio

In [None]:
import numpy as np
import rasterio
import rasterio.plot

In [None]:
# Open the raster file and plot it with the 'terrain' colour map.
# NOTE(review): absolute, machine-specific Windows path — adjust to your
# data folder. `src` is left open on purpose because the next cells read
# from it; nothing in this notebook closes it.
src = rasterio.open(r"F:\books\geocompy-main\data\srtm.tif")
rasterio.plot.show(src, cmap='terrain')

In [None]:
# Metadata of the opened dataset.
src.meta

In [None]:
# Read the values of band 1 (rasterio band indexes start at 1).
src.read(1)

In [None]:
# 6x6 array holding the integers 1..36 in row-major order, stored as uint8.
elev = np.arange(1, 37, dtype=np.uint8).reshape(6, 6)
elev

In [None]:
# 36 hand-written values (0, 1 or 2), laid out below as six rows of six and
# reshaped into a 6x6 uint8 array.
v = [
  1, 0, 1, 2, 2, 2, 
  0, 2, 0, 0, 2, 1, 
  0, 2, 2, 0, 0, 2, 
  0, 0, 1, 1, 1, 1, 
  1, 1, 1, 2, 1, 1, 
  2, 1, 2, 2, 0, 2
]
grain = np.array(v, dtype=np.uint8).reshape(6, 6)
grain

In [None]:
# Transform that places the arrays in coordinate space: the origin (west
# and north edges) is at (-1.5, 1.5) and each cell is 0.5 x 0.5 units.
new_transform = rasterio.transform.from_origin(
    west=-1.5, 
    north=1.5, 
    xsize=0.5, 
    ysize=0.5
)
new_transform

In [None]:
# Plot the plain arrays; the transform supplies the axis coordinates.
rasterio.plot.show(elev, transform=new_transform)

In [None]:
rasterio.plot.show(grain, transform=new_transform)

## 1.4 Coordinate Reference Systems

In [None]:
import pyproj
# List the CRS codes that pyproj knows under the 'EPSG' authority and show
# the first ten.
epsg_codes = pyproj.get_codes('EPSG', 'CRS')
epsg_codes[:10]

In [None]:
# Full CRS definition for EPSG code 4326.
pyproj.CRS.from_epsg(4326)

In [None]:
# Load the Zion layer and inspect the CRS stored with it.
# NOTE(review): absolute, machine-specific Windows path — adjust to your
# data folder.
zion = gpd.read_file(r"F:\books\geocompy-main\data\zion.gpkg")
zion.crs

In [None]:
# Interactive map over the 'Esri WorldImagery' basemap, with a thin black
# outline and a half-transparent fill.
zion.explore(
    tiles = 'Esri WorldImagery',
    style_kwds = {'color': 'black', 'weight': 1, 'fillOpacity': 0.5},
    name = 'Zion National Park',
)

In [None]:
# Static plot after reprojecting to EPSG:4326; `.grid()` is called on the
# object returned by `plot` to draw grid lines, so the axis coordinates are
# easy to read.
zion.to_crs(4326).plot(
    edgecolor='black',
    color='lightgrey'
).grid()

In [None]:
# Same plot in the layer's own CRS, for comparison with the previous cell.
zion.plot(
    edgecolor='black',
    color='lightgrey'
).grid()