# Using pandas and geopandas data

In [11]:
import pandas as pd
import geopandas as gpd
import numpy as np

from earthkit.data import from_object, from_source
from earthkit.data.testing import earthkit_file

## Pandas
Construct a sample pandas object for demonstration.

In [5]:
# Number of daily samples; every column and the date index share this length,
# so the concat / set_index below can never see mismatched sizes.
N_SAMPLES = 20

# np.arange with a float step can gain or lose an element through rounding,
# so the coordinate columns use np.linspace with an explicit element count.
t2m_series = pd.Series(np.linspace(273.15, 293, N_SAMPLES), name='t2m')
lat_series = pd.Series(np.linspace(50, 52, N_SAMPLES, endpoint=False), name='latitude')
lon_series = pd.Series(np.linspace(-1, 1, N_SAMPLES, endpoint=False), name='longitude')
date_series = pd.Series(pd.date_range('2022-01-01', periods=N_SAMPLES, freq='D'), name='date')

# Join the three columns side by side and index the rows by date.
t2m_df = pd.concat([t2m_series, lat_series, lon_series], axis=1).set_index(date_series)
t2m_df

Unnamed: 0_level_0,t2m,latitude,longitude
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-01-01,273.15,50.0,-1.0
2022-01-02,274.194737,50.1,-0.9
2022-01-03,275.239474,50.2,-0.8
2022-01-04,276.284211,50.3,-0.7
2022-01-05,277.328947,50.4,-0.6
2022-01-06,278.373684,50.5,-0.5
2022-01-07,279.418421,50.6,-0.4
2022-01-08,280.463158,50.7,-0.3
2022-01-09,281.507895,50.8,-0.2
2022-01-10,282.552632,50.9,-0.1


Create an earthkit object from the pandas object, and use earthkit methods

In [6]:
# Wrap the pandas DataFrame in an earthkit data object.
ek_pandas = from_object(t2m_df)

# Iteration is intended to mirror a plain pandas DataFrame;
# print whatever the first two entries of the wrapper yield.
leading_entries = ek_pandas[:2]
for entry in leading_entries:
    print(entry)

# describe() is the cell's last expression, so its result (presented as a
# pandas DataFrame) is rendered as the cell output.
ek_pandas.describe()


t2m
latitude


Unnamed: 0_level_0,t2m,latitude,longitude
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-01-01,273.15,50.0,-1.0
2022-01-02,274.194737,50.1,-0.9
2022-01-03,275.239474,50.2,-0.8
2022-01-04,276.284211,50.3,-0.7
2022-01-05,277.328947,50.4,-0.6
2022-01-06,278.373684,50.5,-0.5
2022-01-07,279.418421,50.6,-0.4
2022-01-08,280.463158,50.7,-0.3
2022-01-09,281.507895,50.8,-0.2
2022-01-10,282.552632,50.9,-0.1


## Geopandas

In [8]:
# Exercise the GeoJSON reader: resolve the sample file path, then open it
# through the "file" source.
geojson_path = earthkit_file("tests/data/NUTS_RG_20M_2021_3035.geojson")
geojson_source = from_source("file", geojson_path)
geojson_source

Unnamed: 0,id,NUTS_ID,LEVL_CODE,CNTR_CODE,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,FID,geometry
0,FR,FR,0,FR,France,France,0.0,0,0,FR,"MULTIPOLYGON (((9954236.116 -3059379.316, 9961..."
1,HR,HR,0,HR,Hrvatska,Hrvatska,0.0,0,0,HR,"MULTIPOLYGON (((4827385.889 2618351.326, 48483..."
2,HU,HU,0,HU,Magyarország,Magyarország,0.0,0,0,HU,"POLYGON ((5214660.069 2880853.832, 5216710.220..."
3,AL,AL,0,AL,Shqipëria,Shqipëria,0.0,0,0,AL,"POLYGON ((5129579.170 2204098.752, 5148385.473..."
4,AT,AT,0,AT,Österreich,Österreich,0.0,0,0,AT,"POLYGON ((4742889.368 2876362.725, 4783217.798..."
...,...,...,...,...,...,...,...,...,...,...,...
2005,TRC21,TRC21,3,TR,Şanlıurfa,Şanlıurfa,4.0,2,3,TRC21,"POLYGON ((6904684.585 2120354.802, 6938677.828..."
2006,TRC22,TRC22,3,TR,Diyarbakır,Diyarbakır,4.0,2,3,TRC22,"POLYGON ((6989716.599 2273670.524, 6982786.486..."
2007,NO0B2,NO0B2,3,NO,Svalbard,Svalbard,3.0,3,1,NO0B2,"MULTIPOLYGON (((4754167.335 6382461.409, 47465..."
2008,NO0B,NO0B,2,NO,Jan Mayen and Svalbard,Jan Mayen and Svalbard,,0,0,NO0B,"MULTIPOLYGON (((4754167.335 6382461.409, 47465..."


Iterating over geojson/geopandas objects yields rows (features) rather than columns, which is the pandas default.
This is more useful for geopandas data.

In [10]:
# Print the first two entries produced by iterating the GeoJSON source.
leading_features = geojson_source[:2]
for feature in leading_features:
    print(feature)

id                                                           FR
NUTS_ID                                                      FR
LEVL_CODE                                                     0
CNTR_CODE                                                    FR
NAME_LATN                                                France
NUTS_NAME                                                France
MOUNT_TYPE                                                  0.0
URBN_TYPE                                                     0
COAST_TYPE                                                    0
FID                                                          FR
geometry      MULTIPOLYGON (((9954236.1162 -3059379.3164, 99...
Name: 0, dtype: object
id                                                           HR
NUTS_ID                                                      HR
LEVL_CODE                                                     0
CNTR_CODE                                                    HR
NAME_LATN        

It is also possible to create an earthkit-data object from an already instantiated geopandas dataframe

In [14]:
# Read the same file directly with geopandas, using the path exposed by the
# earthkit source opened earlier.
source_path = geojson_source.path
gpd_df = gpd.read_file(source_path)

# Wrap the in-memory geopandas object as an earthkit data object and show
# its describe() output.
ek_gpd_df = from_object(gpd_df)
ek_gpd_df.describe()

Unnamed: 0,id,NUTS_ID,LEVL_CODE,CNTR_CODE,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,FID,geometry
0,FR,FR,0,FR,France,France,0.0,0,0,FR,"MULTIPOLYGON (((9954236.116 -3059379.316, 9961..."
1,HR,HR,0,HR,Hrvatska,Hrvatska,0.0,0,0,HR,"MULTIPOLYGON (((4827385.889 2618351.326, 48483..."
2,HU,HU,0,HU,Magyarország,Magyarország,0.0,0,0,HU,"POLYGON ((5214660.069 2880853.832, 5216710.220..."
3,AL,AL,0,AL,Shqipëria,Shqipëria,0.0,0,0,AL,"POLYGON ((5129579.170 2204098.752, 5148385.473..."
4,AT,AT,0,AT,Österreich,Österreich,0.0,0,0,AT,"POLYGON ((4742889.368 2876362.725, 4783217.798..."
...,...,...,...,...,...,...,...,...,...,...,...
2005,TRC21,TRC21,3,TR,Şanlıurfa,Şanlıurfa,4.0,2,3,TRC21,"POLYGON ((6904684.585 2120354.802, 6938677.828..."
2006,TRC22,TRC22,3,TR,Diyarbakır,Diyarbakır,4.0,2,3,TRC22,"POLYGON ((6989716.599 2273670.524, 6982786.486..."
2007,NO0B2,NO0B2,3,NO,Svalbard,Svalbard,3.0,3,1,NO0B2,"MULTIPOLYGON (((4754167.335 6382461.409, 47465..."
2008,NO0B,NO0B,2,NO,Jan Mayen and Svalbard,Jan Mayen and Svalbard,,0,0,NO0B,"MULTIPOLYGON (((4754167.335 6382461.409, 47465..."


In [15]:
# Report how many entries the wrapped geopandas object holds.
polygon_count = len(ek_gpd_df)
print(f"Number of polygons in geopandas: {polygon_count}")

# Then print the first two entries produced by iterating it.
print("Iterate of polygons:")
for feature in ek_gpd_df[:2]:
    print(feature)

Number of polygons in geopandas: 2010
Iterate of polygons:
id                                                           FR
NUTS_ID                                                      FR
LEVL_CODE                                                     0
CNTR_CODE                                                    FR
NAME_LATN                                                France
NUTS_NAME                                                France
MOUNT_TYPE                                                  0.0
URBN_TYPE                                                     0
COAST_TYPE                                                    0
FID                                                          FR
geometry      MULTIPOLYGON (((9954236.1162 -3059379.3164, 99...
Name: 0, dtype: object
id                                                           HR
NUTS_ID                                                      HR
LEVL_CODE                                                     0
CNTR_CODE             