# ANOVOS - Geospatial
The following notebook lists the functions provided by the "geospatial" module of the ANOVOS package and shows how each of them can be invoked.


**Setting Spark Session**

In [1]:
import pandas as pd
import pyspark
import os
from pyspark.sql import functions as F
from pyspark.sql import types as T
from pyspark.sql.window import Window

In [2]:
# Set the run type variable. The session-setup cell builds a custom Kubernetes
# Spark session when this is "ak8s" and imports the session from anovos.shared.spark otherwise.
run_type = "local" # "local", "emr", "databricks", "ak8s"

In [4]:
# Obtain the Spark session (`spark`) and Spark context (`sc`) used by the rest of the notebook.
# For run_type "ak8s" (Azure Kubernetes) the session is configured by hand below;
# for every other run type it is imported from anovos.shared.spark.
import os
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

if run_type == "ak8s":
    # Placeholders: fill these in for your cluster before running.
    fs_path="<insert conf spark.hadoop.fs master url here> ex: spark.hadoop.fs.azure.sas.<container>.<account_name>.blob.core.windows.net"
    auth_key="<insert value of sas_token here>"
    master_url="<insert kubernetes master url path here> ex: k8s://"
    docker_image="<insert name docker image here>"
    kubernetes_namespace ="<insert kubernetes namespace here>"

    # Create Spark config for our Kubernetes based cluster manager
    sparkConf = SparkConf()
    sparkConf.setMaster(master_url)
    sparkConf.setAppName("Anovos_pipeline")
    sparkConf.set("spark.submit.deployMode","client")
    sparkConf.set("spark.kubernetes.container.image", docker_image)
    sparkConf.set("spark.kubernetes.namespace", kubernetes_namespace)
    sparkConf.set("spark.executor.instances", "4")
    sparkConf.set("spark.executor.cores", "4")
    sparkConf.set("spark.executor.memory", "16g")
    sparkConf.set("spark.kubernetes.pyspark.pythonVersion", "3")
    sparkConf.set("spark.kubernetes.authenticate.driver.serviceAccountName", "spark")
    # Storage credential: the config key is fs_path and its value is the SAS token.
    sparkConf.set(fs_path,auth_key)
    sparkConf.set("spark.kubernetes.authenticate.serviceAccountName", "spark")
    sparkConf.set("spark.jars.packages", "org.apache.hadoop:hadoop-azure:3.2.0,com.microsoft.azure:azure-storage:8.6.3,io.github.histogrammar:histogrammar_2.12:1.0.20,io.github.histogrammar:histogrammar-sparksql_2.12:1.0.20,org.apache.spark:spark-avro_2.12:3.2.1")

    # Initialize our Spark cluster, this will actually
    # generate the worker nodes.
    spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()
    sc = spark.sparkContext

else:
    # For other run types import the session objects from anovos.shared.
    # They are imported by name (instead of `import *`) so it is clear where
    # `spark` and `sc` come from and the notebook namespace is not polluted.
    from anovos.shared.spark import sc, spark
    auth_key = "NA"

In [5]:
# Restrict Spark log output to the ERROR level.
sc.setLogLevel("ERROR")
import warnings
# Suppress all Python warnings for the remainder of the session.
warnings.filterwarnings('ignore')

In [6]:
# Location of the CSV dataset that is loaded with read_dataset below.
inputPath = "../data/income_dataset/csv"
# Output location. NOTE(review): not referenced by any other cell visible in this notebook - confirm it is still needed.
outputPath = "../output/income_dataset/data_transformer"

In [7]:
from anovos.data_ingest.data_ingest import read_dataset

In [8]:
# Load the CSV dataset (header row, comma-delimited, schema inferred) into a Spark DataFrame.
df = read_dataset(
    spark,
    file_path=inputPath,
    file_type="csv",
    file_configs={"header": "True", "delimiter": ",", "inferSchema": "True"},
)
# Preview the first 5 rows. Limiting on the Spark side means only those rows are
# collected to the driver, instead of converting the whole dataset to pandas first.
df.limit(5).toPandas()

                                                                                

Unnamed: 0,ifa,age,workclass,fnlwgt,logfnl,empty,education,education-num,marital-status,occupation,...,capital-gain,capital-loss,hours-per-week,native-country,income,dt_1,dt_2,latitude,longitude,geohash
0,1a,,State-gov,77516.0,4.889391,,Bachelors,13.0,Never-married,Adm-clerical,...,2174.0,0.0,40.0,UnitedStates,<=50K,1/8/16 5:59,1/16/16 5:59,-38.624096,177.982468,rb68np99
1,2a,,Self-emp-not-inc,83311.0,4.920702,,Bachelors,13.0,Married-civ-spouse,Exec-managerial,...,0.0,0.0,13.0,UnitedStates,<=50K,1/8/16 21:09,1/12/16 21:09,-40.880497,174.992142,rckjypw0
2,3a,38.0,Private,215646.0,5.333741,,HS-grad,9.0,Divorced,Handlers-cleaners,...,0.0,0.0,40.0,UnitedStates,<=50K,3/8/16 2:21,3/20/16 2:21,-37.73563,176.164047,rckm712q
3,4a,53.0,Private,234721.0,5.370552,,11th,7.0,Married-civ-spouse,Handlers-cleaners,...,0.0,0.0,40.0,UnitedStates,<=50K,3/8/16 6:31,3/14/16 6:31,-39.536491,176.832321,rckndgte
4,5a,,Private,338409.0,5.529442,,Bachelors,13.0,Married-civ-spouse,Prof-specialty,...,0.0,0.0,40.0,Cuba,<=50K,3/8/16 9:45,3/10/16 9:45,-41.128094,175.033722,rckq4596


# Geo Format Latlon

In [9]:
from anovos.data_transformer.geospatial import geo_format_latlon

In [10]:
# Example 1 - transform latitude,longitude columns to degrees-minutes-seconds format
odf = geo_format_latlon(df.select("latitude", "longitude"), ["latitude"], ["longitude"], "dd", "dms")
# Preview only: limit in Spark so just the 5 displayed rows are collected to the driver.
odf.limit(5).toPandas()

                                                                                

Unnamed: 0,latitude,longitude,latitude_longitude_lat_dms,latitude_longitude_lon_dms
0,-38.624096,177.982468,"[-39.0, 22.0, 33.25468826293945]","[177.0, 58.0, 56.88372039794922]"
1,-40.880497,174.992142,"[-41.0, 7.0, 10.210871696472168]","[174.0, 59.0, 31.710119247436523]"
2,-37.73563,176.164047,"[-38.0, 15.0, 51.73185729980469]","[176.0, 9.0, 50.56991958618164]"
3,-39.536491,176.832321,"[-40.0, 27.0, 48.630958557128906]","[176.0, 49.0, 56.356319427490234]"
4,-41.128094,175.033722,"[-42.0, 52.0, 18.862607955932617]","[175.0, 2.0, 1.3988399505615234]"


In [11]:
# Example 2 - transform latitude,longitude columns to radian format
odf = geo_format_latlon(df.select("latitude", "longitude"), ["latitude"], ["longitude"], "dd", "radian")
# Preview only: limit in Spark so just the 5 displayed rows are collected to the driver.
odf.limit(5).toPandas()

Unnamed: 0,latitude,longitude,latitude_longitude_lat_radian,latitude_longitude_lon_radian
0,-38.624096,177.982468,-0.674118,3.10638
1,-40.880497,174.992142,-0.713499,3.054189
2,-37.73563,176.164047,-0.658611,3.074643
3,-39.536491,176.832321,-0.690042,3.086306
4,-41.128094,175.033722,-0.717821,3.054915


In [12]:
# Example 3 - transform latitude,longitude columns to cartesian format
odf = geo_format_latlon(df.select("latitude", "longitude"), ["latitude"], ["longitude"], "dd", "cartesian")
# Preview only: limit in Spark so just the 5 displayed rows are collected to the driver.
odf.limit(5).toPandas()

Unnamed: 0,latitude,longitude,latitude_longitude_x,latitude_longitude_y,latitude_longitude_z
0,-38.624096,177.982468,-4974316.5,175230.953125,-3976836.25
1,-40.880497,174.992142,-4798581.0,420484.65625,-4169720.25
2,-37.73563,176.164047,-5027180.5,337073.5625,-3899178.25
3,-39.536491,176.832321,-4905938.0,271508.4375,-4055590.25
4,-41.128094,175.033722,-4780889.0,415438.375,-4190497.25


In [13]:
# Example 4 - transform latitude,longitude columns to geohash format
odf = geo_format_latlon(df.select("latitude", "longitude"), ["latitude"], ["longitude"], "dd", "geohash")
# Preview only: limit in Spark so just the 5 displayed rows are collected to the driver.
odf.limit(5).toPandas()

Unnamed: 0,latitude,longitude,latitude_longitude_geohash
0,-38.624096,177.982468,rcns60dy
1,-40.880497,174.992142,rbsrqjcm
2,-37.73563,176.164047,rcm32dqw
3,-39.536491,176.832321,rbvxpft7
4,-41.128094,175.033722,rbsqnbze


In [14]:
# Example 5 - transform latitude,longitude columns to cartesian format and replace original columns
odf = geo_format_latlon(df, ["latitude"], ["longitude"], "dd", "cartesian", output_mode="replace")
# Preview only: limit in Spark so just the 5 displayed rows are collected to the driver.
odf.limit(5).toPandas()

Unnamed: 0,ifa,age,workclass,fnlwgt,logfnl,empty,education,education-num,marital-status,occupation,...,capital-loss,hours-per-week,native-country,income,dt_1,dt_2,geohash,latitude_longitude_x,latitude_longitude_y,latitude_longitude_z
0,1a,,State-gov,77516.0,4.889391,,Bachelors,13.0,Never-married,Adm-clerical,...,0.0,40.0,UnitedStates,<=50K,1/8/16 5:59,1/16/16 5:59,rb68np99,-4974316.5,175230.953125,-3976836.25
1,2a,,Self-emp-not-inc,83311.0,4.920702,,Bachelors,13.0,Married-civ-spouse,Exec-managerial,...,0.0,13.0,UnitedStates,<=50K,1/8/16 21:09,1/12/16 21:09,rckjypw0,-4798581.0,420484.65625,-4169720.25
2,3a,38.0,Private,215646.0,5.333741,,HS-grad,9.0,Divorced,Handlers-cleaners,...,0.0,40.0,UnitedStates,<=50K,3/8/16 2:21,3/20/16 2:21,rckm712q,-5027180.5,337073.5625,-3899178.25
3,4a,53.0,Private,234721.0,5.370552,,11th,7.0,Married-civ-spouse,Handlers-cleaners,...,0.0,40.0,UnitedStates,<=50K,3/8/16 6:31,3/14/16 6:31,rckndgte,-4905938.0,271508.4375,-4055590.25
4,5a,,Private,338409.0,5.529442,,Bachelors,13.0,Married-civ-spouse,Prof-specialty,...,0.0,40.0,Cuba,<=50K,3/8/16 9:45,3/10/16 9:45,rckq4596,-4780889.0,415438.375,-4190497.25


The latitude and longitude columns are replaced by latitude_longitude_x, latitude_longitude_y and latitude_longitude_z.

# Geo Format Cartesian

In [15]:
from anovos.data_transformer.geospatial import geo_format_cartesian

In [16]:
# Example - transform cartesian format to decimal degrees format
# Create cartesian format columns x,y,z. output_mode="append" keeps the original
# latitude/longitude columns next to the new ones (the previous value "mode" was not a documented option).
odf_cartesian = geo_format_latlon(df.select("latitude", "longitude"), ["latitude"], ["longitude"], "dd", "cartesian", output_mode="append")
# Shorten the generated column names to x, y, z.
odf_cartesian = (
    odf_cartesian
    .withColumnRenamed("latitude_longitude_x", "x")
    .withColumnRenamed("latitude_longitude_y", "y")
    .withColumnRenamed("latitude_longitude_z", "z")
)
# geo_format_cartesian: convert the x, y, z columns back to decimal degrees
odf_dd = geo_format_cartesian(odf_cartesian, "x", "y", "z", "dd")
odf_dd.show()

+------------+-----------+----------+---------+----------+------------+------------+
|    latitude|  longitude|         x|        y|         z|x_y_z_lat_dd|x_y_z_lon_dd|
+------------+-----------+----------+---------+----------+------------+------------+
|-38.62409592|177.9824677|-4974316.5|175230.95|-3976836.2|  -38.624096|   177.98247|
|-40.88049698|174.9921417|-4798581.0|420484.66|-4169720.2|  -40.880497|   174.99214|
|-37.73563004|176.1640472|-5027180.5|337073.56|-3899178.2|   -37.73563|   176.16405|
| -39.5364914|176.8323212|-4905938.0|271508.44|-4055590.2|   -39.53649|   176.83232|
|-41.12809372|175.0337219|-4780889.0|415438.38|-4190497.2|  -41.128094|   175.03372|
|-36.69749833|174.7217255|-5086625.0|469926.66|-3807252.2|    -36.6975|   174.72173|
|-40.93322754|175.5478973|-4798606.0|373622.44|-4174151.5|  -40.933228|    175.5479|
|-35.81871033|174.5117188|-5142395.0| 494094.8|-3728457.8|   -35.81871|   174.51172|
|-36.88520432|174.5949707|-5073128.5|480001.12|-3823967.0|  -36.8

# Geo Format Geohash

In [17]:
from anovos.data_transformer.geospatial import geo_format_geohash

In [18]:
# Example 1 - transform geohash column to decimal-degree format
geohash_df = df.select("geohash")
odf = geo_format_geohash(geohash_df, ["geohash"], "dd")
odf.toPandas()

                                                                                

Unnamed: 0,geohash,geohash_lat_dd,geohash_lon_dd
0,rb68np99,-43.551998,172.531998
1,rckjypw0,-36.917000,174.647003
2,rckm712q,-37.037998,174.858994
3,rckndgte,-36.806000,174.503998
4,rckq4596,-36.894001,174.815994
...,...,...,...
32556,rcm32hdg,-37.723999,176.136993
32557,rb6b82me,-43.504002,172.636002
32558,rckqh5tv,-36.894001,174.910004
32559,rckkughm,-37.116001,174.940994


In [19]:
# Example 2 - transform geohash column to radian format
geohash_df = df.select("geohash")
odf = geo_format_geohash(geohash_df, ["geohash"], "radian")
odf.toPandas()

Unnamed: 0,geohash,geohash_lat_radian,geohash_lon_radian
0,rb68np99,-0.760126,3.011251
1,rckjypw0,-0.644323,3.048165
2,rckm712q,-0.646435,3.051865
3,rckndgte,-0.642386,3.045669
4,rckq4596,-0.643922,3.051115
...,...,...,...
32556,rcm32hdg,-0.658408,3.074171
32557,rb6b82me,-0.759288,3.013067
32558,rckqh5tv,-0.643922,3.052755
32559,rckkughm,-0.647796,3.053296


In [20]:
# Example 3 - transform geohash column to cartesian format
geohash_df = df.select("geohash")
odf = geo_format_geohash(geohash_df, ["geohash"], "cartesian")
odf.toPandas()

Unnamed: 0,geohash,geohash_x,geohash_y,geohash_z
0,rb68np99,-4578218.0,600132.75000,-4389705.50
1,rckjypw0,-5071448.5,475196.06250,-3826794.25
2,rckm712q,-5065112.0,455702.81250,-3837542.75
3,rckndgte,-5077617.0,488561.12500,-3816919.00
4,rckq4596,-5074357.5,460374.00000,-3824749.00
...,...,...,...,...
32556,rcm32hdg,-5027810.5,339499.96875,-3898155.50
32557,rb6b82me,-4582945.5,592292.81250,-4385836.00
32558,rckqh5tv,-5075106.0,452048.34375,-3824749.00
32559,rckkughm,-5060550.0,447992.21875,-3844462.25


# Location Distance

In [21]:
from anovos.data_transformer.geospatial import location_distance

In [22]:
# Prepare data with two locations: the second coordinate pair is the first one
# shifted by +3 in latitude and -2 in longitude.
shifted_latitude = F.col("latitude") + 3
shifted_longitude = F.col("longitude") - 2
df_2 = (
    df
    .withColumn("latitude_2", shifted_latitude)
    .withColumn("longitude_2", shifted_longitude)
)

This function requires two latitude-longitude pairs. For demonstration purposes, we have created the latitude_2 and longitude_2 columns.

In [23]:
# Example 1 - calculate haversine distance (default)
first_location = ["latitude", "longitude"]
second_location = ["latitude_2", "longitude_2"]
odf = location_distance(df_2, first_location, second_location, result_prefix="loc")
# Show the identifier, both coordinate pairs and the computed distance.
odf.select("ifa", *first_location, *second_location, "loc_distance").toPandas()

                                                                                

Unnamed: 0,ifa,latitude,longitude,latitude_2,longitude_2,loc_distance
0,1a,-38.624096,177.982468,-35.624096,175.982468,377756.37500
1,2a,-40.880497,174.992142,-37.880497,172.992142,375241.18750
2,3a,-37.735630,176.164047,-34.735630,174.164047,378728.71875
3,4a,-39.536491,176.832321,-36.536491,174.832321,376747.50000
4,5a,-41.128094,175.033722,-38.128094,173.033722,374961.40625
...,...,...,...,...,...,...
32556,32557a,-41.293278,174.783737,-38.293278,172.783737,374774.37500
32557,32558a,-45.855858,170.513382,-42.855858,168.513382,369513.53125
32558,32559a,-37.743980,175.225586,-34.743980,173.225586,378719.06250
32559,32560a,-37.750027,175.278122,-34.750027,173.278122,378713.06250


In [24]:
# Example 2 - calculate Euclidean distance
first_location = ["latitude", "longitude"]
second_location = ["latitude_2", "longitude_2"]
odf = location_distance(
    df_2,
    first_location,
    second_location,
    result_prefix="loc",
    distance_type="euclidean",
)
# Show the identifier, both coordinate pairs and the computed distance.
odf.select("ifa", *first_location, *second_location, "loc_distance").toPandas()

                                                                                

Unnamed: 0,ifa,latitude,longitude,latitude_2,longitude_2,loc_distance
0,1a,-38.624096,177.982468,-35.624096,175.982468,377701.40625
1,2a,-40.880497,174.992142,-37.880497,172.992142,375186.93750
2,3a,-37.735630,176.164047,-34.735630,174.164047,378673.00000
3,4a,-39.536491,176.832321,-36.536491,174.832321,376692.56250
4,5a,-41.128094,175.033722,-38.128094,173.033722,374907.21875
...,...,...,...,...,...,...
32556,32557a,-41.293278,174.783737,-38.293278,172.783737,374720.25000
32557,32558a,-45.855858,170.513382,-42.855858,168.513382,369461.81250
32558,32559a,-37.743980,175.225586,-34.743980,173.225586,378663.62500
32559,32560a,-37.750027,175.278122,-34.750027,173.278122,378657.18750


In [25]:
# Example 3 - calculate Vincenty distance
first_location = ["latitude", "longitude"]
second_location = ["latitude_2", "longitude_2"]
odf = location_distance(
    df_2,
    first_location,
    second_location,
    result_prefix="loc",
    distance_type="vincenty",
)
# Show the identifier, both coordinate pairs and the computed distance.
odf.select("ifa", *first_location, *second_location, "loc_distance").toPandas()

                                                                                

Unnamed: 0,ifa,latitude,longitude,latitude_2,longitude_2,loc_distance
0,1a,-38.624096,177.982468,-35.624096,175.982468,377382.50000
1,2a,-40.880497,174.992142,-37.880497,172.992142,374976.50000
2,3a,-37.735630,176.164047,-34.735630,174.164047,378311.75000
3,4a,-39.536491,176.832321,-36.536491,174.832321,376417.12500
4,5a,-41.128094,175.033722,-38.128094,173.033722,374708.87500
...,...,...,...,...,...,...
32556,32557a,-41.293278,174.783737,-38.293278,172.783737,374529.96875
32557,32558a,-45.855858,170.513382,-42.855858,168.513382,369496.93750
32558,32559a,-37.743980,175.225586,-34.743980,173.225586,378303.06250
32559,32560a,-37.750027,175.278122,-34.750027,173.278122,378296.78125


# Geohash Precision Control

In [26]:
from anovos.data_transformer.geospatial import geohash_precision_control

In [27]:
# Example - produce geohashes whose precision is capped at 5
odf = geohash_precision_control(
    df.select("geohash"),
    ["geohash"],
    output_precision=5,
)
odf.toPandas()

2022-11-22 15:59:24.407 | INFO     | anovos.data_transformer.geospatial:geohash_precision_control:692 - Precision of the output geohashes will be capped at 5.


Unnamed: 0,geohash,geohash_precision_5
0,rb68np99,rb68n
1,rckjypw0,rckjy
2,rckm712q,rckm7
3,rckndgte,rcknd
4,rckq4596,rckq4
...,...,...
32556,rcm32hdg,rcm32
32557,rb6b82me,rb6b8
32558,rckqh5tv,rckqh
32559,rckkughm,rckku


# Location in Polygon

In [28]:
from anovos.data_transformer.geospatial import location_in_polygon

In [29]:
# Example - check if any datapoint is in Africa via Polygon
# Outline of the polygon as a single ring of [longitude, latitude] vertices
# (GeoJSON order). The first and last vertex are identical, which closes the ring.
# One vertex per line keeps every numeric literal intact.
africa_ring = [
    [-6.328125, 35.88905007936091],
    [-10.1953125, 32.39851580247402],
    [-10.1953125, 29.6880527498568],
    [-13.18359375, 27.527758206861886],
    [-16.171875, 24.04646399966658],
    [-16.69921875, 22.268764039073968],
    [-16.69921875, 19.145168196205297],
    [-18.28125, 14.43468021529728],
    [-16.171875, 11.523087506868514],
    [-12.83203125, 8.581021215641854],
    [-11.25, 5.965753671065536],
    [-7.3828125, 5.965753671065536],
    [-8.0859375, 3.5134210456400448],
    [-2.4609375, 4.915832801313164],
    [5.44921875, 6.140554782450308],
    [5.625, 4.214943141390651],
    [10.37109375, 4.039617826768437],
    [8.7890625, -1.7575368113083125],
    [12.12890625, -5.0909441750333855],
    [13.886718749999998, -9.968850608546097],
    [12.3046875, -14.093957177836224],
    [11.6015625, -17.644022027872726],
    [13.7109375, -21.28937435586041],
    [14.589843749999998, -24.5271348225978],
    [15.468749999999998, -27.68352808378776],
    [17.05078125, -29.382175075145277],
    [18.45703125, -31.95216223802496],
    [16.875, -34.161818161230386],
    [19.51171875, -34.74161249883172],
    [22.67578125, -34.45221847282653],
    [27.24609375, -34.45221847282653],
    [29.179687499999996, -32.249974455863295],
    [32.6953125, -30.29701788337204],
    [32.87109375, -27.839076094777802],
    [34.453125, -26.273714024406416],
    [34.98046875, -25.48295117535531],
    [35.859375, -23.88583769986199],
    [35.859375, -21.453068633086772],
    [35.859375, -19.476950206488414],
    [39.0234375, -17.308687886770024],
    [41.1328125, -14.94478487508836],
    [40.78125, -11.350796722383672],
    [40.078125, -8.928487062665504],
    [40.25390625, -6.839169626342808],
    [40.60546875, -4.214943141390639],
    [41.8359375, -2.460181181020993],
    [42.5390625, -1.5818302639606454],
    [44.47265625, -0.17578097424708533],
    [46.58203125, 1.2303741774326145],
    [47.98828124999999, 3.337953961416485],
    [49.21875, 5.441022303717974],
    [51.50390625, 8.928487062665504],
    [51.328125, 11.86735091145932],
    [48.69140625, 11.695272733029402],
    [46.05468749999999, 11.350796722383672],
    [44.6484375, 11.350796722383672],
    [43.76953125, 12.211180191503997],
    [42.5390625, 13.923403897723347],
    [41.30859375, 15.453680224345835],
    [40.42968749999999, 17.476432197195518],
    [37.79296875, 18.979025953255267],
    [37.08984375, 22.268764039073968],
    [35.5078125, 24.5271348225978],
    [34.27734375, 27.059125784374068],
    [34.80468749999999, 30.29701788337205],
    [31.9921875, 31.952162238024975],
    [29.70703125, 31.20340495091737],
    [22.5, 33.137551192346145],
    [20.7421875, 32.54681317351517],
    [19.86328125, 31.05293398570514],
    [16.34765625, 31.50362930577303],
    [14.94140625, 32.54681317351517],
    [11.77734375, 33.284619968887675],
    [12.3046875, 35.02999636902566],
    [10.546875, 36.73888412439431],
    [10.01953125, 37.996162679728116],
    [7.91015625, 37.160316546736745],
    [3.1640625, 37.020098201368114],
    [0.87890625, 36.87962060502676],
    [-1.0546875, 35.60371874069731],
    [-6.328125, 35.88905007936091],
]
# Feature collection holding one polygon feature built from the ring above.
africa_polygon = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": {},
            "geometry": {"type": "Polygon", "coordinates": [africa_ring]},
        }
    ],
}
odf = location_in_polygon(df, ["latitude"], ["longitude"], africa_polygon)

In [30]:
# Keep only the rows whose polygon flag equals 1.
inside_polygon = F.col("latitude_longitude_in_poly") == 1
odf.filter(inside_polygon).toPandas()

                                                                                

Unnamed: 0,ifa,age,workclass,fnlwgt,logfnl,empty,education,education-num,marital-status,occupation,...,capital-loss,hours-per-week,native-country,income,dt_1,dt_2,latitude,longitude,geohash,latitude_longitude_in_poly


None of the datapoints are in Africa.

# Location in Country

In [31]:
from anovos.data_transformer.geospatial import location_in_country

In [32]:
# Example - check if any datapoints are in US
odf = location_in_country(
    spark,
    df,
    ["latitude"],
    ["longitude"],
    "US",
)
# Keep only the rows whose country flag equals 1.
inside_us = F.col("latitude_longitude_in_US_approx") == 1
odf.filter(inside_us).toPandas()

Unnamed: 0,ifa,age,workclass,fnlwgt,logfnl,empty,education,education-num,marital-status,occupation,...,capital-loss,hours-per-week,native-country,income,dt_1,dt_2,latitude,longitude,geohash,latitude_longitude_in_US_approx
0,4365a,29,Private,355569,5.550924,,Assoc-voc,11,Never-married,Exec-managerial,...,0,50,United-States,<=50K,10/13/19 23:19,10/22/19 23:19,34.054401,-118.244102,rck9hhnn,1
1,5170a,41,Self-emp-not-inc,27305,4.436242,,HS-grad,9,Married-civ-spouse,Farming-fishing,...,0,40,United-States,>50K,12/13/19 6:37,12/16/19 6:37,29.425724,-98.495183,rb68qd26,1
2,12261a,31,Private,166343,,,HS-grad,9,Never-married,Machine-op-inspct,...,0,50,*,<=50K,6/28/20 9:35,7/6/20 9:35,29.425693,-98.495191,pzc55xr8,1
3,14305a,22,Private,156822,,,10th,6,Never-married,Sales,...,1762,25,United-States,<=50K,11/1/20 0:40,11/4/20 0:40,39.09475,-94.577707,rckgff2j,1
4,23447a,28,Private,271466,,,HS-grad,9,Married-civ-spouse,Exec-managerial,...,0,45,United-States,>50K,4/6/20 10:27,4/16/20 10:27,37.365101,-122.0383,rckmdqg5,1
5,25464a,41,Private,96635,,,HS-grad,9,Never-married,Exec-managerial,...,0,60,United-States,<=50K,5/25/20 7:44,6/6/20 7:44,25.774099,-80.181702,rcef57x7,1
6,26285a,37,Private,329026,,,HS-grad,9,Married-civ-spouse,Adm-clerical,...,0,40,United-States,>50K,6/13/20 1:07,6/16/20 1:07,29.425697,-98.495188,rb6b0mr3,1


7 datapoints are in the United States.

# Centroid

In [33]:
from anovos.data_transformer.geospatial import centroid

In [34]:
# Example 1 - calculate centroid of all datapoints
# (None is passed in place of an identifier column)
odf = centroid(
    df,
    "latitude",
    "longitude",
    None,
)
odf.toPandas()

Unnamed: 0,lat_centroid,long_centroid
0,-39.556639,174.393394


In [35]:
# Example 2 - calculate centroid based on identifier column
# ("ifa" is passed as the identifier column)
odf = centroid(
    df,
    "latitude",
    "longitude",
    "ifa",
)
odf.toPandas()

                                                                                

Unnamed: 0,ifa,lat_centroid,long_centroid
0,99a,-37.666816,176.131277
1,684a,-36.613055,174.669316
2,765a,-38.142669,176.307236
3,797a,-45.910612,170.404627
4,1067a,-37.692156,176.122998
...,...,...,...
32556,32028a,-36.781720,174.742931
32557,32186a,-37.214620,175.872412
32558,32265a,-36.829446,174.779309
32559,32320a,-46.369482,168.400254


# Weighted Centroid

In [36]:
from anovos.data_transformer.geospatial import weighted_centroid

In [37]:
# Example - calculate the weighted centroid; "ifa" is passed first, followed by
# the latitude and longitude column names.
odf = weighted_centroid(
    df,
    "ifa",
    "latitude",
    "longitude",
)
odf.toPandas()

Unnamed: 0,ifa,lat_centroid,long_centroid
0,99a,-39.556639,174.393394
1,684a,-39.556639,174.393394
2,765a,-39.556639,174.393394
3,797a,-39.556639,174.393394
4,1067a,-39.556639,174.393394
...,...,...,...
32556,32028a,-39.556639,174.393394
32557,32186a,-39.556639,174.393394
32558,32265a,-39.556639,174.393394
32559,32320a,-39.556639,174.393394


# ROG Calculation

In [38]:
from anovos.data_transformer.geospatial import rog_calculation

In [39]:
# Example 1 - Calculate Radius of Gyration (in meter) of all datapoints
# (None is passed in place of an identifier column)
odf = rog_calculation(
    df,
    "latitude",
    "longitude",
    None,
)
odf.toPandas()

Unnamed: 0,radius_of_gyration
0,320361.15246


In [40]:
# Example 2 - Calculate Radius of Gyration (in meter) of datapoints based on identifier column
# ("ifa" is passed as the identifier column)
odf = rog_calculation(
    df,
    "latitude",
    "longitude",
    "ifa",
)
odf.toPandas()

Unnamed: 0,ifa,radius_of_gyration
0,99a,0.457362
1,684a,0.590336
2,765a,0.087678
3,797a,0.444144
4,1067a,0.278352
...,...,...
32556,13752a,0.313800
32557,17034a,0.496975
32558,18841a,0.510683
32559,23734a,0.527652


# Reverse Geocoding

In [41]:
from anovos.data_transformer.geospatial import reverse_geocoding

In [42]:
# Example - reverse geocode latitude-longitude pairs into an address
# (name of place, region and country code)
odf = reverse_geocoding(
    df,
    "latitude",
    "longitude",
)
odf.toPandas()

Loading formatted geocoded file...
Loading formatted geocoded file...
                                                                                

Unnamed: 0,latitude,longitude,name_of_place,region,country_code
0,-38.624096,177.982468,Gisborne,Gisborne,NZ
1,-40.880497,174.992142,Paraparaumu,Wellington,NZ
2,-37.735630,176.164047,Tauranga,Bay of Plenty,NZ
3,-39.536491,176.832321,Taradale,Hawke's Bay,NZ
4,-41.128094,175.033722,Upper Hutt,Wellington,NZ
...,...,...,...,...,...
32556,-41.293278,174.783737,Wellington,Wellington,NZ
32557,-45.855858,170.513382,Dunedin,Otago,NZ
32558,-37.743980,175.225586,Hamilton,Waikato,NZ
32559,-37.750027,175.278122,Hamilton,Waikato,NZ
