# Get Country from coordinates
Try to get country information from coordinates (reverse geocoding; for a reference, please see [here](https://towardsdatascience.com/reverse-geocoding-in-python-a915acf29eb6)).

In [1]:
import pandas as pd
import pycountry

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from tqdm.notebook import tqdm

from src.features.smarterdb import global_connection, Dataset

In [2]:
# Register `progress_apply` on pandas objects so long-running row-wise
# operations display a progress bar.
tqdm.pandas()

# The connection must be set up before any `Dataset` query is issued.
# NOTE(review): presumably `global_connection()` opens the project's database
# connection -- confirm in src.features.smarterdb.
global_connection()

# Fetch the single dataset record registered under this archive name.
dataset = Dataset.objects(file="ovine_SNP50HapMap_data.zip").get()

Open the Kijas 2012 metadata file and display its first rows:

In [3]:
# Location of the original metadata spreadsheet inside the dataset's
# working directory.
metadata_path = dataset.working_dir / "ovine_SNP50HapMap_data/kijas2012_dataset.xlsx"

# `read_excel` accepts a path-like object and manages the file handle itself.
infos = pd.read_excel(metadata_path)

# Preview the first rows only.
infos.head()

Unnamed: 0,Breed,code,Latitude (N) degrees,Latitude (N) minutes,latitude,Longitude (O) degrees,Longitude (O) minutes,longitude,Location/source,Remark
0,AfricanDorper,ADP,-19,-34.0,-19.566667,18,7.0,18.116667,Grootfontein,Recent Dorset-Black Persian cross
1,AfricanWhiteDorper,AWD,-19,-34.0,-19.566667,18,7.0,18.116667,Grootfontein,"Recent, hybrid breed"
2,Afshari,AFS,36,40.0,36.666667,48,29.0,48.483333,"Maneshan, Zanjan, Iran",
3,Altamurana,ALT,40,49.0,40.816667,16,33.0,16.55,Altamura,
4,AustralianCoopworth,CPW,51,17.0,51.283333,0,24.0,0.4,West Malling,Derived from Romney


Define a new column in order to query for a location:

In [4]:
# Build the "latitude,longitude" query string used for reverse geocoding.
latitude_text = infos["latitude"].map(str)
longitude_text = infos["longitude"].map(str)
infos["coordinates"] = latitude_text.str.cat(longitude_text, sep=",")

In [5]:
# Nominatim (OpenStreetMap) geocoder client.
# NOTE(review): the Nominatim usage policy asks for a user agent that
# identifies the application; "myGeocoder" is a generic placeholder and should
# ideally be replaced with a project-specific identifier.
locator = Nominatim(user_agent="myGeocoder", timeout=10)

# Throttle reverse-geocoding calls. The public Nominatim service allows at
# most one request per second; the previous 0.001 s delay effectively disabled
# throttling and risks having the client blocked.
rgeocode = RateLimiter(locator.reverse, min_delay_seconds=1)
def get_country(coordinate):
    """Return the country name for a "latitude,longitude" string.

    The coordinate is reverse-geocoded through the rate-limited ``rgeocode``
    callable, and the ISO 3166-1 alpha-2 code found in the response is
    translated to a country name with ``pycountry``.

    Parameters
    ----------
    coordinate : str
        Query in the form ``"<latitude>,<longitude>"``.

    Returns
    -------
    str or None
        The pycountry name of the country, or ``None`` when the lookup yields
        no result, the result carries no country code (e.g. a point that is
        not inside any country), or pycountry does not know the code.
    """
    # "en" is a valid language code; the previous value "English" is not one.
    # The country code read below does not depend on the response language.
    location = rgeocode(coordinate, language="en")
    if not location:
        return None

    # Not every response contains an address with a country code, so avoid
    # raising KeyError and report "unknown" instead.
    country_code = location.raw.get("address", {}).get("country_code")
    if not country_code:
        return None

    # `countries.get` may return None for codes missing from pycountry's
    # database; guard against calling `.name` on None.
    country = pycountry.countries.get(alpha_2=country_code)
    return country.name if country is not None else None
# Several breeds share the same coordinates, so query the (rate-limited,
# remote) geocoder once per distinct coordinate and map the answers back onto
# every row instead of repeating identical requests.
country_by_coordinate = {
    coordinate: get_country(coordinate)
    for coordinate in tqdm(infos["coordinates"].unique())
}
infos["country"] = infos["coordinates"].map(country_by_coordinate)

  0%|          | 0/74 [00:00<?, ?it/s]

In [6]:
# Inspect the table, which now includes the `coordinates` and `country` columns.
infos

Unnamed: 0,Breed,code,Latitude (N) degrees,Latitude (N) minutes,latitude,Longitude (O) degrees,Longitude (O) minutes,longitude,Location/source,Remark,coordinates,country
0,AfricanDorper,ADP,-19,-34.0,-19.566667,18,7.0,18.116667,Grootfontein,Recent Dorset-Black Persian cross,"-19.5666666666667,18.1166666666667",Namibia
1,AfricanWhiteDorper,AWD,-19,-34.0,-19.566667,18,7.0,18.116667,Grootfontein,"Recent, hybrid breed","-19.5666666666667,18.1166666666667",Namibia
2,Afshari,AFS,36,40.0,36.666667,48,29.0,48.483333,"Maneshan, Zanjan, Iran",,"36.6666666666667,48.4833333333333","Iran, Islamic Republic of"
3,Altamurana,ALT,40,49.0,40.816667,16,33.0,16.550000,Altamura,,"40.8166666666667,16.55",Italy
4,AustralianCoopworth,CPW,51,17.0,51.283333,0,24.0,0.400000,West Malling,Derived from Romney,"51.2833333333333,0.4",United Kingdom
...,...,...,...,...,...,...,...,...,...,...,...,...
69,SwissWhiteAlpineSheep,SWA,46,35.0,46.583333,8,20.0,8.333333,Swiss Alps,,"46.5833333333333,8.33333333333333",Switzerland
70,Tibetan,TIB,32,0.0,32.000000,84,0.0,84.000000,Tibet,,"32.0,84.0",China
71,ValaisBlacknoseSheep,VBS,46,11.0,46.183333,7,33.0,7.550000,Valais,,"46.1833333333333,7.55",Switzerland
72,ValaisRedSheep,VRS,46,10.0,46.166667,7,12.0,7.200000,West Valais,,"46.1666666666667,7.2",Switzerland


In [7]:
# Write the annotated table next to the original spreadsheet, without the
# DataFrame index.
fixed_metadata_path = dataset.working_dir / "ovine_SNP50HapMap_data/kijas2012_dataset_fix.xlsx"
infos.to_excel(fixed_metadata_path, index=False)

The only location I couldn't resolve is "Dorset, UK", which has wrong coordinates. I resolve the location by name and fix those coordinates in the Excel table:

In [8]:
# Forward-geocode the place name to obtain coordinates for "Dorset, UK".
dorset = locator.geocode("Dorset, UK")
# Displayed as a (latitude, longitude) tuple.
dorset.latitude, dorset.longitude

(50.79683685, -2.34473226124306)

Next, I will put the updated `xlsx` file in the raw dataset.