In [65]:
import requests as rq
from zipfile import ZipFile
import pandas as pd
import pygeohash as gh
import json
import boto3

In [5]:
def download_zip_file(online_path, local_path):
    with rq.get(online_path) as response:
        open(local_path, "wb").write(response.content)
    return None

In [13]:
def get_full_df(local_zip_path, filename):
    with ZipFile(local_zip_path) as zip_file:
        df = pd.read_csv(zip_file.open(filename))
    return df

In [17]:
def filter_country(dataframe, country_to_filter):
    df = dataframe[dataframe.country==country_to_filter]
    return df

In [34]:
def select_columns(dataframe,*argv):
    df=pd.DataFrame()
    for arg in argv:
        df[arg]=dataframe[[arg]]
    return df

In [53]:
def adding_geohash_code(dataframe):
    dataframe["geohash"]=dataframe.apply(lambda x: gh.encode(x.lat, x.lng, precision=12), axis=1)
    return dataframe

In [61]:
def from_df_to_json(dataframe, json_filename):
    result = dataframe.to_json(orient="records")
    with open(json_filename, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=4)
    return None

In [None]:
def upload_to_s3(filename, s3_bucket, s3_path):
    s3_client = boto3.client('s3')
    try:
        response = s3_client.upload_file(filename, s3_bucket, f'{s3_path}{filename}')
    except ClientError as e:
        logging.error(e)
        return False
    return True

In [8]:
download_zip_file('https://simplemaps.com/static/data/world-cities/basic/simplemaps_worldcities_basicv1.75.zip','worldcities.zip')

In [14]:
dataframe_cities=get_full_df('worldcities.zip','worldcities.csv')

In [18]:
dataframe_portugal = filter_country(dataframe_cities,'Portugal')

In [31]:
dataframe_portugal

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
734,Lisbon,Lisbon,38.7080,-9.1390,Portugal,PT,PRT,Lisboa,primary,506654.0,1620619017
1728,Sintra,Sintra,38.7974,-9.3904,Portugal,PT,PRT,Lisboa,minor,377835.0,1620848791
2050,Vila Nova de Gaia,Vila Nova de Gaia,41.1333,-8.6167,Portugal,PT,PRT,Porto,minor,302295.0,1620040225
2437,Porto,Porto,41.1495,-8.6108,Portugal,PT,PRT,Porto,admin,237591.0,1620356810
2756,Cascais,Cascais,38.6969,-9.4206,Portugal,PT,PRT,Lisboa,minor,206479.0,1620824685
...,...,...,...,...,...,...,...,...,...,...,...
41315,Vila do Bispo,Vila do Bispo,37.0825,-8.9119,Portugal,PT,PRT,Faro,minor,5258.0,1620734353
41531,Mêda,Meda,40.9667,-7.2667,Portugal,PT,PRT,Guarda,minor,5202.0,1620700858
41563,Vila Nova de Paiva,Vila Nova de Paiva,40.8500,-7.7333,Portugal,PT,PRT,Viseu,minor,5176.0,1620015183
41784,Alfândega da Fé,Alfandega da Fe,41.3500,-6.9667,Portugal,PT,PRT,Bragança,minor,5104.0,1620280821


In [35]:
dataframe_select=select_columns(dataframe_portugal,'city','lat','lng','population')

In [36]:
dataframe_select

Unnamed: 0,city,lat,lng,population
734,Lisbon,38.7080,-9.1390,506654.0
1728,Sintra,38.7974,-9.3904,377835.0
2050,Vila Nova de Gaia,41.1333,-8.6167,302295.0
2437,Porto,41.1495,-8.6108,237591.0
2756,Cascais,38.6969,-9.4206,206479.0
...,...,...,...,...
41315,Vila do Bispo,37.0825,-8.9119,5258.0
41531,Mêda,40.9667,-7.2667,5202.0
41563,Vila Nova de Paiva,40.8500,-7.7333,5176.0
41784,Alfândega da Fé,41.3500,-6.9667,5104.0


In [54]:
final_df=adding_geohash_code(dataframe_select)

In [55]:
final_df

Unnamed: 0,city,lat,lng,population,geohash
734,Lisbon,38.7080,-9.1390,506654.0,eycs0n94my5w
1728,Sintra,38.7974,-9.3904,377835.0,eyckdqf55zd3
2050,Vila Nova de Gaia,41.1333,-8.6167,302295.0,ez3f5bjdb6fu
2437,Porto,41.1495,-8.6108,237591.0,ez3fh51c1yf0
2756,Cascais,38.6969,-9.4206,206479.0,eyck1ss1u0gx
...,...,...,...,...,...
41315,Vila do Bispo,37.0825,-8.9119,5258.0,ey9dvnwt1xj8
41531,Mêda,40.9667,-7.2667,5202.0,ez6c49u83d7u
41563,Vila Nova de Paiva,40.8500,-7.7333,5176.0,ez682h8b3pct
41784,Alfândega da Fé,41.3500,-6.9667,5104.0,ez751rx10nev


In [62]:
from_df_to_json(final_df,'portuguese_cities.json')

In [63]:
with open('portuguese_cities.json') as json_file:
    data = json.load(json_file)
    print(data)

[{"city":"Lisbon","lat":38.708,"lng":-9.139,"population":506654.0,"geohash":"eycs0n94my5w"},{"city":"Sintra","lat":38.7974,"lng":-9.3904,"population":377835.0,"geohash":"eyckdqf55zd3"},{"city":"Vila Nova de Gaia","lat":41.1333,"lng":-8.6167,"population":302295.0,"geohash":"ez3f5bjdb6fu"},{"city":"Porto","lat":41.1495,"lng":-8.6108,"population":237591.0,"geohash":"ez3fh51c1yf0"},{"city":"Cascais","lat":38.6969,"lng":-9.4206,"population":206479.0,"geohash":"eyck1ss1u0gx"},{"city":"Loures","lat":38.8333,"lng":-9.1667,"population":205054.0,"geohash":"eyckzmmh27v1"},{"city":"Braga","lat":41.5503,"lng":-8.42,"population":181494.0,"geohash":"ez6h2kh8pc2c"},{"city":"Amadora","lat":38.75,"lng":-9.2333,"population":175136.0,"geohash":"eyckmyhx0e89"},{"city":"Matosinhos","lat":41.1867,"lng":-8.6844,"population":175478.0,"geohash":"ez3f63u1yv65"},{"city":"Almada","lat":38.6803,"lng":-9.1583,"population":174030.0,"geohash":"eyckp9e1h4s9"},{"city":"Oeiras","lat":38.697,"lng":-9.3017,"population":172