In [175]:
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd
API_KEY = "your_api_key"

In [14]:
base_url = "https://geokeo.com/database/town/ke/<page_number>/"
data = []
for page_number in range(1,18):
    url = base_url.replace("<page_number>", str(page_number))
    response = requests.get(url)
    soup = BeautifulSoup(response.text,'html.parser')
    table = soup.find("table")
    rows = table.find_all("tr") #rows + header
    headers = [header.text for header in rows[0].find_all("th")]
    for row in rows[1:]: #starts at 1 since index 0 is the header
        cells = row.find_all("td")
        data.append([cell.text for cell in cells])
    
    time.sleep(2)

df=pd.DataFrame(data, columns=headers)
columns = ['Name','Latitude','Longitude']
towns_df = df[columns]

In [16]:
towns_df.tail()

Unnamed: 0,Name,Latitude,Longitude
335,Wargadud,2.3102605997537387,40.36219429999999
336,Watamu,-3.349999999732027,40.0166
337,Watamu,-3.3537987359446166,40.00961499248949
338,Webuye,0.598035423557572,34.77322439130398
339,Webuye,0.607627999936463,34.7687756


In [22]:
#How to setup the url
# https://maps.googleapis.com/maps/api/place/nearbysearch/json?
#location=<nairobi latitude and longitude>&radius=<radius to search in meters>&type=<business to search>&key=<api_key>

In [173]:
param_type = ["real_estate_agency","establishment"]
params = {
        "radius" : 3000,
        "key" : API_KEY
}


all_results = {}
# Lets create a datapipeline that usie googlemap apis to get data using locations etc
def agencies_establishments(): 
    # to not get a capping of 20 entries use next page token to get more values from the next page
    for _ , i in towns_df.iterrows():
        url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
        all_results[i["Name"]] = []
        params["location"] = f"{i["Latitude"]}, {i["Longitude"]}"
        print(f"Working on {i["Name"]}")
        for par_type in param_type:
            params["type"] = par_type
            while True:   
                response = requests.get(url,params=params)
                data = response.json()
                all_results[i["Name"]].extend(data.get("results",[]))
                next_page_token = data.get("next_page_token")
                if not next_page_token:
                    break
                time.sleep(5)
                if "next_page_token" in data:
                    params["pagetoken"] = data["next_page_token"]
                print(f"Results fetched = {len(all_results[i["Name"]])}")
    
    print(f"Total results fetched = {len(all_results)}")
    main_df = format_data(all_results)
    return main_df

def format_data(all_results):
    #Lets look at that data
    df=[]
    for _,i in towns_df.iterrows():
        results = all_results.get(f"{i["Name"]}",[])
        for result in results:
            df.append({
                "operational" : result.get("business_status", "N/A"),
                "name" : result.get("name", "N/A"),
                "total_ratings" : result.get("user_ratings_total", "N/A"),
                "avg_rating" : result.get("rating", "N/A"),
                "vicinity" : result.get("vicinity", "N/A"),
                "types" : result.get("types", "N/A"),
                "lat" : result.get("geometry", {}).get("location",{}).get("lat","N/A"),
                "lng" : result.get("geometry", {}).get("location",{}).get("lng","N/A"),
            })
    data = pd.DataFrame(df)
    return data

data = agencies_establishments()

Working on Abageranso
Working on Ahero
Results fetched = 22
Results fetched = 42
Working on Amagoro
Working on Athi River
Working on Athi River
Working on Awasi
Working on Awendo
Working on Babadogo
Working on Bajumwali
Working on Banana
Working on Banana
Working on Bangali
Working on Baolala
Working on Baragoi
Working on Beled Hawo
Working on Bisil
Working on Bissil
Working on Bomet
Working on Bondo
Working on Bondo
Working on Bukura
Working on Bulimbo/Harambee
Working on Bumala
Working on Buna
Working on Bungoma
Working on Bura East
Working on Bura West
Working on Busia
Working on Busia
Working on Butere
Working on Butere
Working on Bute Town
Working on Chaka
Working on Chakama
Working on Chavakali
Working on Chemelil
Working on Chepilat Town
Working on Chepkube
Working on Chesongo Center
Working on Chuka
Working on Chuka
Working on Dadaab
Working on Dadaab
Working on Dambas
Working on Danaba
Working on Dukana
Working on Dukana
Working on Dunto
Working on Egerton
Working on Eldama Ra

In [163]:
data.to_excel("towns_data_establishment_realEstate.xlsx", index=False)

In [177]:
first_10_data = list(all_results.values())[:10]
print(first_10_data)

[[{'business_status': 'OPERATIONAL', 'geometry': {'location': {'lat': 1.7729897, 'lng': 40.2959995}, 'viewport': {'northeast': {'lat': 1.774338680291502, 'lng': 40.2973484802915}, 'southwest': {'lat': 1.771640719708498, 'lng': 40.2946505197085}}}, 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/generic_business-71.png', 'icon_background_color': '#7B9EB0', 'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/generic_pinlet', 'name': 'Arbaqeranso Dispensary', 'photos': [{'height': 666, 'html_attributions': ['<a href="https://maps.google.com/maps/contrib/117701363042387504274">Joseph Onyango (QuantiScale)</a>'], 'photo_reference': 'AXQCQNTPyFBaDbhpP4-YNVCKqjgrmKRUFq9Ub1dYOhvrRik70xTvMy5kHe9J2-fc7X4UdRfxR9ZVE6LpwjNaObeucDCZNsH3iXvvoyg_-Z1h1oulWms90VSXmkcYuTpD1Rxb6c2Y38ftXBWP55V2Hm9ufPBVvGB56YqlMrRVovUljkSwP6QAvpsRYR3LRVElcabEZjcW6OPHPBWhXfsjxlZuXFM0ffXwYuyiqgowqDHc4RKHg7EliUwWWHBhtxMXEUBULCqZncgPyVNtVrnAK5NJVX10x5m8uqZ_qHtW1m8kG37eXxKcFOYsBsyX2OS