In [101]:
import math
import os
from random import random
from time import sleep

import pandas as pd
from chromedriver_autoinstaller import install
from requests import get
from selenium.webdriver import Chrome, ChromeOptions

In [None]:
# Access key appended to every tile request; stays None until a session is created.
session_key = None

# HTTP headers sent with each tile request so it carries a browser User-Agent.
headers = {}
headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0'

In [71]:
def create_session():
    """
    Scrape a fresh Apple Maps access key using a headless Chrome/Chromium instance.

    Loads the Satellites Pro USA map page, waits a fixed five seconds for it to
    populate, and reads the ``accessKey`` query parameter embedded in the
    ``data-map-printing-background`` attribute of the map element.

    Returns:
        str: the access key exactly as it appears in the attribute
        (i.e. still URL-encoded, ready to be appended to a tile URL).

    Raises:
        RuntimeError: if the attribute is missing/empty or holds no
            ``accessKey`` parameter.
    """
    print("Session creation in progress...")

    session_key_prefix = r'&accessKey='

    # session boilerplate setup
    options = ChromeOptions()
    options.add_argument('headless')
    driver = Chrome(executable_path=install(cwd=True), options=options)
    try:
        driver.get('https://satellites.pro/USA_map#37.405074,-94.284668,5')

        # wait for request to complete
        sleep(5)

        # Generic find_element(by, value) form: the find_element_by_* helpers
        # were removed in Selenium 4.3, while this form exists in 3.x and 4.x.
        base_element = driver.find_element('css selector', '#map-canvas .leaflet-mapkit-mutant')
        data = base_element.get_attribute('data-map-printing-background')
    finally:
        # Always shut the browser down, otherwise every call leaks a Chrome process.
        driver.quit()

    if not data:
        raise RuntimeError('data-map-printing-background attribute is missing or empty')

    prefix_begin = data.find(session_key_prefix)
    if prefix_begin == -1:
        raise RuntimeError('accessKey parameter not found in data-map-printing-background')

    # The key runs from just after the prefix up to the next query separator,
    # or to the end of the attribute when accessKey is the last parameter.
    key_begin = prefix_begin + len(session_key_prefix)
    key_end = data.find('&', key_begin)
    if key_end == -1:
        key_end = len(data)
    return data[key_begin:key_end]

In [80]:
# Fetch a fresh access key and keep it in the module-level variable read by scrape_tile.
session_key = create_session()
print(f'Session created with key: {session_key}')

Session creation in progress...
Session created with key: 1618907030_1874956892394077209_%2F_4NOmzZ5Si%2BSy58%2Fbl%2F8wYF1RfvjbWCAoOR74Azysd9A%3D


In [98]:
class Tile:
    """
    A single map tile identified by the point it must contain.

    Attributes are set in ``__init__``:
      name        -- label used by callers (e.g. as the output file stem)
      coordinates -- (latitude, longitude) pair in degrees
      zoom        -- zoom level; the grid is 2**zoom tiles wide and high
      xy          -- (tile_x, tile_y) integer tile indices for ``coordinates``
    """

    def __init__(self, name, coordinates, zoom=18):
        self.name = name
        self.coordinates = coordinates
        self.zoom = zoom
        self.xy = self.coordinates_to_tile_xy()

    def coordinates_to_tile_xy(self):
        """
        Convert ``self.coordinates`` to Web-Mercator (slippy map) tile indices.

        Returns:
            tuple[int, int]: (tile_x, tile_y) of the tile that contains the
            point, each clamped to the valid range ``0 .. 2**zoom - 1``.
        """
        latitude, longitude = self.coordinates
        zoom_factor = 2 ** self.zoom
        latitude_rad = math.radians(latitude)

        tile_x = zoom_factor * ((longitude + 180) / 360)
        # asinh(tan(lat)) equals log(tan(lat) + 1/cos(lat)) but does not hit
        # log(0) near the south pole.
        tile_y = zoom_factor * (1 - math.asinh(math.tan(latitude_rad)) / math.pi) / 2

        # A point belongs to the tile whose index is the floor of its fractional
        # position; rounding would pick the neighbouring tile half of the time.
        max_index = zoom_factor - 1
        tile_x = min(max(math.floor(tile_x), 0), max_index)
        tile_y = min(max(math.floor(tile_y), 0), max_index)
        return tile_x, tile_y

In [82]:
def scrape_tile(tile, output_dir):
    """
    Download the satellite image for ``tile`` and save it as a JPEG file.

    The request uses the module-level ``session_key`` and ``headers``. The image
    is written to ``<output_dir>/<tile.name>.jpg``; ``output_dir`` is created if
    it does not exist yet.

    Args:
        tile: object exposing ``xy`` (tile indices), ``zoom`` and ``name``.
        output_dir (str): directory the image file is written into.

    Returns:
        The ``tile`` that was passed in.

    Raises:
        RuntimeError: if the response body contains 'Access Denied' (e.g. the
            session key timed out), which requires a new session to be started.
        requests.HTTPError: for any other non-success HTTP status.
    """
    tile_x, tile_y = tile.xy

    # set request params
    # session_key is interpolated verbatim because it is already URL-encoded.
    request_url = f'https://sat-cdn3.apple-mapkit.com/tile?style=7&size=1&scale=1&z={tile.zoom}&x={tile_x}&y={tile_y}&v=9062&accessKey={session_key}'
    # A timeout keeps one stalled connection from hanging the whole scrape.
    response = get(request_url, headers=headers, timeout=30)

    # Potential errors may include session key time out or forcible disconnect
    # which requires a new session to be started
    if b'Access Denied' in response.content:
        raise RuntimeError(f'Access Denied for Tile {tile_x}_{tile_y}')
    # Do not save an error page under a .jpg name.
    response.raise_for_status()

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, tile.name + '.jpg'), 'wb') as fout:
        fout.write(response.content)
    return tile

In [None]:
# Survey CSV to read and root folder the tiles are written under.
SURVEY_CSV = '/Volumes/pakistancv/Survey/all_images_with_owner_names_matched.csv'
OUTPUT_ROOT = 'satellite_tiles'

dataframe = pd.read_csv(SURVEY_CSV)
num_count = 0

print('Scraping tiles in progress...')

for _, row in dataframe.iterrows():
    # skip rows without usable coordinates or without a district for the output folder
    if pd.isnull(row['x']) or pd.isnull(row['y']) or pd.isnull(row['district']):
        continue

    # one sub-folder per district, created on demand
    output_dir = os.path.join(OUTPUT_ROOT, str(row['district']).lower())
    os.makedirs(output_dir, exist_ok=True)

    # Tile expects (latitude, longitude); the CSV stores them as columns y and x
    tile = Tile(str(row['pin_mask']), (row['y'], row['x']))
    try:
        scrape_tile(tile, output_dir)
    except Exception as exc:
        # Only an access-denied failure is recoverable: the key has expired,
        # so start a new session and retry this tile once.
        if 'Access Denied' not in str(exc):
            raise
        session_key = create_session()
        scrape_tile(tile, output_dir)
    num_count += 1
    sleep(random() * 3) # avoid ip block from bot behavior

print(str(num_count) + ' tiles scraped.')