In [2]:
import pandas as pd
import urllib.request, json
from urllib.error import HTTPError
import numpy as np

from datetime import datetime as dt
import time
from datetime import timedelta, date
import re
import pytz
import concurrent.futures
import folium

def polygon_to_map(coordinates, zoom_start = 10, invert = False):
    """Render a filled polygon on a folium map centred on the polygon's mean point.

    Parameters
    ----------
    coordinates : sequence of [lat, lon] pairs
        Polygon vertices. Each pair is indexed as ``pair[0]`` (lat) and ``pair[1]`` (lon).
    zoom_start : int, optional
        Initial zoom level handed to ``folium.Map``.
    invert : bool, optional
        When true, every pair is reversed first (use this for [lon, lat] input).

    Returns
    -------
    The ``folium.Map`` with the polygon added to it.
    """
    points = [pair[::-1] for pair in coordinates] if invert else coordinates

    # Centre the view on the arithmetic mean of the vertices.
    centre = [
        sum(pair[0] for pair in points) / len(points),
        sum(pair[1] for pair in points) / len(points),
    ]
    fmap = folium.Map(location=centre, zoom_start=zoom_start)

    outline = folium.Polygon(
        locations=points,
        color='blue',
        fill=True,
        fill_color='cyan',
        fill_opacity=0.5,
    )
    outline.add_to(fmap)
    return fmap

def return_json(url_api, timeout = 30, user_agent = "grid-weather-notebook (python-urllib)"):
    """Fetch ``url_api`` and return its body decoded as JSON.

    Never raises: every failure is reported as a dict with a single
    ``"error"`` key so callers can branch on ``'error' in result``.

    Parameters
    ----------
    url_api : str or urllib.request.Request
        Address to fetch. A ready-made ``Request`` is used as-is.
    timeout : float, optional
        Seconds to wait on the connection before giving up. Without a
        timeout a stalled connection would block the calling thread forever.
    user_agent : str, optional
        Value of the ``User-Agent`` header sent with string URLs.
        NOTE(review): the NWS API documentation asks clients to identify
        themselves with a User-Agent; consider adding contact details.

    Returns
    -------
    The decoded JSON value on success, otherwise
    ``{"error": "404 Not Found"}``, ``{"error": "HTTP Error <code>"}`` or
    ``{"error": "<exception text>"}``.
    """
    try:
        # Built inside the try block so a malformed URL is reported as an
        # error dict, just like any other failure.
        if isinstance(url_api, urllib.request.Request):
            request = url_api
        else:
            request = urllib.request.Request(url_api, headers={"User-Agent": user_agent})
        with urllib.request.urlopen(request, timeout=timeout) as response:
            return json.load(response)
    except HTTPError as e:
        if e.code == 404:
            return {"error": "404 Not Found"}
        return {"error": f"HTTP Error {e.code}"}
    except Exception as e:
        return {"error": str(e)}
def weather_api_coordinates(lat, long):
    """Query the api.weather.gov ``/points/<lat>,<long>`` endpoint.

    Returns whatever ``return_json`` returns for that URL.
    """
    point = ",".join((str(lat), str(long)))
    return return_json("https://api.weather.gov/points/" + point)

def wait(delta_time = 1):
    """Pause the calling thread for ``delta_time`` seconds.

    Uses ``time.sleep`` rather than spinning on the clock, so the thread
    yields the CPU (and the GIL) to the other worker threads while it waits.

    Parameters
    ----------
    delta_time : float, optional
        Seconds to pause. Zero or negative values return immediately.
    """
    # time.sleep rejects negative values, so guard to keep "return at once".
    if delta_time > 0:
        time.sleep(delta_time)

def is_in_US(coord, i = 0):
    """Check whether ``coord`` resolves to a forecast grid on api.weather.gov.

    Parameters
    ----------
    coord : tuple
        ``(lat, lon)`` pair. It is used as a dictionary key in the result,
        so it must be hashable.
    i : int, optional
        Number of 403 retries already spent (callers normally leave this at 0).

    Returns
    -------
    dict
        ``{coord: <bool>, 'res': <dict>}``. On success ``'res'`` is the API
        response; on any failure it is a dict containing an ``'error'`` key,
        so ``'error' in result['res']`` is always a valid check.
    """
    max_retries = 5
    attempt = i
    while True:
        res = weather_api_coordinates(coord[0], coord[1])

        if '@context' in res:
            grid_id = (res.get('properties') or {}).get('gridId')
            if grid_id is None:
                return {coord: False, 'res': {'error': 'no gridid'}}
            if attempt != 0:
                print('\n 403 RESOLVED!!!!!!!!!!!!!!!!!\n')
            return {coord: True, 'res': res}

        # Only HTTP 403 is retried; any other failure is returned unchanged.
        if res.get('error') != 'HTTP Error 403':
            return {coord: False, 'res': res}

        print('403', attempt, round(coord[0], 5), round(coord[1], 5))
        if attempt >= max_retries:
            # Report as an error dict (not a bare string) so downstream
            # "'error' in res" checks treat this entry as a failure.
            return {coord: False, 'res': {'error': 'HTTP Error 403 persisted after 5 retries'}}
        wait(5)
        attempt += 1

# Bounding box used for the sampling grid (presumably the contiguous US - confirm).
# NOTE: lon_max is the numerically smaller (western) longitude and lon_min the
# larger (eastern) one; the names are kept as they are.
lat_max = 49.384534
lat_min = 24.506153
lon_max = -124.761535
lon_min = -66.948743

# Corner points as (lat, lon), listed NW -> NE -> SE -> SW.
NW_Extreme = (lat_max, lon_max)
NE_Extreme = (lat_max, lon_min)
SE_Extreme = (lat_min, lon_min)
SW_Extreme = (lat_min, lon_max)
exterm = [NW_Extreme, NE_Extreme, SE_Extreme, SW_Extreme]

delta_distance = 800  # km
# Grid step in degrees: 111 km per degree, applied to latitude and longitude alike.
delta = delta_distance / 111

# Sweep from the NW corner: eastwards along each row, then one row south.
grid = []
lat, lon = lat_max, lon_max
while lat > lat_min:
    lon = lon_max
    while lon < lon_min:
        grid.append((lat, lon))
        lon += delta
    lat -= delta
len(grid)

36

In [3]:
# Show the bounding box on a map (the map is the cell's output).
polygon_to_map(exterm, zoom_start=4)


In [4]:
# Query the API for every grid point, one batch of `bucket_size` points at a time,
# and time the whole run.
start = time.time()
in_US = []
results = []
bucket_size = 32
n_buckets = len(grid) // bucket_size + 1
for bucket in range(n_buckets):
    offset = bucket_size * bucket
    # Progress marker, printed only for every `bucket_size`-th batch.
    if bucket % bucket_size == 0:
        print(offset)
    coordinates = grid[offset:offset + bucket_size]
    # A fresh pool per batch; map() returns results in the order of `coordinates`.
    with concurrent.futures.ThreadPoolExecutor(max_workers=64) as executor:
        batch = list(executor.map(is_in_US, coordinates))
    results += batch

# Earlier variants, kept for reference (not executed):
# for coords in grid:
#     if list(is_in_US(coords).values())[0]:
#         in_US.append(coords)
# with concurrent.futures.ThreadPoolExecutor(max_workers=32) as executor:
#     results = list(executor.map(is_in_US, grid))
print(time.time()-start)

0
13.31611180305481


In [5]:
import os
import pickle

# Save the raw API results so later cells can be re-run without re-crawling.
f_name = './grid_results/grid_results_' + str(delta_distance)+'.pkl'
# open() does not create missing directories; make sure the target folder exists.
os.makedirs(os.path.dirname(f_name), exist_ok=True)
with open(f_name, 'wb') as f:
    pickle.dump(results, f)

In [6]:
# Flatten the successful lookups into [lat, lon, url, state] rows.
final_results = []
cols = ['lat', 'lon', 'url', 'state' ]
for res in results:
    payload = res['res']
    # Skip failed lookups. The isinstance check matters: if 'res' were a plain
    # string, `'error' in payload` would be a substring test and the indexing
    # below would raise TypeError.
    if not isinstance(payload, dict) or 'error' in payload:
        continue
    # The coordinate tuple is the first key of each result dict.
    coords = next(iter(res))
    state = payload['properties']['relativeLocation']['properties']['state']
    final_results.append([coords[0], coords[1], payload['id'], state])

final_results[0]

[42.1773267927928,
 -124.761535,
 'https://api.weather.gov/points/42.1773,-124.7615',
 'OR']

In [7]:
# Turn the collected rows into a DataFrame and write it to a CSV file
# named after the grid spacing.
final_results = pd.DataFrame(data=final_results, columns=cols)
csv_f_name = f"grid_results_{delta_distance!s}.csv"
final_results.to_csv(csv_f_name)

In [8]:
# (rows, columns) of the table: one row per grid point kept in final_results.
final_results.shape

(15, 4)

In [9]:
import os
import multiprocessing

# Total logical CPUs available
print("Logical CPUs (threads):", os.cpu_count())

# multiprocessing.cpu_count() also reports logical CPUs, not physical cores.
# The standard library has no call for physical cores; a third-party package
# (e.g. psutil.cpu_count(logical=False)) would be needed for that.
print("Logical CPUs (multiprocessing):", multiprocessing.cpu_count())

Logical CPUs (threads): 16
Physical cores: 16
