## Downloading the next two days of forecast data from weather.gov

In [1]:
import sys
from datetime import datetime

import requests
import pytz
import pandas as pd

sys.path.append("..")
import src.download as dl

In [2]:
# Coordinates passed to the weather.gov /points endpoint as keyword arguments.
# NOTE(review): the hourly forecast recorded further down for this point comes
# back with a -05:00 offset in mid-October, i.e. US Central time. If this is
# meant to be Central Park, NYC, the longitude should be near -73.97 — confirm
# the intended location before relying on the saved data.
c_park = {"latitude":40.7823,
         "longitude":-87.7421}

In [3]:
base_url = "https://api.weather.gov/points/{latitude},{longitude}"

In [4]:
def format_url(base_url, latitude, longitude):
    """Fill a URL template with a point's coordinates.

    Parameters
    ----------
    base_url : str
        Template containing ``{latitude}`` and ``{longitude}`` placeholders.
    latitude, longitude
        Values substituted into the matching placeholders.

    Returns
    -------
    str
        The template with both placeholders filled in.
    """
    # str.format already returns a str, so no extra f-string wrapping is needed.
    return base_url.format(latitude=latitude, longitude=longitude)

In [5]:
url = format_url(base_url, **c_park)

In [6]:
url

'https://api.weather.gov/points/40.7823,-87.7421'

In [7]:
d = requests.get(url)

In [8]:
d

<Response [200]>

In [9]:
type(d)

requests.models.Response

In [10]:
d.json()

{'@context': ['https://geojson.org/geojson-ld/geojson-context.jsonld',
  {'@version': '1.1',
   'wx': 'https://api.weather.gov/ontology#',
   's': 'https://schema.org/',
   'geo': 'http://www.opengis.net/ont/geosparql#',
   'unit': 'http://codes.wmo.int/common/unit/',
   '@vocab': 'https://api.weather.gov/ontology#',
   'geometry': {'@id': 's:GeoCoordinates', '@type': 'geo:wktLiteral'},
   'city': 's:addressLocality',
   'state': 's:addressRegion',
   'distance': {'@id': 's:Distance', '@type': 's:QuantitativeValue'},
   'bearing': {'@type': 's:QuantitativeValue'},
   'value': {'@id': 's:value'},
   'unitCode': {'@id': 's:unitCode', '@type': '@id'},
   'forecastOffice': {'@type': '@id'},
   'forecastGridData': {'@type': '@id'},
   'publicZone': {'@type': '@id'},
   'county': {'@type': '@id'}}],
 'id': 'https://api.weather.gov/points/40.7823,-87.7420999',
 'type': 'Feature',
 'geometry': {'type': 'Point', 'coordinates': [-87.7420999, 40.7823]},
 'properties': {'@id': 'https://api.weather

In [11]:
hourly_url = d.json()["properties"]["forecastHourly"]

In [12]:
d = requests.get(hourly_url)

In [13]:
d

<Response [200]>

In [14]:
d.json()

{'@context': ['https://geojson.org/geojson-ld/geojson-context.jsonld',
  {'@version': '1.1',
   'wx': 'https://api.weather.gov/ontology#',
   'geo': 'http://www.opengis.net/ont/geosparql#',
   'unit': 'http://codes.wmo.int/common/unit/',
   '@vocab': 'https://api.weather.gov/ontology#'}],
 'type': 'Feature',
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.7673534, 40.7986966],
    [-87.76889800000001, 40.7767939],
    [-87.7399666, 40.7756218],
    [-87.7384159, 40.7975245],
    [-87.7673534, 40.7986966]]]},
 'properties': {'updated': '2021-10-13T14:15:21+00:00',
  'units': 'us',
  'forecastGenerator': 'HourlyForecastGenerator',
  'generatedAt': '2021-10-13T14:20:48+00:00',
  'updateTime': '2021-10-13T14:15:21+00:00',
  'validTimes': '2021-10-13T08:00:00+00:00/P7DT17H',
  'elevation': {'unitCode': 'wmoUnit:m', 'value': 185.0136},
  'periods': [{'number': 1,
    'name': '',
    'startTime': '2021-10-13T09:00:00-05:00',
    'endTime': '2021-10-13T10:00:00-05:00',
    'isDaytime'

In [15]:
def get_url(url, timeout=30):
    """GET ``url`` and return the response, raising if the request fails.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float, optional
        Seconds to wait for the server before giving up (passed to
        ``requests.get``). Defaults to 30.

    Returns
    -------
    requests.Response
        The response, guaranteed to have a non-error HTTP status.

    Raises
    ------
    requests.exceptions.RequestException
        If the request cannot be completed or the server answers with an
        error status (``HTTPError`` is a subclass).
    """
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as err:
        print(f"There has been an error: {err}")
        # Propagate the failure: handing the exception object back as if it
        # were a response only makes the caller crash later on `.json()`.
        raise
    return response

In [16]:
def wrapper(location_json):
    """Fetch the hourly-forecast response for one location.

    Two requests are made: the first resolves the point (built from the
    module-level ``base_url`` template) to its metadata, the second follows
    the ``forecastHourly`` link found in that metadata.

    Parameters
    ----------
    location_json : dict
        Mapping with ``latitude`` and ``longitude`` keys; it is unpacked
        into ``format_url``.

    Returns
    -------
    The response object returned by ``get_url`` for the hourly forecast URL.

    Raises
    ------
    Exception
        Whatever error ``get_url`` reported for either request.
    """
    points_url = format_url(base_url, **location_json)

    points_response = get_url(points_url)
    # get_url may hand back the exception object instead of a response;
    # surface the real error rather than failing on `.json()` below.
    if isinstance(points_response, Exception):
        raise points_response

    forecast_url = points_response.json()["properties"]["forecastHourly"]

    forecast_response = get_url(forecast_url)
    if isinstance(forecast_response, Exception):
        raise forecast_response

    return forecast_response
    
j = wrapper(c_park)

In [17]:
j.json()

{'@context': ['https://geojson.org/geojson-ld/geojson-context.jsonld',
  {'@version': '1.1',
   'wx': 'https://api.weather.gov/ontology#',
   'geo': 'http://www.opengis.net/ont/geosparql#',
   'unit': 'http://codes.wmo.int/common/unit/',
   '@vocab': 'https://api.weather.gov/ontology#'}],
 'type': 'Feature',
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.7673534, 40.7986966],
    [-87.76889800000001, 40.7767939],
    [-87.7399666, 40.7756218],
    [-87.7384159, 40.7975245],
    [-87.7673534, 40.7986966]]]},
 'properties': {'updated': '2021-10-13T14:15:21+00:00',
  'units': 'us',
  'forecastGenerator': 'HourlyForecastGenerator',
  'generatedAt': '2021-10-13T14:20:48+00:00',
  'updateTime': '2021-10-13T14:15:21+00:00',
  'validTimes': '2021-10-13T08:00:00+00:00/P7DT17H',
  'elevation': {'unitCode': 'wmoUnit:m', 'value': 185.0136},
  'periods': [{'number': 1,
    'name': '',
    'startTime': '2021-10-13T09:00:00-05:00',
    'endTime': '2021-10-13T10:00:00-05:00',
    'isDaytime'

### Timezone

For our purposes, it is important to have forecasts two days in advance. I hate timezones, but they are a necessary evil.

From https://pypi.org/project/pytz/

`The preferred way of dealing with times is to always work in UTC, converting to localtime only when generating output to be read by humans`

In [18]:
datetime.now()

datetime.datetime(2021, 10, 13, 16, 22, 6, 566526)

In [19]:
period = j.json()['properties']['periods'][0]
period

{'number': 1,
 'name': '',
 'startTime': '2021-10-13T09:00:00-05:00',
 'endTime': '2021-10-13T10:00:00-05:00',
 'isDaytime': True,
 'temperature': 60,
 'temperatureUnit': 'F',
 'temperatureTrend': None,
 'windSpeed': '5 mph',
 'windDirection': 'SSE',
 'icon': 'https://api.weather.gov/icons/land/day/bkn?size=small',
 'shortForecast': 'Partly Sunny',
 'detailedForecast': ''}

In [20]:
period['startTime']

'2021-10-13T09:00:00-05:00'

In [21]:
start_time = datetime.fromisoformat(period['startTime'])
start_time

datetime.datetime(2021, 10, 13, 9, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=68400)))

In [22]:
utc = pytz.timezone('UTC')

In [23]:
start_time.astimezone(utc)

datetime.datetime(2021, 10, 13, 14, 0, tzinfo=<UTC>)

### To DataFrame

I was hoping to avoid putting this into a dataframe, but it will make my life far easier.

In [24]:
now = datetime.now().astimezone(utc)

In [25]:
df = pd.DataFrame.from_dict(j.json()['properties']['periods'])

In [26]:
df['startTime'] = pd.to_datetime(df['startTime'], utc=True)

In [27]:
df['endTime'] = pd.to_datetime(df['endTime'], utc=True)

In [28]:
df['observed_time'] = now

In [29]:
df.drop(["number", "name", "icon"], axis=1, inplace=True)

In [30]:
df

Unnamed: 0,startTime,endTime,isDaytime,temperature,temperatureUnit,temperatureTrend,windSpeed,windDirection,shortForecast,detailedForecast,observed_time
0,2021-10-13 14:00:00+00:00,2021-10-13 15:00:00+00:00,True,60,F,,5 mph,SSE,Partly Sunny,,2021-10-13 14:22:06.610671+00:00
1,2021-10-13 15:00:00+00:00,2021-10-13 16:00:00+00:00,True,66,F,,10 mph,SSE,Partly Sunny,,2021-10-13 14:22:06.610671+00:00
2,2021-10-13 16:00:00+00:00,2021-10-13 17:00:00+00:00,True,71,F,,10 mph,SSE,Partly Sunny,,2021-10-13 14:22:06.610671+00:00
3,2021-10-13 17:00:00+00:00,2021-10-13 18:00:00+00:00,True,73,F,,10 mph,SSE,Chance Rain Showers,,2021-10-13 14:22:06.610671+00:00
4,2021-10-13 18:00:00+00:00,2021-10-13 19:00:00+00:00,True,73,F,,10 mph,SSE,Chance Showers And Thunderstorms,,2021-10-13 14:22:06.610671+00:00
...,...,...,...,...,...,...,...,...,...,...,...
151,2021-10-19 21:00:00+00:00,2021-10-19 22:00:00+00:00,True,69,F,,10 mph,SW,Mostly Sunny,,2021-10-13 14:22:06.610671+00:00
152,2021-10-19 22:00:00+00:00,2021-10-19 23:00:00+00:00,True,67,F,,10 mph,SW,Mostly Sunny,,2021-10-13 14:22:06.610671+00:00
153,2021-10-19 23:00:00+00:00,2021-10-20 00:00:00+00:00,False,64,F,,5 mph,SW,Partly Cloudy,,2021-10-13 14:22:06.610671+00:00
154,2021-10-20 00:00:00+00:00,2021-10-20 01:00:00+00:00,False,60,F,,5 mph,SSW,Partly Cloudy,,2021-10-13 14:22:06.610671+00:00


### Initial Save to CSV

Running this will overwrite the original.

In [31]:
#df.to_csv("../data/central_park.csv", index=False)

### Try reading

In [32]:
df = pd.read_csv("../data/central_park.csv", parse_dates=['startTime', 'endTime','observed_time'])

In [33]:
df['startTime']

0     2021-10-13 09:00:00+00:00
1     2021-10-13 10:00:00+00:00
2     2021-10-13 11:00:00+00:00
3     2021-10-13 12:00:00+00:00
4     2021-10-13 13:00:00+00:00
                 ...           
463   2021-10-19 21:00:00+00:00
464   2021-10-19 22:00:00+00:00
465   2021-10-19 23:00:00+00:00
466   2021-10-20 00:00:00+00:00
467   2021-10-20 01:00:00+00:00
Name: startTime, Length: 468, dtype: datetime64[ns, UTC]

### Wrapping it up

In [34]:
# Locations to download: "name" is used to build the CSV file name and
# "lat_lon" is the coordinate mapping handed to `wrapper`.
# NOTE(review): the forecast recorded earlier in this notebook for these
# coordinates has a -05:00 offset in mid-October (US Central time). If the
# entry is meant to be Central Park, NYC, the longitude should be near
# -73.97 — confirm the intended location.
locations = [
    {"name":"central_park",
     "lat_lon":{"latitude":40.7823,
                 "longitude":-87.7421}}
]

In [35]:
def weather_dataframe(raw_weather):
    """Turn an hourly-forecast response into a tidy DataFrame.

    Parameters
    ----------
    raw_weather
        Object whose ``.json()`` returns a mapping with the forecast
        periods under ``["properties"]["periods"]``.

    Returns
    -------
    pandas.DataFrame
        One row per period. ``startTime`` and ``endTime`` are converted to
        UTC timestamps, ``observed_time`` holds the (UTC) moment this
        function ran, and the ``number``, ``name`` and ``icon`` columns are
        removed.
    """
    # Local import keeps the function self-contained instead of relying on a
    # `utc` object defined in some earlier notebook cell.
    from datetime import timezone

    observed_at = datetime.now(timezone.utc)

    periods = raw_weather.json()["properties"]["periods"]
    forecast = pd.DataFrame(periods)

    # The API reports local times with an offset; normalise both to UTC.
    for column in ("startTime", "endTime"):
        forecast[column] = pd.to_datetime(forecast[column], utc=True)

    forecast["observed_time"] = observed_at

    # Return a new frame rather than mutating in place.
    return forecast.drop(columns=["number", "name", "icon"])

In [36]:
def weather_forecasts(location_json):
    """Download and tabulate the hourly forecast for one location entry.

    ``location_json['lat_lon']`` is passed to ``wrapper`` to fetch the raw
    forecast, and the result is converted with ``weather_dataframe``.
    """
    return weather_dataframe(wrapper(location_json['lat_lon']))
    
df = weather_forecasts(locations[0])

### Write and append

Okay, I am 90% sure I will be using `S3`, so I have to read the existing file, append to it, and then write it back.

In [37]:
df_old = pd.read_csv("../data/central_park.csv", parse_dates=['startTime', 'endTime','observed_time'])

In [38]:
df_old

Unnamed: 0,startTime,endTime,isDaytime,temperature,temperatureUnit,temperatureTrend,windSpeed,windDirection,shortForecast,detailedForecast,observed_time
0,2021-10-13 09:00:00+00:00,2021-10-13 10:00:00+00:00,False,54,F,,5 mph,S,Patchy Fog,,2021-10-13 09:42:22.306617+00:00
1,2021-10-13 10:00:00+00:00,2021-10-13 11:00:00+00:00,False,53,F,,5 mph,SSE,Patchy Fog,,2021-10-13 09:42:22.306617+00:00
2,2021-10-13 11:00:00+00:00,2021-10-13 12:00:00+00:00,True,53,F,,5 mph,SSE,Patchy Fog,,2021-10-13 09:42:22.306617+00:00
3,2021-10-13 12:00:00+00:00,2021-10-13 13:00:00+00:00,True,52,F,,5 mph,SE,Patchy Fog,,2021-10-13 09:42:22.306617+00:00
4,2021-10-13 13:00:00+00:00,2021-10-13 14:00:00+00:00,True,55,F,,5 mph,SSE,Patchy Fog,,2021-10-13 09:42:22.306617+00:00
...,...,...,...,...,...,...,...,...,...,...,...
463,2021-10-19 21:00:00+00:00,2021-10-19 22:00:00+00:00,True,69,F,,10 mph,SW,Mostly Sunny,,2021-10-13 14:20:49.801568+00:00
464,2021-10-19 22:00:00+00:00,2021-10-19 23:00:00+00:00,True,67,F,,10 mph,SW,Mostly Sunny,,2021-10-13 14:20:49.801568+00:00
465,2021-10-19 23:00:00+00:00,2021-10-20 00:00:00+00:00,False,64,F,,5 mph,SW,Partly Cloudy,,2021-10-13 14:20:49.801568+00:00
466,2021-10-20 00:00:00+00:00,2021-10-20 01:00:00+00:00,False,60,F,,5 mph,SSW,Partly Cloudy,,2021-10-13 14:20:49.801568+00:00


In [39]:
df = pd.concat([df_old, df], axis=0)

In [40]:
csv_name = locations[0]["name"] + ".csv"
csv_path = "../data/" + csv_name
csv_path

'../data/central_park.csv'

In [41]:
df.to_csv(csv_path, index=False)

### Converted to module, let's try it

In [42]:
dl.weather_forecasts(locations[0])

Unnamed: 0,startTime,endTime,isDaytime,temperature,temperatureUnit,temperatureTrend,windSpeed,windDirection,shortForecast,detailedForecast,observed_time
0,2021-10-13 14:00:00+00:00,2021-10-13 15:00:00+00:00,True,60,F,,5 mph,SSE,Partly Sunny,,2021-10-13 14:22:07.047509+00:00
1,2021-10-13 15:00:00+00:00,2021-10-13 16:00:00+00:00,True,66,F,,10 mph,SSE,Partly Sunny,,2021-10-13 14:22:07.047509+00:00
2,2021-10-13 16:00:00+00:00,2021-10-13 17:00:00+00:00,True,71,F,,10 mph,SSE,Partly Sunny,,2021-10-13 14:22:07.047509+00:00
3,2021-10-13 17:00:00+00:00,2021-10-13 18:00:00+00:00,True,73,F,,10 mph,SSE,Chance Rain Showers,,2021-10-13 14:22:07.047509+00:00
4,2021-10-13 18:00:00+00:00,2021-10-13 19:00:00+00:00,True,73,F,,10 mph,SSE,Chance Showers And Thunderstorms,,2021-10-13 14:22:07.047509+00:00
...,...,...,...,...,...,...,...,...,...,...,...
151,2021-10-19 21:00:00+00:00,2021-10-19 22:00:00+00:00,True,69,F,,10 mph,SW,Mostly Sunny,,2021-10-13 14:22:07.047509+00:00
152,2021-10-19 22:00:00+00:00,2021-10-19 23:00:00+00:00,True,67,F,,10 mph,SW,Mostly Sunny,,2021-10-13 14:22:07.047509+00:00
153,2021-10-19 23:00:00+00:00,2021-10-20 00:00:00+00:00,False,64,F,,5 mph,SW,Partly Cloudy,,2021-10-13 14:22:07.047509+00:00
154,2021-10-20 00:00:00+00:00,2021-10-20 01:00:00+00:00,False,60,F,,5 mph,SSW,Partly Cloudy,,2021-10-13 14:22:07.047509+00:00
