In [90]:
from src import *

ModuleNotFoundError: No module named 'src'

In [73]:
import os
from functools import reduce
from operator import add

import pandas as pd
import pendulum
import requests
from timezonefinder import TimezoneFinder

In [2]:
# World Weather Online API key. Prefer the WWO_API_KEY environment variable so the
# credential does not have to live in the notebook; the literal is kept only as a
# backward-compatible fallback and should be rotated and removed.
API_KEY = os.environ.get('WWO_API_KEY', 'f0ec0abb9b8143349f1130303211901')
# URL template for the past-weather endpoint. The doubled braces survive the f-string
# as {lat}, {lon}, {start} and {end} placeholders, to be filled in later via str.format().
WWO_URI = f'http://api.worldweatheronline.com/premium/v1/past-weather.ashx?key={API_KEY}&q={{lat}},{{lon}}&format=json&extra=localObsTime,utcDateTime,isDayTime&date={{start}}&enddate={{end}}&tp=1'

In [95]:
# grab locations historical data
def get_raw_weather_json(*, lat: float, lon: float, start: str, end: str, timeout: float = 30.0) -> dict:
    """
    Return unprocessed json from WWO api - hourly frequency

    Args:
        lat (float): Latitude substituted into the ``q=`` query parameter.
        lon (float): Longitude substituted into the ``q=`` query parameter.
        start (str): First date of the range, e.g. ``'2020-01-01'``.
        end (str): Last date of the range, e.g. ``'2020-01-18'``.
        timeout (float, optional): Seconds to wait for the server before
            giving up. Defaults to 30.0.

    Returns:
        dict: The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    uri = WWO_URI.format(lat=lat, lon=lon, start=start, end=end)
    # Without a timeout, requests can block forever on an unresponsive server.
    r = requests.get(uri, timeout=timeout)
    # Surface HTTP errors here instead of handing an error payload to the parser.
    r.raise_for_status()

    return r.json()

def process_raw_weather_json(*, raw_json: dict, lat: float, lon: float) -> pd.DataFrame:
    """
    Flatten a raw WWO response into one row per hourly observation.

    Each hourly record is joined (on ``date``) to the daily attributes of the
    day it belongs to, and the separate date/time columns are replaced by
    ``utc_datetime`` and ``local_datetime`` strings.

    Args:
        raw_json (dict): Response as returned by ``get_raw_weather_json``;
            the per-day records are read from ``raw_json['data']['weather']``.
        lat (float): Latitude of the location, used to look up its timezone.
        lon (float): Longitude of the location, used to look up its timezone.

    Returns:
        pd.DataFrame: One row per hourly record. An empty DataFrame is
        returned when the response contains no hourly records.

    Raises:
        KeyError: If ``raw_json`` lacks the ``data`` / ``weather`` keys.
    """
    days = raw_json['data']['weather']

    daily_records = [process_daily_data(raw_daily_json=day) for day in days]
    # Flatten the per-day lists of hourly records in a single pass.
    hourly_records = [
        hourly_record
        for day in days
        for hourly_record in process_day_of_hourly_data(raw_daily_json=day)
    ]

    # Nothing to merge: the join below needs a 'date' column on both sides.
    if not hourly_records:
        return pd.DataFrame()

    hourly_data = pd.DataFrame(hourly_records)
    daily_data = pd.DataFrame(daily_records)

    # NOTE(review): timezone_at may return None for some coordinates - confirm
    # how parse_datetime should behave in that case.
    tz = TimezoneFinder().timezone_at(lng=lon, lat=lat)

    # Columns present in both frames (e.g. uvIndex) get pandas' default
    # _x (hourly) / _y (daily) suffixes.
    df = pd.merge(hourly_data, daily_data, how='left', on='date')
    df = df.assign(
        utc_datetime=df.apply(lambda x: parse_datetime(x.UTCdate, x.UTCtime), axis=1),
        local_datetime=df.apply(lambda x: parse_datetime(x.date, x.time, tz), axis=1),
    )

    # The raw date/time pieces are redundant once the combined columns exist.
    return df.drop(['date', 'time', 'UTCdate', 'UTCtime'], axis=1)

def process_day_of_hourly_data(*,raw_daily_json:dict):
    """
    Return the processed hourly records contained in one day's raw record.

    Every hourly record is tagged with the day's ``date`` so it can later be
    joined back to the daily attributes.
    """
    # work on a shallow copy of the day record
    day = dict(raw_daily_json)

    day_date = day['date']
    processed = []
    for hourly_entry in day['hourly']:
        processed.append(process_hourly_data(raw_hourly_json=hourly_entry, date=day_date))

    return processed

def process_hourly_data(*, raw_hourly_json: dict, date: str):
    """
    Return a flattened copy of one hourly record.

    Args:
        raw_hourly_json (dict): One entry of a day's ``hourly`` list. It must
            contain ``weatherDesc`` as a list whose first item has a
            ``value`` key.
        date (str): Date of the day this record belongs to; stored under the
            ``date`` key so the record can be joined to the daily data.

    Returns:
        dict: A new dict without ``weatherIconUrl``, with ``weatherDesc``
        reduced to its text value and ``date`` added. The input dict is not
        modified.
    """
    # Shallow copy so the caller's record is left untouched.
    record = dict(raw_hourly_json)

    # The icon URL is not needed; pop with a default so a record that lacks
    # the key is not treated as an error.
    record.pop('weatherIconUrl', None)
    record['weatherDesc'] = record['weatherDesc'][0]['value']
    record['date'] = date

    return record
    
def process_daily_data(*,raw_daily_json:dict):
    """
    Return a flat copy of one day's record.

    The first ``astronomy`` entry is merged into the top level, and the nested
    ``astronomy`` and ``hourly`` keys are removed. The input is not modified.
    """
    # shallow copy, so the caller's dict keeps its nested keys
    day = dict(raw_daily_json)

    # lift sunrise/sunset/moon fields up to the top level
    day.update(day['astronomy'][0])

    # drop the nested structures; a missing key raises KeyError
    for nested_key in ('astronomy', 'hourly'):
        day.pop(nested_key)

    return day

def parse_datetime(date: str, hour: str, tz: str = "UTC") -> str:
    """Combine a date string and an hour code into a timestamp string.

    Args:
        date (str): Calendar date formatted as ``YYYY-MM-DD``.
        hour (str): Hour of day multiplied by 100, as text
            (e.g. ``"0"``, ``"900"``, ``"1600"``).
        tz (str, optional): Timezone name the date and hour are expressed in.
            Defaults to "UTC".

    Returns:
        str: String form of the parsed pendulum datetime,
        e.g. ``'2020-01-01T09:00:00+00:00'``.
    """
    # "900" -> 9; integer division avoids the float round-trip.
    hour_of_day = int(hour) // 100
    parsed = pendulum.from_format(f"{date} {hour_of_day}", "YYYY-MM-DD H", tz=tz)

    return str(parsed)

In [94]:
# Scratch check of the "fall back to 'UTC' when tz is falsy" expression.
tz = 23
tz or 'UTC'

23

In [82]:
# Fetch the raw API response for one sample location and date range.
# This performs a live HTTP request via get_raw_weather_json.
raw_json = get_raw_weather_json(lat=33.0047,lon=115.8448,start='2020-01-01',end='2020-01-18')
# Display the full response to inspect its structure.
raw_json

{'data': {'request': [{'type': 'LatLon', 'query': 'Lat 33.00 and Lon 115.84'}],
  'weather': [{'date': '2020-01-01',
    'astronomy': [{'sunrise': '07:20 AM',
      'sunset': '05:20 PM',
      'moonrise': '11:24 AM',
      'moonset': '10:58 PM',
      'moon_phase': 'Waxing Crescent',
      'moon_illumination': '37'}],
    'maxtempC': '5',
    'maxtempF': '41',
    'mintempC': '-2',
    'mintempF': '29',
    'avgtempC': '3',
    'avgtempF': '37',
    'totalSnow_cm': '0.0',
    'sunHour': '3.8',
    'uvIndex': '1',
    'hourly': [{'time': '0',
      'UTCdate': '2019-12-31',
      'UTCtime': '1600',
      'isdaytime': 'no',
      'tempC': '0',
      'tempF': '33',
      'windspeedMiles': '6',
      'windspeedKmph': '10',
      'winddirDegree': '124',
      'winddir16Point': 'ESE',
      'weatherCode': '119',
      'weatherIconUrl': [{'value': 'http://cdn.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0003_white_cloud.png'}],
      'weatherDesc': [{'value': 'Cloudy'}],
      'prec

In [96]:
# Flatten the response fetched above into one row per hourly record; lat/lon
# repeat the coordinates used for the request and drive the timezone lookup.
df = process_raw_weather_json(raw_json=raw_json,lat=33.0047,lon=115.8448)
# Display the resulting frame.
df

Unnamed: 0,isdaytime,tempC,tempF,windspeedMiles,windspeedKmph,winddirDegree,winddir16Point,weatherCode,weatherDesc,precipMM,...,sunHour,uvIndex_y,sunrise,sunset,moonrise,moonset,moon_phase,moon_illumination,utc_datetime,local_datetime
0,no,0,33,6,10,124,ESE,119,Cloudy,0.0,...,3.8,1,07:20 AM,05:20 PM,11:24 AM,10:58 PM,Waxing Crescent,37,2019-12-31T16:00:00+00:00,2020-01-01T00:00:00+08:00
1,no,0,32,6,10,123,ESE,119,Cloudy,0.0,...,3.8,1,07:20 AM,05:20 PM,11:24 AM,10:58 PM,Waxing Crescent,37,2019-12-31T17:00:00+00:00,2020-01-01T01:00:00+08:00
2,no,0,32,6,10,123,ESE,116,Partly cloudy,0.0,...,3.8,1,07:20 AM,05:20 PM,11:24 AM,10:58 PM,Waxing Crescent,37,2019-12-31T18:00:00+00:00,2020-01-01T02:00:00+08:00
3,no,-0,32,6,10,123,ESE,116,Partly cloudy,0.0,...,3.8,1,07:20 AM,05:20 PM,11:24 AM,10:58 PM,Waxing Crescent,37,2019-12-31T19:00:00+00:00,2020-01-01T03:00:00+08:00
4,no,-0,32,6,10,122,ESE,116,Partly cloudy,0.0,...,3.8,1,07:20 AM,05:20 PM,11:24 AM,10:58 PM,Waxing Crescent,37,2019-12-31T20:00:00+00:00,2020-01-01T04:00:00+08:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,no,6,43,5,8,202,SSW,116,Partly cloudy,0.0,...,8.7,3,07:19 AM,05:35 PM,12:43 AM,12:24 PM,Last Quarter,40,2020-01-18T11:00:00+00:00,2020-01-18T19:00:00+08:00
428,no,6,42,5,9,198,SSW,116,Partly cloudy,0.0,...,8.7,3,07:19 AM,05:35 PM,12:43 AM,12:24 PM,Last Quarter,40,2020-01-18T12:00:00+00:00,2020-01-18T20:00:00+08:00
429,no,5,41,6,9,194,SSW,116,Partly cloudy,0.0,...,8.7,3,07:19 AM,05:35 PM,12:43 AM,12:24 PM,Last Quarter,40,2020-01-18T13:00:00+00:00,2020-01-18T21:00:00+08:00
430,no,5,41,6,10,203,SSW,116,Partly cloudy,0.0,...,8.7,3,07:19 AM,05:35 PM,12:43 AM,12:24 PM,Last Quarter,40,2020-01-18T14:00:00+00:00,2020-01-18T22:00:00+08:00


In [66]:
# Row-wise check of parse_datetime on the UTC date/time columns.
# NOTE(review): process_raw_weather_json drops UTCdate/UTCtime, so this cell
# presumably ran against an earlier version of df - confirm before re-running.
df.apply(lambda x: parse_datetime(x.UTCdate,x.UTCtime),axis=1)

0      2019-12-31T16:00:00+00:00
1      2019-12-31T17:00:00+00:00
2      2019-12-31T18:00:00+00:00
3      2019-12-31T19:00:00+00:00
4      2019-12-31T20:00:00+00:00
                 ...            
427    2020-01-18T11:00:00+00:00
428    2020-01-18T12:00:00+00:00
429    2020-01-18T13:00:00+00:00
430    2020-01-18T14:00:00+00:00
431    2020-01-18T15:00:00+00:00
Length: 432, dtype: object

In [64]:
def parse_datetime(date: str, hour: str, tz: str = "UTC") -> str:
    """Combine a date string and an hour code into a timestamp string.

    Accepts the same optional ``tz`` argument as the other definition of this
    function in the notebook, so re-running this cell does not break callers
    that pass a timezone.

    Args:
        date (str): Calendar date formatted as ``YYYY-MM-DD``.
        hour (str): Hour of day multiplied by 100, as text (e.g. ``"900"``).
        tz (str, optional): Timezone name the date and hour are expressed in.
            Defaults to "UTC".

    Returns:
        str: String form of the parsed pendulum datetime.
    """
    # "900" -> 9; integer division avoids the float round-trip.
    hour_of_day = int(hour) // 100
    parsed = pendulum.from_format(f'{date} {hour_of_day}', 'YYYY-MM-DD H', tz=tz)
    return str(parsed)

In [60]:
# Pull one sample row's UTC date and hour code to experiment with parsing.
# NOTE(review): relies on df still having UTCdate/UTCtime columns - confirm.
date,hour = df.UTCdate[17],df.UTCtime[17]
date,hour

('2020-01-01', '900')

In [63]:
import pendulum

# Plain division yields a float (the printed output shows "9.0"), which is why
# the parsed string below truncates the hour with int() first.
print(f'{date} {int(hour) / 100}')
# Parse "<date> <hour>" with pendulum and show its string form.
str(pendulum.from_format(f'{date} {int(int(hour) / 100)}', 'YYYY-MM-DD H'))

2020-01-01 9.0


'2020-01-01T09:00:00+00:00'

In [121]:
# Left-join the daily attributes onto the hourly rows by date.
# NOTE(review): `hourly` and `daily` are not defined in any visible cell -
# presumably leftovers from an earlier session; confirm or remove.
df = pd.merge(hourly,daily,how='left',on='date')

In [175]:
import pendulum

# Span from 2016-01-01 up to yesterday.
start = pendulum.datetime(2016,1, 1)
end = pendulum.yesterday()
# NOTE(review): pendulum.period is version-dependent API - confirm it exists
# in the installed pendulum release.
period = pendulum.period(start, end)

# Step through the span in 35-day increments.
# Presumably meant for chunking API date ranges - confirm.
list(period.range("days",amount=35))

[DateTime(2016, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2016, 2, 5, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2016, 3, 11, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2016, 4, 15, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2016, 5, 20, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2016, 6, 24, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2016, 7, 29, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2016, 9, 2, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2016, 10, 7, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2016, 11, 11, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2016, 12, 16, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2017, 1, 20, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2017, 2, 24, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2017, 3, 31, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2017, 5, 5, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2017, 6, 9, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2017, 7, 14, 0, 0, 0, tzinfo=Timezone('UTC')),
 DateTime(2017, 8, 18, 0, 0, 0, tz

In [150]:
pendulum.Period??

[0;31mInit signature:[0m [0mpendulum[0m[0;34m.[0m[0mPeriod[0m[0;34m([0m[0mstart[0m[0;34m,[0m [0mend[0m[0;34m,[0m [0mabsolute[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m        
[0;32mclass[0m [0mPeriod[0m[0;34m([0m[0mDuration[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m"""[0m
[0;34m    Duration class that is aware of the datetimes that generated the[0m
[0;34m    time difference.[0m
[0;34m    """[0m[0;34m[0m
[0;34m[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0m__new__[0m[0;34m([0m[0mcls[0m[0;34m,[0m [0mstart[0m[0;34m,[0m [0mend[0m[0;34m,[0m [0mabsolute[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0;32mif[0m [0misinstance[0m[0;34m([0m[0mstart[0m[0;34m,[0m [0mdatetime[0m[0;34m)[0m [0;32mand[0m [0misinstance[0m[0;34m([0m[0mend[0m[0;34m,[0m [0mdatetime[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        

In [87]:
# Inspect the list of hourly records of the third day in the response.
raw_json['data']['weather'][2]['hourly']

[{'time': '0',
  'UTCdate': '2021-01-02',
  'UTCtime': '1600',
  'isdaytime': 'no',
  'tempC': '1',
  'tempF': '34',
  'windspeedMiles': '7',
  'windspeedKmph': '12',
  'winddirDegree': '70',
  'winddir16Point': 'ENE',
  'weatherCode': '113',
  'weatherIconUrl': [{'value': 'http://cdn.worldweatheronline.com/images/wsymbols01_png_64/wsymbol_0008_clear_sky_night.png'}],
  'weatherDesc': [{'value': 'Clear'}],
  'precipMM': '0.0',
  'precipInches': '0.0',
  'humidity': '76',
  'visibility': '10',
  'visibilityMiles': '6',
  'pressure': '1031',
  'pressureInches': '31',
  'cloudcover': '3',
  'HeatIndexC': '1',
  'HeatIndexF': '34',
  'DewPointC': '-3',
  'DewPointF': '27',
  'WindChillC': '-3',
  'WindChillF': '28',
  'WindGustMiles': '12',
  'WindGustKmph': '19',
  'FeelsLikeC': '-3',
  'FeelsLikeF': '28',
  'uvIndex': '1'},
 {'time': '100',
  'UTCdate': '2021-01-02',
  'UTCtime': '1700',
  'isdaytime': 'no',
  'tempC': '1',
  'tempF': '34',
  'windspeedMiles': '8',
  'windspeedKmph': '12

In [69]:
# Sanity check: folding a list of lists with `add` concatenates them into one list.
reduce(add,[[12,3],[2,3],[3,5,6]])

[12, 3, 2, 3, 3, 5, 6]

In [43]:
# Build one flattened record per day of the response.
# process_raw_weather_json now requires lat/lon and returns a DataFrame, so the
# per-day records are read directly from the raw response instead.
daily_data = []
for daily_json in raw_json['data']['weather']:
    daily_data.append(process_daily_data(raw_daily_json=daily_json))

In [45]:
import pandas as pd

# Show the flattened daily records as a table, one row per day.
pd.DataFrame.from_dict(daily_data)

Unnamed: 0,date,maxtempC,maxtempF,mintempC,mintempF,avgtempC,avgtempF,totalSnow_cm,sunHour,uvIndex,sunrise,sunset,moonrise,moonset,moon_phase,moon_illumination
0,2021-01-01,6,42,-3,27,2,36,0.0,8.7,2,07:20 AM,05:21 PM,07:22 PM,09:02 AM,Waning Gibbous,79
1,2021-01-02,7,44,1,34,4,39,0.0,8.7,2,07:20 AM,05:22 PM,08:25 PM,09:46 AM,Waning Gibbous,71
2,2021-01-03,6,43,0,33,4,39,0.0,7.1,2,07:20 AM,05:23 PM,09:30 PM,10:25 AM,Waning Gibbous,64
3,2021-01-04,6,43,1,34,4,39,0.0,3.8,2,07:20 AM,05:23 PM,10:34 PM,11:00 AM,Waning Gibbous,57
4,2021-01-05,4,39,1,34,3,37,0.0,3.8,1,07:20 AM,05:24 PM,11:39 PM,11:34 AM,Last Quarter,50
5,2021-01-06,4,39,-3,26,2,35,0.0,8.7,2,07:21 AM,05:25 PM,No moonrise,12:06 PM,Last Quarter,42
6,2021-01-07,-5,24,-10,15,-7,19,0.0,3.8,2,07:21 AM,05:26 PM,12:43 AM,12:38 PM,Last Quarter,35
7,2021-01-08,2,35,-6,22,-1,30,0.0,8.7,2,07:21 AM,05:27 PM,01:50 AM,01:14 PM,Last Quarter,28
8,2021-01-09,3,38,-4,25,0,32,0.0,8.7,2,07:21 AM,05:28 PM,02:58 AM,01:53 PM,Waning Crescent,20
9,2021-01-10,4,38,-2,29,1,34,0.1,7.1,2,07:21 AM,05:28 PM,04:08 AM,02:37 PM,Waning Crescent,13


In [97]:
from typing import List

In [99]:
# Inspect column dtypes; the output shows the listed columns are `object`
# (the API returns values as strings), so numeric columns still need casting.
df.dtypes

isdaytime            object
tempC                object
tempF                object
windspeedMiles       object
windspeedKmph        object
winddirDegree        object
winddir16Point       object
weatherCode          object
weatherDesc          object
precipMM             object
precipInches         object
humidity             object
visibility           object
visibilityMiles      object
pressure             object
pressureInches       object
cloudcover           object
HeatIndexC           object
HeatIndexF           object
DewPointC            object
DewPointF            object
WindChillC           object
WindChillF           object
WindGustMiles        object
WindGustKmph         object
FeelsLikeC           object
FeelsLikeF           object
uvIndex_x            object
maxtempC             object
maxtempF             object
mintempC             object
mintempF             object
avgtempC             object
avgtempF             object
totalSnow_cm         object
sunHour             