In [1]:
import datetime

import pandas as pd
import numpy as np

import asyncio
import requests_html
from requests.adapters import HTTPAdapter, Retry
from requests.exceptions import MissingSchema
from requests_html import AsyncHTMLSession
from urllib3.exceptions import MaxRetryError

from tqdm.auto import tqdm

temperature - Temperature (°C)

dew_point - Dew Point (°C)

heat_index - Heat Index (°C)

humidity - Humidity (%)

pressure - Pressure (hPa)

visibility - Visibility (m)

gust - Gust (km/h)

wind_speed - Wind Speed (km/h)

precipitation - Precipitation (mm)

uv_index - UV Index

In [2]:
async def getResponse(date_chunk):
    """Request every date in ``date_chunk`` concurrently.

    One ``requestHeaders`` coroutine is created per date and all of them are
    awaited together. The returned list holds one result per date, in the
    same order as ``date_chunk``.
    """
    pending = [requestHeaders(weather_date=day) for day in date_chunk]
    results = await asyncio.gather(*pending)
    return results

async def requestHeaders(weather_date):
    """Fetch the historical observations for one date and parse them.

    Parameters
    ----------
    weather_date
        Value sent as the ``startDate`` query parameter. The caller in this
        notebook passes ``'YYYYMMDD'`` strings.

    Returns
    -------
    pandas.DataFrame or str
        The frame produced by ``parse_data`` on success, or the string
        ``"Response Error"`` when the request raises ``MaxRetryError``,
        ``RetryError`` or ``MissingSchema``. Any other exception (for example
        a non-JSON body or a payload without ``'observations'``) propagates.
    """
    asession = AsyncHTMLSession()

    # Browser-like headers sent with every request.
    headers = {
        'authority': 'api.weather.com',
        'accept': 'application/json, text/plain, */*',
        'accept-language': 'en-GB,en;q=0.5',
        'origin': 'https://www.wunderground.com',
        'referer': 'https://www.wunderground.com/',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'cross-site',
        'sec-gpc': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
    }

    # NOTE(review): the API key is hard-coded; consider loading it from the
    # environment or a config file instead of committing it with the notebook.
    params = {
        'apiKey': 'e1f10a1e78da46f5b10a1e78da96f525',
        'units': 'm',
        'startDate': weather_date,
    }

    # Retry up to 10 times (short exponential backoff) on the listed 5xx statuses.
    retries = Retry(total=10, backoff_factor=0.05, status_forcelist=[500, 502, 503, 504, 524])
    adapter = HTTPAdapter(max_retries=retries)

    asession.mount("https://", adapter)
    asession.mount("http://", adapter)

    try:
        resp = await asession.get(
            'https://api.weather.com/v1/location/OJAM:9:JO/observations/historical.json',
            params=params,
            headers=headers
        )

        return parse_data(resp=resp.json())

    except (MaxRetryError, requests_html.requests.exceptions.RetryError, MissingSchema):
        return "Response Error"

    finally:
        # A new session is created on every call, so release its adapters and
        # pooled connections on every exit path instead of leaking one session
        # per requested date.
        await asession.close()
    
######################################################################################################
def parse_data(resp):
    """Flatten a decoded API payload into one DataFrame row per observation.

    ``resp`` must be a mapping with an ``'observations'`` list. Each entry is
    copied into a record whose keys are the output column names below. A key
    missing from ``resp`` or from an observation raises ``KeyError``.
    """
    # (output column, key in the observation record), in output column order.
    field_map = (
        ('temperature', 'temp'),
        ('weather_condition', 'wx_phrase'),
        ('dew_point', 'dewPt'),
        ('heat_index', 'heat_index'),
        ('humidity', 'rh'),
        ('pressure', 'pressure'),
        ('visibility', 'vis'),
        ('wind_condition', 'wdir_cardinal'),
        ('gust', 'gust'),
        ('wind_speed', 'wspd'),
        ('precipitation', 'precip_total'),
        ('uv_desc', 'uv_desc'),
        ('uv_index', 'uv_index'),
        ('clouds_condition', 'clds'),
    )
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    records = []
    for observation in resp['observations']:
        # Epoch seconds -> formatted string, see
        # https://stackoverflow.com/questions/12400256/converting-epoch-time-into-the-datetime
        # NOTE(review): fromtimestamp() without a tz argument renders the value in
        # the local timezone of the machine running the notebook - confirm intended.
        moment = datetime.datetime.fromtimestamp(observation['valid_time_gmt'])
        record = {'date': moment.strftime(timestamp_format)}
        for column, source_key in field_map:
            record[column] = observation[source_key]
        records.append(record)

    return pd.DataFrame(records)

In [3]:
starting_from = '2014-01-01'
ending_till = '2021-12-31'

all_dates = pd.date_range(starting_from, ending_till, freq = '1d').strftime('%Y%m%d')

chunks = np.round(len(all_dates)/100) + 1
date_chunks = np.array_split(all_dates, chunks)

daily_data = []
        
count = 1
for chunk in tqdm(date_chunks):
    print(f"{count}: Requesting weather data from {chunk[0]} to {chunk[-1]}")
    resp = await getResponse(date_chunk=chunk)
    daily_data.extend(resp)
    count += 1

  0%|          | 0/30 [00:00<?, ?it/s]

1: Requesting weather data from 20140101 to 20140408
2: Requesting weather data from 20140409 to 20140715
3: Requesting weather data from 20140716 to 20141021
4: Requesting weather data from 20141022 to 20150127
5: Requesting weather data from 20150128 to 20150505
6: Requesting weather data from 20150506 to 20150811
7: Requesting weather data from 20150812 to 20151117
8: Requesting weather data from 20151118 to 20160223
9: Requesting weather data from 20160224 to 20160531
10: Requesting weather data from 20160601 to 20160906
11: Requesting weather data from 20160907 to 20161213
12: Requesting weather data from 20161214 to 20170321
13: Requesting weather data from 20170322 to 20170626
14: Requesting weather data from 20170627 to 20171001
15: Requesting weather data from 20171002 to 20180106
16: Requesting weather data from 20180107 to 20180413
17: Requesting weather data from 20180414 to 20180719
18: Requesting weather data from 20180720 to 20181024
19: Requesting weather data from 2018

In [4]:
# A failed request is represented by the string "Response Error"; pd.concat
# only accepts Series/DataFrame objects, so skip anything that is not a frame.
weather_df = pd.concat(
    [daily for daily in daily_data if isinstance(daily, pd.DataFrame)],
    ignore_index = True
)

In [5]:
# Write the combined observations to CSV, leaving out the row index.
weather_df.to_csv(
    path_or_buf='JordanWeather_2014to2021.csv',
    index=False,
)