In [1]:
import datetime

import pandas as pd
import numpy as np

import asyncio
import requests_html
from requests.adapters import HTTPAdapter, Retry
from requests.exceptions import MissingSchema
from requests_html import AsyncHTMLSession
from urllib3.exceptions import MaxRetryError

from bs4 import BeautifulSoup
import re

from tqdm.auto import tqdm

In [2]:
async def getResponse(months_years):
    """Fetch every requested month concurrently and collect the results.

    Parameters
    ----------
    months_years : iterable of str
        Values passed one at a time to ``requestHeaders`` as ``weather_date``.

    Returns
    -------
    list
        One entry per element of ``months_years``, in the same order, holding
        whatever ``requestHeaders`` returned for that element.
    """
    tasks = [requestHeaders(weather_date=month_year) for month_year in months_years]
    return await asyncio.gather(*tasks)

async def requestHeaders(weather_date):
    """Download one weather page and parse it into a DataFrame.

    Parameters
    ----------
    weather_date : str
        URL suffix identifying the page to fetch; it is appended to the
        Amman city-centre base URL.

    Returns
    -------
    pandas.DataFrame or str
        The frame produced by ``get_data`` on success, or the sentinel string
        ``"Response Error"`` when the request fails with one of the handled
        exceptions.
    """
    url = "https://tcktcktck.org/jordan/amman/amman-city-centre/" + weather_date

    asession = AsyncHTMLSession()

    # Browser-like request headers sent with every page request.
    headers = {
        'authority': 'tcktcktck.org',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'accept-language': 'en-GB,en;q=0.5',
        'cache-control': 'max-age=0',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'none',
        'sec-fetch-user': '?1',
        'sec-gpc': '1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
    }

    # Retry up to 10 times, with a short backoff, when the server answers with
    # one of the listed 5xx status codes.
    retries = Retry(total=10, backoff_factor=0.05, status_forcelist=[500, 502, 503, 504, 524])
    adapter = HTTPAdapter(max_retries=retries)

    asession.mount("https://", adapter)
    asession.mount("http://", adapter)

    try:
        resp = await asession.get(url, headers=headers)

        return get_data(resp=resp)

    except (MaxRetryError, requests_html.requests.exceptions.RetryError, MissingSchema):
        return "Response Error"

    finally:
        # A fresh session is created on every call, so it must be closed here;
        # otherwise each request leaves its session and connection pool open.
        await asession.close()
    
    
def get_data(resp):
    """Extract the precipitation rows from a fetched page.

    Parameters
    ----------
    resp : response object
        Must expose the page HTML as ``resp.text``.

    Returns
    -------
    pandas.DataFrame
        One row per parsed table row, with string columns ``date``,
        ``precip_mm`` and ``precip_in``.
    """
    parsed = BeautifulSoup(resp.text, "lxml")

    # Only the last "table-responsive" container is read, and its first two
    # <tr> rows are skipped (presumably header rows -- confirm against the page).
    last_table = parsed.find_all(name='div', attrs={'class' : 'table-responsive'})[-1]

    records = []
    for table_row in last_table.find_all('tr')[2:]:
        cells = table_row.find_all('td')
        date_text = cells[0].text
        # The final cell's text is split on " | ": the first piece is stored
        # as precip_mm and the last piece as precip_in.
        precip_parts = cells[-1].text.split(" | ")
        records.append({
            'date' : date_text,
            'precip_mm' : precip_parts[0],
            'precip_in' : precip_parts[-1],
        })

    return pd.DataFrame(records)

In [None]:
# Month-name slugs, in calendar order.
months = (
    'january february march april may june '
    'july august september october november december'
).split()

years = range(2014, 2021)  # 2014 through 2020 inclusive

# One "<month>-<year>" entry per month, grouped by year.
months_years = [f'{m}-{yr}' for yr in years for m in months]
        
# Fetch all months concurrently; the result list has one entry per element of
# months_years. Top-level `await` relies on the notebook running cells inside
# an event loop.
precip_data = await getResponse(months_years=months_years)

In [None]:
# requestHeaders returns the string "Response Error" for a failed request, and
# pd.concat raises TypeError on non-pandas objects, so keep only the DataFrames
# and report which months were dropped instead of crashing on the first failure.
precip_frames = [page for page in precip_data if isinstance(page, pd.DataFrame)]
failed_months = [
    month_year
    for month_year, page in zip(months_years, precip_data)
    if not isinstance(page, pd.DataFrame)
]
if failed_months:
    print(f"Skipping {len(failed_months)} month(s) that returned no data: {failed_months}")

precip_data_df = pd.concat(precip_frames, ignore_index=True)

In [None]:
# Print a summary of the combined frame (columns, dtypes, non-null counts).
precip_data_df.info()

In [None]:
# Convert the scraped text columns: dates become datetimes and both
# precipitation columns become float64.
precip_data_df = precip_data_df.assign(
    date=pd.to_datetime(precip_data_df['date']),
    precip_mm=precip_data_df['precip_mm'].astype('float64'),
    precip_in=precip_data_df['precip_in'].astype('float64'),
)

In [None]:
# Plot the precip_mm column against date.
precip_data_df.plot(x='date', y='precip_mm')

In [None]:
# Plot the precip_in column against date.
precip_data_df.plot(x='date', y='precip_in')

In [None]:
# Write the frame to CSV; index=False leaves the row index out of the file.
precip_data_df.to_csv("AmmanJordan_DailyPrecip_2014to2020.csv", index=False)