In [132]:
# install the modules on the OS
#!pip install beautifulsoup4
#!pip install requests
#!pip install netCDF4

# import the modules
import pandas as pd
import datetime as dt
import numpy as np
import bs4
import os
import math
import requests
import selenium
import urllib
import time
from bs4 import BeautifulSoup
import re
from io import StringIO
import logging
from netCDF4 import Dataset

from ipynb.fs.full.config_and_functions import *

In [133]:
# Base configuration
logging.basicConfig(level=logging.DEBUG)

# Seconds to wait for a server response; without a timeout a stalled
# connection would block the cell indefinitely.
REQUEST_TIMEOUT = 30

# Per-station results, keyed by the station code taken from the map-product
# JSON. Each value is a dict with the keys 'name', 'altitude', 'latitude',
# 'longitude' and 'data' (the monthly table as a DataFrame, or None when the
# file contained no 'Year Month' table).
stations = {}

try:
    # Fetch the landing page and locate the URL of the map-product JSON
    url = 'https://www.meteoschweiz.admin.ch/home/klima/schweizer-klima-im-detail/homogene-messreihen-ab-1864.html'
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # Fail with a readable message instead of "'NoneType' is not subscriptable"
    # when the page layout changes.
    map_div = soup.find("div", {"id": "kartenprodukte-map"})
    mapproduct_url = map_div.get('data-json-url') if map_div is not None else None
    if not mapproduct_url:
        raise RuntimeError('No div#kartenprodukte-map with a data-json-url attribute found at ' + url)
    mapproduct_json_url = urllib.parse.urljoin(url, mapproduct_url)

    logging.info(mapproduct_json_url)

    # Interpret JSON
    s = requests.Session()
    # update() keeps the session's default headers; assigning a plain dict
    # would replace them all.
    s.headers.update({'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0'})

    res = s.get(mapproduct_json_url, timeout=REQUEST_TIMEOUT)
    res.encoding = 'ISO-8859-1'
    res.raise_for_status()
    data = res.json()

    # Drop 'schweiz' as it is the mean of all station data. Filtering (rather
    # than list.remove) does not fail if the entry is ever absent.
    stations_names = [code for code in data['filters']['combinations'] if code != 'schweiz']
    logging.info(stations_names)

    station_txt_url_pattern = data['assets']['documents']['files'][0]['filepattern']
    logging.info(station_txt_url_pattern)

    stations_urls_list = [
        urllib.parse.urljoin(url, station_txt_url_pattern.replace('[STATION]', station_code))
        for station_code in stations_names
    ]

    # Read txt files of stations, one by one
    for station_code, station_txt_file in zip(stations_names, stations_urls_list):

        res = s.get(station_txt_file, timeout=REQUEST_TIMEOUT)
        # Do not try to parse an error page as station data.
        res.raise_for_status()
        res.encoding = res.apparent_encoding
        lines = res.text.splitlines()

        # Reset per station so a missing header line cannot silently inherit
        # the previous station's value.
        name = altitude = latitude = longitude = None
        station_df = None

        for j, line in enumerate(lines):

            # Get name
            if line.startswith('Station:'):
                name = line.split(':', 1)[1].strip()

            # Get station altitude
            if line.startswith('Altitude'):
                # Keep only digits, '.' and '-' (i.e. strip the unit text)
                altitude = float(re.sub(r"[^0-9.\-]", "", line.split(':', 1)[1]))

                logging.info(altitude)

            # Get station coordinates and convert them to decimal degrees.
            # The first value is the latitude and the second the longitude:
            # the recorded run logs ~47.5 followed by ~7.6 for the first
            # station, which only fits Switzerland as (lat, lon).
            # dms2dec is not defined in this notebook; presumably it comes from
            # the config_and_functions star-import.
            if line.startswith('Coordinates:'):
                coordinates = line.split(':', 1)[1].split('/')
                latitude = dms2dec(coordinates[0].strip())
                longitude = dms2dec(coordinates[1].strip())

                logging.info(latitude)
                logging.info(longitude)

            # The 'Year Month ...' header marks the start of the data table;
            # everything below it is read as table rows.
            if re.sub(' +', ' ', line).startswith('Year Month'):

                table_rows = ['date;temperature;precipitation']  # Set columns
                for row in lines[j + 1:]:
                    # split() tolerates leading/trailing and repeated whitespace
                    fields = row.split()
                    if not fields:
                        continue  # skip blank lines
                    date = dt.date(year=int(fields[0]), month=int(fields[1]), day=1)
                    table_rows.append(';'.join([date.strftime('%Y-%m-%d'), fields[2], fields[3]]))

                station_df = pd.read_csv(
                    StringIO('\n'.join(table_rows) + '\n'),
                    sep=";",
                    parse_dates=True,
                    index_col=0,
                )
                break

        stations[station_code] = {
            'name': name,
            'altitude': altitude,
            'latitude': latitude,
            'longitude': longitude,
            'data': station_df,
        }

        # `df` keeps pointing at the most recently parsed station table, as
        # before, so cells that read `df` continue to work.
        if station_df is not None:
            df = station_df

except requests.exceptions.HTTPError as e:
    print(e)
    

INFO:root:https://www.meteoschweiz.admin.ch/product/output/climate-data/homogenous-monthly-data/version__20200420_0223/mapproduct_de.json
INFO:root:['BAS', 'BER', 'CHD', 'CHM', 'DAV', 'ENG', 'GSB', 'GVE', 'LUG', 'PAY', 'SAE', 'SIA', 'SIO', 'SMA']
INFO:root:/product/output/climate-data/homogenous-monthly-data-processing/data/homog_mo_[STATION].txt
INFO:root:316.0
INFO:root:47.53472222222222
INFO:root:7.583333333333333
INFO:root:553.0
INFO:root:46.98444444444444
INFO:root:7.452222222222223
INFO:root:1028.0
INFO:root:46.46888888888889
INFO:root:7.134444444444445
INFO:root:1136.0
INFO:root:47.05
INFO:root:6.9686111111111115
INFO:root:1594.0
INFO:root:46.80222222222222
INFO:root:9.835
INFO:root:1036.0
INFO:root:46.8175
INFO:root:8.401666666666667
INFO:root:2472.0
INFO:root:45.86694444444444
INFO:root:7.167222222222223
INFO:root:411.0
INFO:root:46.23583333333333
INFO:root:6.118611111111111
INFO:root:273.0
INFO:root:46.00083333333333
INFO:root:8.951666666666666
INFO:root:490.0
INFO:root:46.80

In [145]:
# Summary statistics of the monthly table left in `df` by the scraping cell.
print(df.describe())

# Open the NetCDF file read-only and report its data model. The context
# manager closes the file even if an error is raised while it is open.
# No `format` argument is passed: it only applies when a file is created,
# and in read mode the on-disk format is detected automatically (the recorded
# run printed NETCDF3_CLASSIC although "NETCDF4" had been requested).
with Dataset("../../application/data/RhiresM_1961_2019_ch01r.swisscors/RhiresM_ch01r.swisscors_196101010000_196112010000.nc", "r") as rootgrp:
    print(rootgrp.data_model)

       temperature  precipitation
count  1875.000000    1875.000000
mean      8.467680      88.103680
std       6.795326      51.128343
min      -9.000000       0.000000
25%       2.600000      49.300000
50%       8.400000      79.400000
75%      14.800000     119.050000
max      22.700000     359.200000
NETCDF3_CLASSIC
