# Augmenting weather data

### Step 03

## Historical weather data
To fetch the data and store it in a Pandas DataFrame, the following code needs to be provided with
* `start date` in an ISO 8601-style format (UTC), e.g. `2023-09-16T17:10`
* `end date` (same format) and
* `parameters`, i.e. abbreviations such as `TS` to load historical data for temperature

> Import historical data from ZAMG, aka `geosphere` \
> The nearest station ID is `7710` for Seibersdorf \
> Info about the API: https://data.hub.geosphere.at/dataset/klima-v1-10min

In [2]:
import requests
import json
import pandas as pd
from pandas import json_normalize
from glom import glom, Flatten
from datetime import date

class MakeApiCall:
    """Fetch 10-minute station data from an API endpoint and export it as CSV.

    Instantiating the class immediately performs the request: ``__init__``
    prints the URL and hands it to ``get_user_data``.
    """

    # Parameter abbreviations copied from the response into DataFrame columns.
    PARAMETERS = ('TS', 'DD', 'FFAM')
    # Seconds to wait for the server; without a timeout requests may block forever.
    REQUEST_TIMEOUT = 30
    # Folder the fetched data is exported to.
    EXPORT_FOLDER = 'C:/Users/andre/Nextcloud/WS_2023/IKT/11_DataAugmentation/'

    def __init__(self, api):
        """
        prints the request and runs it right away
        @param api: full request URL (the caller passes the query string inside it)
        """
        # No extra query parameters are sent; an empty dict is passed to requests.
        parameters = {}
        print(f"{api}")
        print(parameters)
        self.get_user_data(api, parameters)

    def get_user_data(self, api, parameters):
        """
        requests the data, prints it, converts it to a dataframe and saves it as csv
        @param api:        request URL
        @param parameters: dict of additional query parameters handed to requests
        @return:           None (results are printed and written to disk)
        """
        response = requests.get(f"{api}", params=parameters,
                                timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            print(
                f"There's a {response.status_code} error with your request")
            return

        print("Successfully fetched the data with parameters provided")
        # Parse the body once and reuse it for printing and for the dataframe.
        dataraw = response.json()
        self.formatted_print(dataraw)

        # Without a feature entry there is no measurement data to tabulate.
        if not dataraw.get('features'):
            print("Response contains no station features; nothing to export")
            return

        df = self._to_dataframe(dataraw)

        # print data example based on first line
        print(df.iloc[:1])

        # save data to file
        filename = 'ZEHNMIN Datensatz_7710_fetched_at_' + str(date.today()) + '.csv'
        df.to_csv(self.EXPORT_FOLDER + filename, sep=',', index=False, encoding='utf-8')
        print("Export to: " + filename + " successful")

    @classmethod
    def _to_dataframe(cls, dataraw):
        """
        builds one dataframe row per timestamp from the parsed response
        @param dataraw: parsed json with 'timestamps' and 'features' entries
        @return:        dataframe with a 'timestamp' column plus one column per
                        entry of PARAMETERS
        """
        # Only the first feature is read, as a single station is requested.
        station = dataraw['features'][0]['properties']['parameters']
        columns = {'timestamp': dataraw['timestamps']}
        for name in cls.PARAMETERS:
            columns[name] = station[name]['data']
        return pd.DataFrame(columns)

    def formatted_print(self, obj):
        """
        function prints text out of json-object
        @param self:
        @param obj: json-object
        @return:
        """
        # indent=4 pretty-prints nested levels; sort_keys gives a stable key order
        text = json.dumps(obj, sort_keys=True, indent=4)
        print(text)


if __name__ == "__main__":
    # Parameter abbreviations noted for this dataset: DD,FFAM,P,RFAM,TP,TS
    # Adjacent literals are concatenated into one request URL.
    historical_url = (
        "https://dataset.api.hub.geosphere.at/v1/station/historical/klima-v1-10min"
        "?parameters=TS,DD,FFAM"
        "&station_ids=7710"
        "&start=2023-11-18T17:10"
        "&end=2023-12-19T20:50"
    )
    api_call = MakeApiCall(historical_url)

https://dataset.api.hub.geosphere.at/v1/station/historical/klima-v1-10min?parameters=TS,DD,FFAM&station_ids=7710&start=2023-11-18T17:10&end=2023-12-19T20:50
{}
Successfully fetched the data with parameters provided
{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    16.504999,
                    47.976387
                ],
                "type": "Point"
            },
            "properties": {
                "parameters": {
                    "DD": {
                        "data": [
                            316.0,
                            304.0,
                            291.0,
                            257.0,
                            246.0,
                            263.0,
                            283.0,
                            286.0,
                            289.0,
                            284.0,
                            260.0,
                            277.0,
                           

## Current weather data
Helper code used for testing while getting the API to work for historical data
> Uses the "zamg" module found on GitHub

In [3]:
"""Asynchronous Python client for ZAMG weather data."""
import asyncio
import zamg
from zamg import ZamgData
#from zamg.exceptions import ZamgError
#from os import curdir

# Patch asyncio to allow nested event loops
import nest_asyncio 
nest_asyncio.apply()

async def main():
    """Print the current readings of the station closest to a fixed coordinate.

    Looks up the closest station, makes it the default, updates the data and
    prints every parameter with its name, value and unit. Errors raised as
    ``ZamgError`` are printed instead of propagated.
    """
    # The except clause below needs ZamgError, whose module-level import is
    # commented out; importing it here prevents a NameError masking the real error.
    from zamg.exceptions import ZamgError

    try:
        # Named `client` so the imported `zamg` module is not shadowed.
        async with ZamgData() as client:
            # option to disable verify of ssl check
            client.verify_ssl = False
            # trying to read zamg station id of the closest station
            data = await client.closest_station(48.03, 16.48)
            # set closest station as default one to read
            client.set_default_station(data)
            print("Closest_station = " + str(client.get_station_name) + " / " + str(data))
            # print list with all possible parameters
            print(f"Possible station parameters: {client.get_all_parameters()}")
            # Parameters could be restricted here, either directly
            #   client.station_parameters = "TL,SO"
            # or as a list
            #   client.set_parameters(("TL", "TS", "DD", "FFAM"))
            # if none of the above parameters are set, all possible parameters are read

            # do an update
            await client.update()

            print(f"---------- Weather for station {client.get_station_name} ({data}) ----------")
            for param in client.get_parameters():
                print(
                    str(param)
                    + " -> "
                    + str(client.get_data(parameter=param, data_type="name"))
                    + " -> "
                    + str(client.get_data(parameter=param))
                    + " "
                    + str(client.get_data(parameter=param, data_type="unit"))
                )
            print("--- Last update:",client.last_update, " ---")
    except ZamgError as exc:
        print(exc)


if __name__ == "__main__":
    # Run the coroutine to completion. nest_asyncio.apply() (called above) patches
    # asyncio so this also works when an event loop is already running.
    asyncio.run(main())

ModuleNotFoundError: No module named 'zamg'

## Import weather data from file
Code for importing and cleaning historical data, including quality flags

> Quality flag codes \
> (-1 ... erwartet, aber (noch) nicht empfangen) \
> 0   ... (noch) nicht geprüft \
> 1   ... nicht gemessen \
> 2   ... fehlt oder zu spät \
> 100 ... original, ok \
> 200 ... original, ok \
> 300 ... nicht original, manuell ergänzt \
> 400 ... nicht original, gelöscht \
> 500 ... nicht original, automatisch ergänzt 

Codes 100-300 and 500 shall be taken into account. \
As the `ffill` (forward fill) method is used to get rid of the very few missing data points, quality flag codes 2 and 400 can also be taken into account (empty cells are filled with the previous value!)

In [45]:
import pandas as pd 
import csv

class ImportWeather:
    """Import the historical station CSV, make it numeric, fill gaps and export it.

    Creating an instance runs the whole pipeline once, because ``__init__``
    calls ``process``.
    """

    # CSV file with the historical 10-minute data set that is imported.
    SOURCE_FILE = ('C:/Users/andre/Nextcloud/WS_2023/IKT/11_DataAugmentation/'
                   'ZEHNMIN Datensatz_7710_20161114T1510_20230916T1700.csv')
    # Folder the cleaned data is exported to.
    EXPORT_FOLDER = 'C:/Users/andre/Nextcloud/WS_2023/IKT/20_Data/'

    def __init__(self):
        self.process()

    def process(self):
        """Read the source CSV, convert and prefix the columns, fill gaps, export.

        Progress information (dtypes, row count, rows with gaps, tail of the
        result, export path) is printed along the way. Returns None.
        """
        # Imported locally so this class does not depend on an import made elsewhere.
        from datetime import date

        # 01 import data from csv
        # The context manager closes the file once all rows are in the DataFrame.
        with open(self.SOURCE_FILE) as handle:
            df = pd.DataFrame(csv.DictReader(handle, delimiter=","))

        ROOM = 'GA'
        # Parse the 1st column as timestamps before the numeric conversion,
        # otherwise pd.to_numeric would coerce the timestamp strings to NaN.
        df['time'] = pd.to_datetime(df.iloc[:, 0])
        # Prefix every column name with the room identifier.
        df.rename(columns=lambda x: ROOM + '_' + x[:], inplace=True)
        # Non-numeric cells (e.g. empty strings) become NaN; the captured output
        # of this cell shows the datetime column coming out as int64.
        df = df.apply(pd.to_numeric, errors='coerce')
        # Add a readable datetime column rebuilt from the 1st (now integer) column.
        df['UTC'] = pd.to_datetime(df.iloc[:, 0])
        print(df.dtypes)
        print (f'{len(df.index)} lines into DF imported')
        # 02 check for duplicates, list and clean them
        ## not needed
        # 03 add data to the 10mins timestamp list
        ## not needed

        # 04 fill gaps
        # show the last rows that contain gaps first...
        print (df[df.isna().any(axis=1)].tail(50))
        met = 'ffill'
        df = self.fill_gap_in_col(df, met)
        print (df.tail(50))
        print (f'"NaN" gaps filled with method \'{met}\'')

        # 05 export to csv
        filename = self.EXPORT_FOLDER + str(ROOM) + '_AUG_cleaned_and_filled_at_' + str(date.today()) + '.csv'
        df.to_csv(filename, sep=',', index=False, encoding='utf-8')
        print("Export to: \'" + filename + "\' successful")

    def fill_gap_in_col(self, col, method):
        """Fill gaps between the first and the last valid entry.

        Leading and trailing missing values (outside the first/last valid
        index) are left untouched; the input object is not modified.

        @param col:    pandas Series or DataFrame to fill
        @param method: 'ffill' (propagate previous value) or 'bfill'
                       (propagate next value)
        @return:       filled copy of ``col``
        @raise ValueError: if ``method`` is not supported, instead of silently
                       returning unfilled data
        """
        if method not in ('ffill', 'bfill'):
            raise ValueError(
                f"Unsupported fill method {method!r}; use 'ffill' or 'bfill'")
        colf = col.copy()
        first_idx = colf.first_valid_index()
        last_idx = colf.last_valid_index()
        inner = colf.loc[first_idx:last_idx]
        colf.loc[first_idx:last_idx] = inner.ffill() if method == 'ffill' else inner.bfill()
        return colf
        
if __name__ == "__main__":
    ImportWeather()  # instantiating runs the whole pipeline: __init__ calls process()

GA_time                   int64
GA_station                int64
GA_DD                   float64
GA_DD_FLAG                int64
GA_FFAM                 float64
GA_FFAM_FLAG              int64
GA_FFX                  float64
GA_FFX_FLAG               int64
GA_P                    float64
GA_P_FLAG                 int64
GA_RF                   float64
GA_RF_FLAG                int64
GA_TL                   float64
GA_TL_FLAG                int64
GA_ZEITX                float64
GA_ZEITX_FLAG             int64
UTC              datetime64[ns]
dtype: object
359580 lines into DF imported
                    GA_time  GA_station  GA_DD  GA_DD_FLAG  GA_FFAM  \
359530  1694854200000000000        7710  145.0         100      4.3   
359531  1694854800000000000        7710  140.0         100      4.7   
359532  1694855400000000000        7710  130.0         100      5.3   
359533  1694856000000000000        7710  133.0         100      4.9   
359534  1694856600000000000        7710  128.0         10

Export to: 'C:/Users/andre/Nextcloud/WS_2023/IKT/20_Data/GA_AUG_cleaned_and_filled_at_2023-12-01.csv' successful
