In [1]:
import os
import numpy as np
import math
import pandas as pd
import glob

In [2]:
def parseArchiveFile(year, month):
    """Read one monthly fixed-width archive file into a DataFrame.

    The file is looked up as ``unzipped/kis_tow_{year}{month}*``. The first
    22 lines of the file are skipped, the next line supplies the column
    names, and the first column becomes the index and is parsed as dates.
    Any ``#`` characters are removed from the column names.

    Author's note carried over from the original code: all dates and times
    in the file are UTC.

    Parameters
    ----------
    year : int or str
        Year part of the file name, e.g. ``2022``.
    month : int or str
        Month part of the file name, inserted exactly as given, e.g. ``"12"``.
        NOTE(review): the value is not zero-padded here; presumably callers
        should pass ``"01"`` rather than ``1`` - confirm against the file names.

    Returns
    -------
    pandas.DataFrame
        The parsed file, indexed by its first column.

    Raises
    ------
    FileNotFoundError
        If no file matches the expected prefix.
    """
    # Number of leading lines that precede the column-name row.
    headerLineCount = 22
    # Contiguous field widths: two 20-char fields, one 48-char field, then
    # fifteen more 20-char fields (character positions 0..388, 18 fields).
    fieldWidths = [20, 20, 48] + [20] * 15

    filePrefix = f"unzipped/kis_tow_{year}{month}"

    # Sort the matches so the choice does not depend on the order in which the
    # operating system happens to list the directory.
    candidates = sorted(glob.glob(f"{filePrefix}*"))
    if not candidates:
        # Fail here with the search pattern instead of letting the reader
        # complain about a path that was never a real file name.
        raise FileNotFoundError(f"No archive file found matching '{filePrefix}*'")
    fileName = candidates[-1]

    df = pd.read_fwf(
        fileName,
        skiprows=headerLineCount,
        widths=fieldWidths,
        index_col=0,
        parse_dates=True,
    )
    # Literal (non-regex) removal keeps the result identical across pandas versions.
    df.columns = df.columns.str.replace("#", "", regex=False)

    return df

# Load the archive for December 2022; the following cells read this frame as `df`.
df = parseArchiveFile(year=2022, month="12")

In [3]:

def _sharedValueOrMultiple(values):
    """Return the value shared by every entry of `values`, or "MULTIPLE".

    `values` is a pandas Series. Every entry is compared against the first
    one; if any entry differs, the text marker "MULTIPLE" is returned instead.
    """
    valueArray = values.to_numpy()
    if np.all(valueArray == valueArray[0]):
        return valueArray[0]
    return "MULTIPLE"


# Unique location ids, kept in order of first appearance.
allLocations = list(dict.fromkeys(df["LOCATION"].tolist()))

# Keep only the first five columns of the archive frame.
# NOTE(review): presumably LOCATION, NAME, LATITUDE, LONGITUDE, ALTITUDE - confirm against the file header.
dfLocations = df.drop(columns=df.columns[5:])

summaryColumns = ["NAME", "LATITUDE", "LONGITUDE", "ALTITUDE"]

# Collect one summary row per location first and build the table in a single
# step, instead of enlarging a DataFrame one row at a time.
summaryRows = {}
for location in allLocations:
    dfSingleLocation = dfLocations.loc[df["LOCATION"] == location]
    summaryRows[location] = [
        _sharedValueOrMultiple(dfSingleLocation[column]) for column in summaryColumns
    ]

# NOTE(review): column dtypes are inferred by the DataFrame constructor; confirm the
# number formatting in the rendered table / HTML file is still what you expect.
dfAnalysedLocation = pd.DataFrame.from_dict(summaryRows, orient="index", columns=summaryColumns)

# Print the table
display(dfAnalysedLocation)

# Make this table into an HTML file; the context manager closes the file even
# if rendering or writing fails.
with open("locationsTable.html", "w") as htmlFile:
    htmlFile.write(dfAnalysedLocation.to_html())


Unnamed: 0,NAME,LATITUDE,LONGITUDE,ALTITUDE
201_W_DV,platform D15-FA-1 locatie DV,54.325556,2.935833,42.7
201_W_DV1,platform D15-FA-1 locatie DV1,54.325556,2.935833,42.7
201_W_DV2,platform D15-FA-1 locatie DV2,54.325556,2.935833,42.7
203_W_PG,platform P11-B locatie PG,52.36,3.341667,41.8
203_W_PG1,platform P11-B locatie PG1,52.36,3.341667,41.8
...,...,...,...,...
380_W_22t,Maastricht locatie 22t,50.9167,5.77323,115.12
391_W_a,Arcen,51.497222,6.196111,19.5
871_W_a,Saba,17.646111,-63.220833,31.0
873_W_a,St. Eustatius,17.495485,-62.982673,36.02


In [4]:
# Save all locations in a KML file

import simplekml

kml = simplekml.Kml()

# A coordinate cell can hold the text marker "MULTIPLE" (or be missing) instead of
# a number. Such a row has no single position to plot, so only rows with a numeric
# longitude AND latitude are exported.
kmlCoordinates = dfAnalysedLocation[["LONGITUDE", "LATITUDE"]].apply(pd.to_numeric, errors="coerce")
hasCoordinates = kmlCoordinates.notna().all(axis=1)

for pointName, longitude, latitude in zip(
    dfAnalysedLocation.loc[hasCoordinates, "NAME"],
    kmlCoordinates.loc[hasCoordinates, "LONGITUDE"],
    kmlCoordinates.loc[hasCoordinates, "LATITUDE"],
):
    # Coordinates are passed to simplekml in (longitude, latitude) order.
    kml.newpoint(name=pointName, coords=[(longitude, latitude)])

# Report what was left out so the omission is visible in the cell output.
skippedLocations = dfAnalysedLocation.index[~hasCoordinates].tolist()
if skippedLocations:
    print(f"Skipped {len(skippedLocations)} location(s) without a single numeric position: {skippedLocations}")

kml.save(path="locationsArchiveFile.kml")

In [5]:
# Columns left out of the filtered frame (station metadata and the
# measurements that are not looked at below).
droppedColumns = [
    "NAME",
    "LATITUDE",
    "LONGITUDE",
    "ALTITUDE",
    "DDN_10",
    "DD_STD_10",
    "DDX_10",
    "FF_10M_STD_10",
    "SQUALL_10",
    "FX_SENSOR_MD_10",
    "FX_10M_MD_10",
]
dfFiltered = df.drop(columns=droppedColumns)

# Author's notes:
# - The files in this dataset are incomplete, which is to be expected since they
#   are almost a month old. Only use a file two months after its period has
#   ended (i.e. once two newer files have been released).
# - KNMI stations are always built at 10 m height (by the standard), so the
#   sensor value and the recalculated value are always the same. To check:
#   dfFiltered.loc[df["LOCATION"] == "310_W_a"][2080:2100]
# - This is not the case for a station maintained by RWS, as the rows shown
#   by this cell illustrate.
rwsStationRows = dfFiltered.loc[df["LOCATION"] == "316_W_a"]
rwsStationRows[2080:2100]

# - KNMI and Rijkswaterstaat always use the measured values recalculated for a
#   height of 10 m above sea level, even in the netCDF files from the KNMI.
#   So Buienradar, the RWS APIs etc. always use the ...10M... values.

# To inspect the whole filtered frame instead, run: display(dfFiltered)


Unnamed: 0_level_0,LOCATION,FF_SENSOR_10,FF_10M_10,DD_10,FX_10M_10,FX_SENSOR_10
# DTG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-12-15 10:50:00,316_W_a,2.39,2.26049,128.3,,3.23
2022-12-15 11:00:00,316_W_a,3.17,2.99822,153.4,,3.71
2022-12-15 11:10:00,316_W_a,3.07,2.90364,158.1,,3.71
2022-12-15 11:20:00,316_W_a,2.59,2.44965,167.3,,3.23
2022-12-15 11:30:00,316_W_a,2.24,2.11861,190.4,,3.81
2022-12-15 11:40:00,316_W_a,2.72,2.5726,224.9,,3.98
2022-12-15 11:50:00,316_W_a,3.4,3.21575,240.2,,4.26
2022-12-15 12:00:00,316_W_a,2.68,2.53477,229.7,,3.98
2022-12-15 12:10:00,316_W_a,4.88,4.61555,258.1,,6.64
2022-12-15 12:20:00,316_W_a,3.81,3.60354,318.0,,6.43


In [6]:
# Export the filtered frame to "test.json" using the "split" JSON layout.
# Author's note: maybe usable for production - the size is not that big anymore
# and can be reduced even more by renaming columns / keys.
dfFiltered.to_json(path_or_buf="test.json", orient="split")