# DWD data preprocessing
This notebook will be used to test some of the gdal functions to prepare the DWD data to be used in SolEst later on.

In [1]:
import sys

In [2]:
sys.path.append('/Users/evandro/PycharmProjects/solest_data_handling/scripts')

In [3]:
from osgeo import gdal
import os
import config
import numpy as np
gdal.UseExceptions()



## 1 Extract header and data

In [4]:
# move back to main directory

In [5]:
os.chdir('/Users/evandro/PycharmProjects/solest_data_handling/')

In [6]:
sample_file = './data/dwd/ambient_temperature/grids_germany_monthly_air_temp_mean_199401.asc'
parsed_file = './processed_data/dwd/ambient_temperature/grids_germany_monthly_air_temp_mean_199401.tif'

In [24]:
with open(sample_file, 'r') as file:
    line = file.readline()
    print(line)

NCOLS 654



In [68]:
# 0. Get the actual data from the ascii file and parse the header
def parse_ascii_grid(file_path):
    header = {}
    data = []
    
    with open(file_path, 'r') as file:
        # Read header section
        line = file.readline()
        while line.strip() and not line.startswith("[ASCII-Raster-Format]"):
            if '=' in line:
                key, value = line.strip().split('=', 1)
                header[key] = value
            line = file.readline()
            
    with open(file_path, 'r') as file:
        for line in file:
            n_cells = len(line.split(" "))
            
            # ascii raster info
            if n_cells == 2:
                key, value = line.strip().split(' ', maxsplit=1)
                header[key] = value

            elif n_cells > 2:
                line = line.strip()
                line = [el for el in line.split(" ") if el != '']
                line = [float(el)/10 else '' for el in line if el != '-999']
                data.append(line)
    return header, data

## 2. Convert to GeoTIFF

In [69]:
def create_geotiff_from_data(data, header, output_filename):
    """ Create a GeoTIFF file from parsed data and header. """
    array = np.array(data)
    nrows, ncols = array.shape
    xllcorner = float(header['XLLCORNER'])
    yllcorner = float(header['YLLCORNER'])
    cellsize = float(header['CELLSIZE'])
    nodata = float(header['NODATA_VALUE'])

    driver = gdal.GetDriverByName('GTiff')
    dataset = driver.Create(output_filename, ncols, nrows, 1, gdal.GDT_Float32, ["COMPRESS=LZW"])
    
    dataset.SetGeoTransform([xllcorner, cellsize, 0, yllcorner, 0, -cellsize])
    dataset.SetProjection('EPSG:31467')  # Set the projection

    band = dataset.GetRasterBand(1)
    band.SetNoDataValue(nodata)
    band.WriteArray(array)
    
    dataset.FlushCache()

In [70]:
header, data = parse_ascii_grid(sample_file)

In [71]:
data

[[3.8, 3.8],
 [3.8, 3.8],
 [3.8, 3.8],
 [3.8, 3.8, 3.8],
 [3.8, 3.8, 3.8, 3.8],
 [3.7, 3.7, 3.8, 3.8],
 [3.7, 3.8],
 [3.8],
 [3.7],
 [3.8, 3.8],
 [3.7],
 [3.6, 3.7, 3.8],
 [3.7, 3.7, 3.8],
 [3.7, 3.7, 3.8],
 [3.8, 3.7, 3.7, 3.8],
 [3.8, 3.8, 3.7, 3.8],
 [3.8, 3.8, 3.8, 3.8],
 [3.8, 3.8, 3.8, 3.8, 3.8, 3.7, 3.7, 3.7, 3.7, 3.6, 3.6, 3.6, 3.6],
 [3.8,
  3.8,
  3.8,
  3.8,
  3.8,
  3.8,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6],
 [3.8,
  3.8,
  3.8,
  3.8,
  3.8,
  3.8,
  3.8,
  3.8,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6],
 [3.8,
  3.8,
  3.8,
  3.8,
  3.8,
  3.8,
  3.8,
  3.8,
  3.7,
  3.8,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.7,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3.6,
  3

In [13]:
create_geotiff_from_data(data, header, parsed_file)

ValueError: not enough values to unpack (expected 2, got 1)