In [27]:
import matplotlib.pylab as plt
import numpy as np
import matplotlib.cm as cm
import cmocean.cm as cmo
import intake

import numcodecs
try:
    import gribscan
except:
    %pip install gribscan
    import gribscan
import cfgrib
import gc

from getpass import getuser # Libaray to copy things
from pathlib import Path # Object oriented libary to deal with paths
from dask.utils import format_bytes
from dask.diagnostics import ProgressBar

from distributed import Client, progress, wait # Libaray to orchestrate distributed resources
#from dask_jobqueue import SLURMCluster # Setting up distributed memories via slurm
import numpy as np # Pythons standard array library
import xarray as xr # Libary to work with labeled n-dimensional data
import dask # Distributed data libary
import dask.distributed
import multiprocessing
from subprocess import run, PIPE
import re
from math import radians, cos, sin, sqrt, atan2
import sys
import os
import warnings
warnings.filterwarnings(action='ignore')
dask.config.config.get('distributed').get('dashboard').update({'link':'{JUPYTERHUB_SERVICE_PREFIX}/proxy/{port}/status'})
import pandas as pd
import eccodes
import healpy
import json


In [28]:
#This script will load the quality controlled precipitation dataset from the station closest to
# the input latitude and longitude

target_latitude = 47
target_longitude = 9
 

In [29]:
# Function to calculate the distance between two latitude and longitude pairs
def haversine(lat1, lon1, lat2, lon2):
    # Radius of the Earth in kilometers
    R = 6371.0  
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

In [30]:
def extract_coordinates(file_name):
    # Updated regex pattern to match the 3rd and 4th numeric groups
    match = re.search(r'_(\d+\.\d+)_(\d+\.\d+)_', file_name)
    if match:
        longitude = float(match.group(1))  # 3rd group is Longitude
        latitude = float(match.group(2))   # 4th group is Latitude
        return longitude, latitude
    return None, None

In [31]:
# Function to find the file with the closest coordinates
def find_closest_file(directory, target_lat, target_lon):
    closest_file = None
    min_distance = float('inf')
        # Loop through each file in the directory
    for file_name in os.listdir(directory):
        # Check if file is a txt file
        if file_name.endswith('.txt'):
            file_path = os.path.join(directory, file_name)
            # Extract coordinates from file name
            lon, lat = extract_coordinates(file_name)
            if lon is not None and lat is not None:
                # Calculate the distance from the target coordinates
                distance = haversine(target_lat, target_lon, lat, lon)
                #print(f"Checking file: {file_name}, Distance: {distance:.2f} km")
                # Update the closest file if a shorter distance is found
                if distance < min_distance:
                    min_distance = distance
                    closest_file = file_path

    return closest_file, distance

In [32]:
# Function to load data from the closest file
def load_data(file_path):
    data = pd.read_csv(file_path, sep=",")
    data['Date'] = pd.to_datetime(data['Date'])  # Convert Date to datetime format
    xarray_data = xr.Dataset.from_dataframe(data.set_index('Date'))
    return xarray_data


In [33]:


# Directory where the files are located
directory = "/home/b/b382083/work/Hackathon_challenge/Station_data"  # Update with the correct directory path

# Find the closest file based on coordinates
closest_file,distance = find_closest_file(directory, target_latitude, target_longitude)

# Print and load the closest file
if closest_file:
    print(f"Closest file found: {closest_file}, Distance: {distance:.2f} km")
    data_xarray = load_data(closest_file)
    precip = data_xarray[["Date", "P_QC"]]
    #print(precip)
else:
    print("No matching file found.")

Closest file found: /home/b/b382083/work/Hackathon_challenge/Station_data/LOM0339_1352_8.619_46.534_19870101-19961230.txt, Distance: 219.48 km
