In [126]:
import pandas as pd
import json

# Load the file directory index. JSON text is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default.
with open("fileDirectory.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# The JSON maps each key to a list of entries; flatten it to one record per
# entry, carrying the key along so the rows can be filtered by it later.
flattened_data = [
    {'key': key, 'startYear': entry['startYear'], 'endYear': entry['endYear'], 'fileName': entry['fileName']}
    for key, entries in data.items()
    for entry in entries  # each key holds an array of entries
]

# Build the lookup table. The columns are named explicitly so that an empty
# index still yields a frame with the expected columns (a lookup then simply
# finds no match instead of raising KeyError on a missing column).
directory = pd.DataFrame(flattened_data, columns=['key', 'startYear', 'endYear', 'fileName'])


In [127]:
def get_file(department, year):
    """Load the dataset registered for ``department`` whose year range covers ``year``.

    The module-level ``directory`` table is searched for rows whose ``key``
    equals ``str(department)`` and whose ``startYear``/``endYear`` interval
    (inclusive on both ends) contains ``year``. The first matching row's
    ``fileName`` is read from ``./filtered_datasets/`` as a ``;``-delimited
    CSV, and its ``date`` column is parsed with the ``%Y%m%d`` format.

    Returns:
        The loaded ``pandas.DataFrame``, or the string
        ``"No matching file found."`` when no row matches. Callers must
        check for the string before treating the result as a DataFrame.
    """
    key_matches = directory['key'] == str(department)
    covers_year = (directory['startYear'] <= year) & (directory['endYear'] >= year)
    candidates = directory[key_matches & covers_year]

    # Guard clause: nothing registered for this department/year.
    if candidates.empty:
        return "No matching file found."

    # Only the first match is used when several rows qualify.
    csv_path = "./filtered_datasets/" + candidates['fileName'].iloc[0]
    dataset = pd.read_csv(csv_path, delimiter=";")
    dataset["date"] = pd.to_datetime(dataset["date"], format='%Y%m%d')
    return dataset

In [153]:
def closest_point_with_date(ref_point, date_to_find, df):
    """Return the row of ``df`` dated ``date_to_find`` that lies closest to ``ref_point``.

    Parameters
    ----------
    ref_point : sequence
        ``ref_point[0]`` and ``ref_point[1]`` are passed to ``haversine`` as
        its first two arguments, followed by the row's ``latitude`` and
        ``longitude`` (so presumably ``(latitude, longitude)``).
    date_to_find
        Value compared for equality against ``df['date']``.
    df : pandas.DataFrame
        Needs the columns ``stationNumber``, ``date``, ``latitude`` and
        ``longitude``. The frame is NOT modified by this function.

    Returns
    -------
    tuple
        ``(closest_row, distance)`` where ``closest_row`` is a
        ``pandas.Series`` with ``stationNumber`` rendered as ``str`` and
        ``distance`` is whatever ``haversine`` returned for that row, or
        ``(None, None)`` when no row has the requested date.
    """
    # Keep only the rows recorded on the requested date.
    df_filtered = df.loc[df['date'] == date_to_find]

    if df_filtered.empty:
        return None, None  # No data available for the given date

    # Convert the station number on the filtered rows only; ``assign`` returns
    # a new frame, so the caller's DataFrame keeps its original dtype and no
    # SettingWithCopyWarning is triggered.
    df_filtered = df_filtered.assign(stationNumber=df_filtered['stationNumber'].astype(str))

    # Distance from the reference point to every remaining station.
    distances = df_filtered.apply(
        lambda row: haversine(ref_point[0], ref_point[1], row['latitude'], row['longitude']),
        axis=1,
    )

    # Index label of the smallest distance; look both results up by label.
    min_index = distances.idxmin()
    closest_point = df_filtered.loc[min_index]
    return closest_point, distances.loc[min_index]

In [161]:
# Sample queries: a reference point, a calendar date and the department key
# used to select the dataset file.
# NOTE(review): `data` is also the name of the raw JSON mapping loaded in the
# first cell; it is rebound here to a list of query dicts.
data = [
    {"point": (47.218372, -1.553621), "day": 27, "month": 10, "year": 1999, "department": 44},
    {"point": (48.8566, 2.3522), "day": 15, "month": 6, "year": 1987, "department": 75},
    {"point": (51.5074, -0.1278), "day": 3, "month": 9, "year": 2005, "department": 11},
    {"point": (40.7128, -74.0060), "day": 8, "month": 2, "year": 1979, "department": 32},
    {"point": (34.0522, -118.2437), "day": 21, "month": 12, "year": 1992, "department": 62}
]

for entry in data:
    point = entry["point"]
    day = entry["day"]
    month = entry["month"]
    year = entry["year"]
    department = entry["department"]

    # Zero-padded ISO date (YYYY-MM-DD) so the value is unambiguous when it is
    # compared against the parsed `date` column.
    date = f"{year:04d}-{month:02d}-{day:02d}"

    selected_dataset = get_file(department, year)

    # get_file returns a message string instead of a DataFrame when no file
    # covers this department/year; report it and move on rather than letting
    # the lookup below fail on a str.
    if isinstance(selected_dataset, str):
        print(f"department {department}, year {year}: {selected_dataset}")
        continue

    # closest_point is None when the dataset has no row for this date.
    closest_point, distance = closest_point_with_date(point, date, selected_dataset)

    print(closest_point)

stationNumber                        44109001
stationName                     NANTES-DOULON
latitude                                47.22
longitude                            -1.52067
altitude                                   13
date                      1999-10-27 00:00:00
precipitation                             0.4
qualityPrecipitation                        1
minTemperature                            NaN
qualityMinTemperature                     NaN
maxTemperature                            NaN
qualityMaxTemperature                     NaN
avgHourlyTemp                             NaN
qualityAvgHourlyTemp                      NaN
minGroundTemp                             NaN
qualityMinGroundTemp                      NaN
frostDuration                             NaN
qualityFrostDuration                      NaN
avgWindSpeed10m                           NaN
qualityAvgWindSpeed10m                    NaN
avgWindSpeed2m                            NaN
qualityAvgWindSpeed2m             