In [1]:
import pandas as pd
import numpy as np

# Load the raw weather export and parse DATE so it can be compared as datetimes.
df = pd.read_csv('data/weatherus.csv')
df['DATE'] = pd.to_datetime(df['DATE'])

# Keep only rows whose DATE falls in the inclusive 2015-01-01 .. 2018-12-31 window.
on_or_after_start = df['DATE'] >= '2015-01-01'
on_or_before_end = df['DATE'] <= '2018-12-31'
df_filtered = df[on_or_after_start & on_or_before_end]

# Persist the filtered subset (without the index column) for the later cells.
df_filtered.to_csv('data/weatherus_filtered.csv', index=False)

In [2]:
import pandas as pd
# Read the filtered weather CSV from disk into a DataFrame.
weather_data = pd.read_csv(filepath_or_buffer='data/weatherus_filtered.csv')

In [3]:
# Collapse to one row per ID, keeping the first occurrence of each
# (the later nearest-station search only reads ID / Latitude / Longitude).
weather_data = weather_data.drop_duplicates(subset=['ID'], keep='first')

In [5]:
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points using the haversine formula.

    All coordinates are in decimal degrees. Each argument may be a scalar,
    a NumPy array, or a pandas Series; the usual NumPy broadcasting rules
    apply, and a Series input yields a Series that keeps its index.

    Parameters
    ----------
    lat1, lon1 : latitude and longitude of the first point(s), in degrees.
    lat2, lon2 : latitude and longitude of the second point(s), in degrees.

    Returns
    -------
    Distance(s) in kilometres, computed on a sphere of radius 6371 km.
    """
    R = 6371  # sphere radius in kilometres, so the result is in kilometres
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)

    a = np.sin(delta_phi / 2.0)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2.0)**2
    # Round-off can push `a` marginally outside [0, 1] (e.g. for near-antipodal
    # points), which would make sqrt(1 - a) return NaN. Clamp it using ufuncs so
    # that pandas Series inputs keep their index.
    a = np.minimum(1.0, np.maximum(0.0, a))
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return R * c

# Region coordinates; each row needs 'Latitude' and 'Longitude' columns.
regions_data = pd.read_csv('data/regions_coordinates.csv')

closest_station_ids = []

for _, region in regions_data.iterrows():
    region_lat = region['Latitude']
    region_lon = region['Longitude']

    # haversine() is built from NumPy ufuncs, so passing the whole coordinate
    # columns computes every station distance in one vectorized call instead of
    # one Python-level call per station row. The result is a Series that keeps
    # weather_data's index, so idxmin() still returns a row label.
    distances = haversine(
        region_lat,
        region_lon,
        weather_data['Latitude'],
        weather_data['Longitude'],
    )

    # Label of the nearest station row (idxmin skips NaN distances by default).
    closest_station_index = distances.idxmin()
    closest_station_id = weather_data.at[closest_station_index, 'ID']

    closest_station_ids.append(closest_station_id)

# One nearest-station ID per region, in the same order as the rows of regions_data.
regions_data['ClosestStationID'] = closest_station_ids

regions_data.to_csv('data/updated_regions_data.csv', index=False)

print("Closest station IDs have been added to the regions file.")


Closest station IDs have been added to the regions file.


In [9]:
# Reload the filtered weather observations written earlier in the notebook.
df_weather = pd.read_csv('data/weatherus_filtered.csv')

# Station IDs that were assigned to regions as their closest station.
df_updated_regions = pd.read_csv('data/updated_regions_data.csv')
stations = df_updated_regions['ClosestStationID'].to_list()

# Keep only the observations that belong to one of those stations.
is_selected_station = df_weather['ID'].isin(stations)
filtered_weather_data = df_weather.loc[is_selected_station]



In [12]:
# Attach the region name to every weather row by matching the row's station ID
# against each region's closest station, then drop the redundant join key.
merged_data = (
    filtered_weather_data
    .merge(
        regions_data[['Region', 'ClosestStationID']],
        how='left',
        left_on='ID',
        right_on='ClosestStationID',
    )
    .drop(columns=['ClosestStationID'])
)

# Write the region-labelled weather data without the index column.
merged_data.to_csv('data/merged_weather_data.csv', index=False)