In [1]:
#### import libraries
import pandas as pd
from pathlib import Path
import json
# geopy is a Python library for locating the coordinates of addresses
# import Nominatim class from geopy library to use for geocoding 
# addresses into latitude and longitude.
from geopy.geocoders import Nominatim

In [2]:
#### load all CSV files
# directory that holds the competition data
folder_path = '/kaggle/input/predict-energy-behavior-of-prosumers'
# lazily match every file in that directory whose name ends with ".csv"
csv_files = Path(folder_path).glob("*.csv")

# build {file stem: DataFrame}; the stem is the file name without the
# folder path and without the ".csv" extension
files = {path.stem: pd.read_csv(path) for path in csv_files}

print(files.keys())

dict_keys(['client', 'gas_prices', 'electricity_prices', 'weather_station_to_county_mapping', 'historical_weather', 'train', 'forecast_weather'])


In [3]:
#### load the JSON file
# open the JSON file for reading; the with statement ensures the file is
# closed once its body finishes.
# The encoding is pinned to UTF-8 because the county names contain non-ASCII
# letters (Ä, Õ, ...) and open() would otherwise fall back to the
# platform-dependent locale encoding.
with open('/kaggle/input/predict-energy-behavior-of-prosumers/county_id_to_name_map.json', encoding='utf-8') as f:
    # parse the JSON document into a Python dictionary
    county_codes = json.load(f)

print(county_codes)

{'0': 'HARJUMAA', '1': 'HIIUMAA', '2': 'IDA-VIRUMAA', '3': 'JÄRVAMAA', '4': 'JÕGEVAMAA', '5': 'LÄÄNE-VIRUMAA', '6': 'LÄÄNEMAA', '7': 'PÄRNUMAA', '8': 'PÕLVAMAA', '9': 'RAPLAMAA', '10': 'SAAREMAA', '11': 'TARTUMAA', '12': 'UNKNOWN', '13': 'VALGAMAA', '14': 'VILJANDIMAA', '15': 'VÕRUMAA'}


In [4]:
#### clean the JSON file
# invert the mapping: shortened lowercase county name -> county code.
# NOTE: str.rstrip('maa') removes every trailing 'm'/'a' character, not the
# literal suffix "maa", so e.g. 'järvamaa' becomes 'järv' rather than 'järva'.
# The geocoding cell further down looks counties up by exactly these shortened
# keys (through its name_mapping), so they are deliberately left unchanged.
parsed_counties = {
    county.lower().rstrip('maa'): county_id
    for county_id, county in county_codes.items()
}

print(parsed_counties)

{'harju': '0', 'hiiu': '1', 'ida-viru': '2', 'järv': '3', 'jõgev': '4', 'lääne-viru': '5', 'lääne': '6', 'pärnu': '7', 'põlv': '8', 'rapl': '9', 'saare': '10', 'tartu': '11', 'unknown': '12', 'valg': '13', 'viljandi': '14', 'võru': '15'}


In [5]:
#### map codes and locations
# The keys of parsed_counties were shortened with rstrip('maa'), which also
# eats a trailing 'a' of the stem. This dictionary maps the full county names
# returned by the geocoder to those shortened keys.
name_mapping = {
    "valga": "valg",
    "põlva": "põlv",
    "jõgeva": "jõgev",
    "rapla": "rapl",
    "järva": "järv"
}

# initialize the geocoder
# NOTE(review): the public Nominatim service asks for at most one request per
# second; consider wrapping geoLoc.reverse in geopy's RateLimiter -- TODO confirm
geoLoc = Nominatim(user_agent='GetLoc')

# every distinct (latitude, longitude) pair present in the forecast data
unique_coords = files['forecast_weather'][['latitude', 'longitude']].drop_duplicates()

for lat, lon in unique_coords.itertuples(index=False):
    # reverse-geocode the current coordinates
    locname = geoLoc.reverse((lat, lon))
    if not locname:
        # the geocoder found nothing for this point
        continue

    # the raw response is a plain dictionary; get() avoids a KeyError when
    # the address details are missing
    address = locname.raw.get('address', {})
    if address.get('country') != "Eesti":
        # only points located in Estonia are of interest
        continue

    # an address may come back without a county; indexing the empty split
    # result would raise IndexError, so such points are skipped
    county_words = address.get('county', '').split()
    if not county_words:
        continue
    county_name = county_words[0].lower()

    # use the shortened key from name_mapping, or the name itself if it has none
    county_name_mapped = name_mapping.get(county_name, county_name)
    # output the county information
    print(f"county: '{county_name_mapped}', county code:", parsed_counties[county_name_mapped], (lat, lon))

county: 'saare', county code: 10 (57.6, 23.2)
county: 'võru', county code: 15 (57.6, 26.7)
county: 'võru', county code: 15 (57.6, 27.2)
county: 'saare', county code: 10 (57.9, 21.7)
county: 'saare', county code: 10 (57.9, 22.2)
county: 'saare', county code: 10 (57.9, 23.2)
county: 'pärnu', county code: 7 (57.9, 23.7)
county: 'pärnu', county code: 7 (57.9, 24.2)
county: 'valg', county code: 13 (57.9, 26.2)
county: 'võru', county code: 15 (57.9, 26.7)
county: 'võru', county code: 15 (57.9, 27.2)
county: 'võru', county code: 15 (57.9, 27.7)
county: 'saare', county code: 10 (58.2, 21.7)
county: 'saare', county code: 10 (58.2, 22.2)
county: 'saare', county code: 10 (58.2, 22.7)
county: 'saare', county code: 10 (58.2, 23.2)
county: 'pärnu', county code: 7 (58.2, 23.7)
county: 'pärnu', county code: 7 (58.2, 24.2)
county: 'pärnu', county code: 7 (58.2, 24.7)
county: 'pärnu', county code: 7 (58.2, 25.2)
county: 'viljandi', county code: 14 (58.2, 25.7)
county: 'tartu', county code: 11 (58.2, 26.