In [1]:
#### import libraries
import pandas as pd
from pathlib import Path
import json
# geopy is a Python library for locating the coordinates of 
# addresses, cities, countries, and landmarks across the globe
# using different geocoding services and Nominatim is one of those services.
from geopy.geocoders import Nominatim

In [2]:
#### load all CSV files
folder_path = '/kaggle/input/predict-energy-behavior-of-prosumers'
# look for all files in the folder ending with "".csv"
csv_files = Path(folder_path).glob("*.csv")

# initialize an empty dictionary to store the DataFrames
dataframes = {}

# loop through each CSV file found
for csv_file in csv_files:
    # create a variable name based on the file name (without the folder path and the ".csv" part)
    variable_name = csv_file.stem
    
    # read the CSV file and assign its DataFrame to the dictionary with the key
    dataframes[variable_name] = pd.read_csv(csv_file)

In [3]:
#### load the JSON file
# open the JSON file for reading
# with statement ensures the file is properly closed after its suite finishes
with open('/kaggle/input/predict-energy-behavior-of-prosumers/county_id_to_name_map.json') as f:
    # load the JSON data into a Python dictionary
    county_codes = json.load(f)

print(county_codes)

{'0': 'HARJUMAA', '1': 'HIIUMAA', '2': 'IDA-VIRUMAA', '3': 'JÄRVAMAA', '4': 'JÕGEVAMAA', '5': 'LÄÄNE-VIRUMAA', '6': 'LÄÄNEMAA', '7': 'PÄRNUMAA', '8': 'PÕLVAMAA', '9': 'RAPLAMAA', '10': 'SAAREMAA', '11': 'TARTUMAA', '12': 'UNKNOWN', '13': 'VALGAMAA', '14': 'VILJANDIMAA', '15': 'VÕRUMAA'}


In [4]:
#### clean the JSON file
# initialize an empty dictionary
parsed_counties = {}

# loop through each pair in the original dictionary
for code, name in county_codes.items():
    # process the name, make it lowercase and remove trailing 'maa'
    processed_name = name.lower().rstrip('maa')
    # assign the new name to the code in the new dictionary
    parsed_counties[processed_name] = code
    
print(parsed_counties)

{'harju': '0', 'hiiu': '1', 'ida-viru': '2', 'järv': '3', 'jõgev': '4', 'lääne-viru': '5', 'lääne': '6', 'pärnu': '7', 'põlv': '8', 'rapl': '9', 'saare': '10', 'tartu': '11', 'unknown': '12', 'valg': '13', 'viljandi': '14', 'võru': '15'}


In [5]:
#### map codes and locations
name_mapping = {
    "valga": "valg",
    "põlva": "põlv",
    "jõgeva": "jõgev",
    "rapla": "rapl",
    "järva": "järv"
}

# initialize the geocoder once, outside the loop
geoLoc = Nominatim(user_agent='GetLoc')

'''# loop through each row with unique latitude and longitude values
for coords in forecast_weather[['latitude', 'longitude']].drop_duplicates().itertuples(index=False):
    # get latitude and longitude
    lat, lon = coords[0], coords[1]
    
    # get the location information for the current coordinates
    locname = geoLoc.reverse((lat, lon), exactly_one=True)
    
    # if a location is found and it's in Estonia, process and output the information
    if locname and locname.raw.get('address', {}).get('county') == 'Eesti':
        county_name = locname.raw['address'].get('county', '')
'''

"# loop through each row with unique latitude and longitude values\nfor coords in forecast_weather[['latitude', 'longitude']].drop_duplicates().itertuples(index=False):\n    # get latitude and longitude\n    lat, lon = coords[0], coords[1]\n    \n    # get the location information for the current coordinates\n    locname = geoLoc.reverse((lat, lon), exactly_one=True)\n    \n    # if a location is found and it's in Estonia, process and output the information\n    if locname and locname.raw.get('address', {}).get('county') == 'Eesti':\n        county_name = locname.raw['address'].get('county', '')\n"