In [1]:
import requests
import json
from datetime import datetime
import statistics
import pandas as pd

from osgeo import gdal, osr, ogr
import rasterio
import numpy as np
import statistics
from osgeo import osr
import math
import multiprocessing

In [2]:
# Open the GRIB air-pollution export read-only; every band is one 2-D field.
dataset = gdal.Open('data\\GRIB_air_pollution\\adaptor.mars.internal-1685222038.7457314-9629-11-395c9a70-7b6e-4dd0-bc89-1331fd305c05.grib', gdal.GA_ReadOnly)
# With GDAL exceptions disabled, gdal.Open signals failure by returning None.
# Fail here with a clear message instead of an AttributeError on the next line.
if dataset is None:
    raise RuntimeError("GDAL could not open the GRIB air pollution file")
message_count = dataset.RasterCount

# Raster cells per degree, used by lat_lon_converter to turn degrees into
# array indices. 4/3 cells per degree implies a 0.75 degree grid spacing
# -- TODO confirm against the dataset's geotransform.
LAT_LON_CONVERT = 4/3

# The band metadata (GRIB_COMMENT) labels the fields with cloud-cover names.
# According to the original author's notes they actually hold:
#   "Low cloud cover"        -> particulate matter d < 2.5 µm (PM2.5)
#   "Convective cloud cover" -> particulate matter d < 1 µm   (PM1)
#   "Medium cloud cover"     -> particulate matter d < 10 µm  (PM10)
meta_map_dict = {
    "Convective cloud cover [%]" : "PM1",
    "Medium cloud cover [%]" : "PM10",
    "Low cloud cover [%]" : "PM2.5"
}

#array[lat, lon]


def lat_lon_converter(lat, lon):
    """Convert a geographic coordinate into (row, column) raster indices.

    The mapping is linear in degrees, scaled by the module-level
    ``LAT_LON_CONVERT`` (cells per degree):

    * latitude  +90 -> row 0, 0 -> row 90 * scale, -90 -> row 180 * scale
    * longitude -180 -> column 0, 0 -> column 180 * scale

    Longitude is periodic, so a value that rounds up to +180 degrees is
    wrapped back to column 0 (the -180 degree column). Without the wrap the
    result would be ``360 * scale``, one past the last column of a grid that
    covers a single revolution.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.

    Returns:
        Tuple ``(new_lat, new_lon)`` of integer indices, in the order used
        to index the band arrays (``array[lat, lon]``).
    """
    new_lat = round(abs(lat - 90) * LAT_LON_CONVERT)
    # Number of columns spanning a full 360 degrees at this resolution.
    columns_per_revolution = round(360 * LAT_LON_CONVERT)
    new_lon = round((lon + 180) * LAT_LON_CONVERT) % columns_per_revolution
    return new_lat, new_lon

In [3]:
df = pd.read_csv("data\\solar_panel_analysis_dataset_final.csv")

# Pull the three columns needed for the coordinate lookup as plain lists.
id_list = df["GEM phase ID"].tolist()
longitude_list = df["Longitude"].tolist()
latitude_list = df["Latitude"].tolist()

# Last percentage printed by the progress messages of the processing cell.
GRIB_progress = ""
progress = ""

# Map every "GEM phase ID" to its coordinates; if an ID occurs more than once
# the last row wins.
read_coordinates_dict = {
    phase_id: {"longitude": lon_value, "latitude": lat_value}
    for phase_id, lon_value, lat_value in zip(id_list, longitude_list, latitude_list)
}

In [4]:
def create_chunks(lst, chunk_size):
    """Split ``lst`` into consecutive slices of at most ``chunk_size`` items.

    Args:
        lst: Sliceable sequence to split (order is preserved).
        chunk_size: Maximum number of items per chunk; must be at least 1.

    Returns:
        List of slices of ``lst``. Every slice has ``chunk_size`` items
        except possibly the last one. An empty ``lst`` gives ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1. A negative size
            would otherwise produce an empty result and silently drop
            every item.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1, got {!r}".format(chunk_size))
    return [lst[start:start + chunk_size] for start in range(0, len(lst), chunk_size)]

In [5]:
# Batch the site IDs (the keys of the coordinate lookup) into groups of 1000.
chunks = create_chunks(list(read_coordinates_dict), 1000)

In [8]:
# Index of the first chunk to process. The original cell iterated over
# chunks[3:], presumably resuming after chunks 1-3 had already been exported;
# set this to 0 to process everything.
FIRST_CHUNK_INDEX = 3

# Particulate series expected in every monthly / yearly / full bucket.
PARTICULATES = ('PM10', 'PM1', 'PM2.5')

# Hourly variables requested from the Open-Meteo archive API; also the keys
# under which their statistics are stored.
WEATHER_PARAMETERS = (
    "relativehumidity_2m",
    "dewpoint_2m",
    "precipitation",
    "cloudcover",
    "shortwave_radiation",
)

# Seconds handed to requests as timeout, so a stalled connection cannot hang
# the whole run.
REQUEST_TIMEOUT_SECONDS = 120


def _summary_stats(samples):
    """Reduce a list of numeric samples to the statistics stored in the JSON.

    Args:
        samples: Non-empty list of numbers. ``statistics.stdev`` and
            ``statistics.variance`` need at least two samples and raise
            ``statistics.StatisticsError`` otherwise.

    Returns:
        Dict with the keys ``average``, ``median``, ``mode``,
        ``standard_deviation``, ``variance``, ``max``, ``min`` and ``range``.
    """
    highest = max(samples)
    lowest = min(samples)
    return {
        "average": sum(samples) / len(samples),
        "median": statistics.median(samples),
        "mode": statistics.mode(samples),
        "standard_deviation": statistics.stdev(samples),
        "variance": statistics.variance(samples),
        "max": highest,
        "min": lowest,
        "range": highest - lowest,
    }


# NOTE(review): every chunk re-reads all GRIB bands. Reading the bands once
# for all sites would avoid the repeated pass, at the cost of restructuring.
for chunk_number, chunk in enumerate(chunks[FIRST_CHUNK_INDEX:], start=FIRST_CHUNK_INDEX + 1):

    # Reset the "last printed percentage" trackers so progress output does not
    # depend on the previous chunk or on an earlier run of this cell.
    GRIB_progress = ""
    progress = ""

    # A site's grid cell does not depend on the band: resolve it once per chunk.
    grid_index = {
        ids: lat_lon_converter(read_coordinates_dict[ids]["latitude"], read_coordinates_dict[ids]["longitude"])
        for ids in chunk
    }

    final_dict = {ids: {"monthly": {}, "yearly": {}, "full": {}} for ids in chunk}

    # --- 1. Collect the particulate value of every band for every site -------
    band_count = dataset.RasterCount
    # GDAL band numbers are 1-based and run up to RasterCount inclusive.
    for band in range(1, band_count + 1):
        current_GRIB_progress = round((band / band_count) * 100)
        if current_GRIB_progress % 10 == 0 and current_GRIB_progress != GRIB_progress:
            print("{} - Progress on GRIB processing: {}%".format(datetime.now(),current_GRIB_progress))
            GRIB_progress = current_GRIB_progress

        raster = dataset.GetRasterBand(band)
        meta = raster.GetMetadata()
        particulate = meta_map_dict[meta['GRIB_COMMENT']]
        date = datetime.utcfromtimestamp(int(meta['GRIB_REF_TIME']))
        year = date.strftime('%Y')
        month = date.strftime('%m')
        # Scale by 1e9 (presumably kg/m^3 -> µg/m^3; confirm against the GRIB units).
        scaled_array = raster.ReadAsArray() * 1000000000

        for ids, (scaled_lat, scaled_lon) in grid_index.items():
            # Plain Python float so json.dump works whatever the raster's NumPy dtype.
            sample = float(scaled_array[scaled_lat, scaled_lon])
            site = final_dict[ids]
            site["monthly"].setdefault(month, {}).setdefault(particulate, []).append(sample)
            site["yearly"].setdefault(year, {}).setdefault(particulate, []).append(sample)
            site["full"].setdefault(particulate, []).append(sample)

    # --- 2. Replace every raw particulate list by its summary statistics -----
    for site in final_dict.values():
        for bucket in [*site["monthly"].values(), *site["yearly"].values(), site["full"]]:
            for particulate in PARTICULATES:
                bucket[particulate] = _summary_stats(bucket[particulate])
    print("{} - Statistical processing of GRIB file is finished!".format(datetime.now()))

    # --- 3. Fetch hourly weather per site and merge its statistics -----------
    for position, ids in enumerate(chunk):
        response = requests.get(
            "https://archive-api.open-meteo.com/v1/archive?",
            params={
                "latitude": read_coordinates_dict[ids]["latitude"],
                "longitude": read_coordinates_dict[ids]["longitude"],
                "start_date": "2005-01-01",
                "end_date": "2015-12-31",
                "hourly": ",".join(WEATHER_PARAMETERS)
            },
            timeout=REQUEST_TIMEOUT_SECONDS
        )
        # Surface HTTP errors directly instead of a KeyError on the missing
        # "hourly" key further down.
        response.raise_for_status()
        data = response.json()
        hourly = data["hourly"]

        monthly_averages = {}
        yearly_averages = {}
        full_averages = {}

        hourly_rows = zip(hourly["time"], *(hourly[name] for name in WEATHER_PARAMETERS))
        for timestamp, *readings in hourly_rows:
            date_format = datetime.strptime(timestamp, '%Y-%m-%dT%H:%M')
            year = date_format.strftime('%Y')
            month = date_format.strftime('%m')
            month_bucket = monthly_averages.setdefault(month, {})
            year_bucket = yearly_averages.setdefault(year, {})

            for name, reading in zip(WEATHER_PARAMETERS, readings):
                month_bucket.setdefault(name, []).append(reading)
                year_bucket.setdefault(name, []).append(reading)
                full_averages.setdefault(name, []).append(reading)

        for bucket in [*yearly_averages.values(), *monthly_averages.values(), full_averages]:
            for name in WEATHER_PARAMETERS:
                bucket[name] = _summary_stats(bucket[name])

        # The month / year keys must already exist from the GRIB pass; a
        # KeyError here means the two sources cover different periods.
        for month, month_stats in monthly_averages.items():
            final_dict[ids]["monthly"][month].update(month_stats)

        for year, year_stats in yearly_averages.items():
            final_dict[ids]["yearly"][year].update(year_stats)

        final_dict[ids]["full"].update(full_averages)

        current_progress = round((position / len(chunk)) * 100)
        if current_progress % 10 == 0 and current_progress != progress:
            print("{} - Progress of chunk {}/{}: {}%".format(datetime.now(),chunk_number, len(chunks), current_progress))
            progress = current_progress

    # --- 4. Persist the chunk ------------------------------------------------
    with open('additional_weather_data\\additional_weather_data_{}.json'.format(chunk_number), 'w') as json_file:
        json.dump(final_dict, json_file)
    print('JSON was created for chunk: {}/{}'.format(chunk_number, len(chunks)))

2023-06-07 08:27:32.779305 - Progress on GRIB processing: 0%
2023-06-07 08:28:28.828764 - Progress on GRIB processing: 10%
2023-06-07 08:29:17.897929 - Progress on GRIB processing: 20%
2023-06-07 08:30:05.427016 - Progress on GRIB processing: 30%
2023-06-07 08:30:56.683987 - Progress on GRIB processing: 40%
2023-06-07 08:31:46.489871 - Progress on GRIB processing: 50%
2023-06-07 08:32:31.262185 - Progress on GRIB processing: 60%
2023-06-07 08:33:25.484789 - Progress on GRIB processing: 70%
2023-06-07 08:34:07.804374 - Progress on GRIB processing: 80%
2023-06-07 08:34:51.031789 - Progress on GRIB processing: 90%
2023-06-07 08:35:43.217135 - Progress on GRIB processing: 100%
Statistical processing of GRIB file is finished!
2023-06-07 08:55:07.520988 - Progress of chunk 4/9: 10%
2023-06-07 09:04:29.860108 - Progress of chunk 4/9: 20%
2023-06-07 09:14:06.982961 - Progress of chunk 4/9: 30%
2023-06-07 09:24:27.800226 - Progress of chunk 4/9: 40%
2023-06-07 09:34:07.447128 - Progress of chun