In [None]:
!pip install geopy hvplot optuna plotly pytz joblib tqdm bokeh cartopy geoviews pyproj bokeh_sampledata

Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
Collecting hvplot
  Downloading hvplot-0.11.1-py3-none-any.whl.metadata (15 kB)
Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting plotly
  Downloading plotly-5.24.1-py3-none-any.whl.metadata (7.3 kB)
Collecting cartopy
  Downloading Cartopy-0.24.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Collecting geoviews
  Downloading geoviews-1.13.0-py3-none-any.whl.metadata (8.5 kB)
Collecting pyproj
  Downloading pyproj-3.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (31 kB)
Collecting bokeh_sampledata
  Downloading bokeh_sampledata-2024.2-py3-none-any.whl.metadata (2.6 kB)
Collecting geographiclib<3,>=1.52 (from geopy)
  Downloading geographiclib-2.0-py3-none-any.whl.metadata (1.4 kB)
Collecting colorcet>=2 (from hvplot)
  Downloading colorcet-3.1.0-py3-none-any.whl.metadata (6.3 kB)
Collecting holoviews>=1.19.0 (from 

In [None]:
import copy
import os
import sys
from scipy.spatial import ConvexHull

import geopandas as gpd
from shapely.geometry import MultiPoint, Polygon, Point
import geopy.distance
import hvplot.pandas
import numpy as np
import optuna
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pytz
import tensorflow as tf
from bokeh.sampledata.penguins import data as df
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.neural_network import MLPRegressor
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from tqdm import tqdm

In [None]:
# Directory with the pre-processed NREL California 2006 files (relative path).
input_path = r'../../data/NREL/california_2006/united_processed_data'

# Read both CSV files from that directory: the united data table and the
# table of sensor locations.
united_data_df, sensor_location = (
    pd.read_csv(os.path.join(input_path, csv_name))
    for csv_name in ("united_data_2006.csv", "sensors_location_2006.csv")
)

# Display the sensor location table as the cell output.
sensor_location


In [None]:
def calc_convex_hull(longitudes, latitudes):
    """
    Compute the convex hull of a set of (longitude, latitude) points.

    Parameters:
    longitudes (iterable): Longitude of each point.
    latitudes (iterable): Latitude of each point, paired positionally with `longitudes`.

    Returns:
    np.ndarray: Coordinates of the hull. For a polygonal hull these are the
        coordinates of its exterior ring; for any other geometry type the
        geometry's own coordinates are returned.
    """
    multipoint = MultiPoint(list(zip(longitudes, latitudes)))
    hull = gpd.GeoDataFrame(geometry=[multipoint]).union_all().convex_hull

    # Only polygons carry an exterior ring; other hull geometries
    # (presumably a Point or LineString for degenerate input) expose
    # their coordinates directly.
    coord_source = hull.exterior if hull.geom_type == 'Polygon' else hull
    return np.array(coord_source.coords)

def calc_bounding_box(longitudes, latitudes):
    """
    Compute the axis-aligned bounding box (envelope) of a set of points.

    Parameters:
    longitudes (iterable): Longitude of each point.
    latitudes (iterable): Latitude of each point, paired positionally with `longitudes`.

    Returns:
    np.ndarray: Coordinates of the envelope. When the envelope is a polygon
        these are the coordinates of its exterior ring. For degenerate input
        (e.g. a single point) the envelope is not a polygon and its own
        coordinates are returned instead.
    """
    points = list(zip(longitudes, latitudes))
    gdf = gpd.GeoDataFrame(geometry=[MultiPoint(points)])

    # Compute the bounding box
    bounding_box = gdf.union_all().envelope

    # Extract the bounding box coordinates
    if bounding_box.geom_type == 'Polygon':
        bbox_coords = np.array(bounding_box.exterior.coords)
    else:
        # An envelope is never a multi-polygon. A non-polygon result means the
        # input was degenerate and the envelope collapsed to a Point (or a
        # line for axis-aligned collinear points). Such geometries have no
        # exterior ring and cannot be indexed, so read their coordinates
        # directly, mirroring calc_convex_hull.
        bbox_coords = np.array(bounding_box.coords)

    return bbox_coords


def generate_grid_within_hull(coords, grid_resolution):
    """
    Generate grid points within the polygon defined by the coordinates.

    Parameters:
    coords (np.ndarray): Array of shape (n, 2) with longitude and latitude points of the polygon
        (e.g. a convex hull or bounding box ring).
    grid_resolution (float): Spacing between grid points, in the same units as `coords`.
        Must be positive.

    Returns:
    np.ndarray: Array of shape (m, 2) with the grid points (longitude, latitude) strictly inside
        the polygon. Points lying exactly on the boundary are excluded. When no grid point is
        inside, an empty array of shape (0, 2) is returned.

    Raises:
    ValueError: If `grid_resolution` is not a positive number.
    """
    # A zero, negative or NaN step cannot describe a grid.
    if not grid_resolution > 0:
        raise ValueError("grid_resolution must be a positive number")

    # Create a Polygon object for the hull
    hull_polygon = Polygon(coords)

    # Extract bounding box
    min_lon, min_lat = np.min(coords, axis=0)
    max_lon, max_lat = np.max(coords, axis=0)

    # Generate the axis values of the grid
    lon_points = np.arange(min_lon, max_lon + grid_resolution, grid_resolution)
    lat_points = np.arange(min_lat, max_lat + grid_resolution, grid_resolution)

    # All (lon, lat) combinations; "ij" indexing keeps longitude as the
    # slowest-varying coordinate.
    lon_mesh, lat_mesh = np.meshgrid(lon_points, lat_points, indexing="ij")
    grid_points = np.column_stack([lon_mesh.ravel(), lat_mesh.ravel()])

    # Filter grid points inside the polygon
    inside_points = [point for point in grid_points if hull_polygon.contains(Point(point))]

    # Force a two-column result so that an empty selection has shape (0, 2)
    # and can still be loaded into a two-column DataFrame.
    return np.array(inside_points, dtype=float).reshape(-1, 2)

In [None]:
# Distinct sensor longitudes / latitudes in ascending order.
lon_vec = np.sort(np.unique(sensor_location["Longitude"]))
lat_vec = np.sort(np.unique(sensor_location["Latitude"]))

# Three (latitude, longitude) corner points built from the two smallest
# distinct values on each axis: coords_1 and coords_2 differ only in latitude,
# coords_2 and coords_3 differ only in longitude.
coords_1 = (lat_vec[1], lon_vec[1])
coords_2 = (lat_vec[0], lon_vec[1])
coords_3 = (lat_vec[0], lon_vec[0])

# Geodesic length (km) of one latitude step and one longitude step.
lat_dist = geopy.distance.geodesic(coords_1, coords_2).km
lon_dist = geopy.distance.geodesic(coords_2, coords_3).km

# Counts below are numbers of distinct latitudes x distinct longitudes.
# NOTE(review): this equals the number of sensors only if the sensors form a
# full rectangular grid - confirm.
print("num of sensors: " + str(len(lat_vec)) + "x" + str(len(lon_vec)))
print("X distance = " + str(lon_dist))
print("Y distance = " + str(lat_dist))
# The totals extrapolate the first step across all distinct values.
# NOTE(review): assumes the values are evenly spaced - confirm.
print("total X distance = " + str((len(lon_vec) - 1) * lon_dist))
print("total Y distance = " + str((len(lat_vec) - 1) * lat_dist))
print("Max Longitude = " + str(max(lon_vec)))
print("Min Longitude = " + str(min(lon_vec)))
print("Max Latitude = " + str(max(lat_vec)))
print("Min Latitude = " + str(min(lat_vec)))

# Independent copy of the sensor table, used for the sensor plot below.
tested_sensor_location = sensor_location.copy()
# Despite the "ch" suffix this holds the bounding box of the sensors
# (calc_bounding_box), not their convex hull.
lon_lat_ch = calc_bounding_box(sensor_location.Longitude.to_list(), sensor_location.Latitude.to_list())
# Grid of points inside that box with a spacing of 0.2
# (presumably degrees, since the inputs are longitude/latitude - confirm).
lon_lat_grid = generate_grid_within_hull(lon_lat_ch, 0.2)

df_hull = pd.DataFrame(lon_lat_ch, columns=['Longitude', 'Latitude'])
df_obs_grid = pd.DataFrame(lon_lat_grid, columns=['Longitude', 'Latitude'])
# Plot new points
# Bounding box outline/area.
hull_plot = df_hull.hvplot.polygons(
    x="Longitude",
    y="Latitude",
    cmap="bwr",
    alpha = 0.2,
    geo=True,
    tiles="CartoLight"
)

# Generated grid points.
obs_grid_plot = df_obs_grid.hvplot.points(
    x="Longitude",
    y="Latitude",
    alpha = 0.2,
    geo=True,
    tiles="CartoLight"
)

# Sensor locations.
tested_sensor_location_plot = tested_sensor_location.hvplot.points(
    x="Longitude",
    y="Latitude",
    geo=True,
    tiles="CartoLight",
)

# Overlay the three layers; the overlay is the cell's displayed output.
obs_grid_plot * tested_sensor_location_plot * hull_plot

In [None]:
def geodesic_distance_matrix(points):
    """
    Compute the pairwise geodesic distances between points.

    Parameters:
    points (sequence): Indexable sequence of points accepted by
        geopy.distance.geodesic, i.e. (latitude, longitude) pairs.

    Returns:
    np.ndarray: Symmetric array of shape (n, n) whose entry [i, j] is the
        geodesic distance in kilometres between points[i] and points[j].
        The diagonal is zero.
    """
    num_points = len(points)
    # Zero initialisation already covers the diagonal (distance to itself).
    distances = np.zeros((num_points, num_points))
    for i in range(num_points):
        # Each unordered pair is evaluated once and mirrored, which halves
        # the number of geodesic computations.
        for j in range(i + 1, num_points):
            # Fully qualified: the notebook imports `geopy.distance`, it does
            # not import the bare name `geodesic`.
            dist_km = geopy.distance.geodesic(points[i], points[j]).km
            distances[i, j] = dist_km
            distances[j, i] = dist_km
    return distances

def idw_interpolation(lon_grid, lat_grid, lon_data, lat_data, values, power=2):
    """
    Interpolate values onto grid points with inverse-distance weighting.

    Each grid point receives the weighted mean of all data values, with
    weight 1 / (d**power + 1), where d is the geodesic distance in kilometres
    between the grid point and the data point. The "+ 1" keeps the weight
    finite (equal to 1) at zero distance, so no special handling of
    coincident points is required.

    Parameters:
    lon_grid, lat_grid (array-like): Longitudes and latitudes of the target
        points, paired positionally. Inputs are flattened.
    lon_data, lat_data (array-like): Longitudes and latitudes of the
        observations, paired positionally. Inputs are flattened.
    values (array-like): One observed value per data point.
    power (float): Exponent applied to the distance. Defaults to 2.

    Returns:
    np.ndarray: Array of shape (n_grid,) with the interpolated value at each
        grid point.

    Raises:
    ValueError: If there are no data points, or if the number of values does
        not match the number of data points.
    """
    # geopy expects (latitude, longitude) ordering.
    grid_points = np.column_stack([np.ravel(lat_grid), np.ravel(lon_grid)])
    data_points = np.column_stack([np.ravel(lat_data), np.ravel(lon_data)])
    values = np.asarray(values, dtype=float).ravel()

    num_grid = grid_points.shape[0]
    num_data = data_points.shape[0]
    if num_data == 0:
        raise ValueError("at least one data point is required")
    if values.shape[0] != num_data:
        raise ValueError("values must contain exactly one entry per data point")

    # Rows index grid points and columns index data points, so that `values`
    # broadcasts along the data axis and the sums below run over data points.
    # Fully qualified: the notebook imports `geopy.distance`, it does not
    # import the bare name `geodesic`.
    distances = np.array(
        [
            [geopy.distance.geodesic(grid_pt, data_pt).km for data_pt in data_points]
            for grid_pt in grid_points
        ],
        dtype=float,
    ).reshape(num_grid, num_data)

    weights = 1 / (distances**power + 1)
    weighted_values = np.sum(weights * values, axis=1)
    sum_weights = np.sum(weights, axis=1)
    return weighted_values / sum_weights

In [None]:
# Build the (latitude, longitude) pairs of all sensors explicitly so that this
# cell also runs on a fresh kernel; no earlier cell defines `points`.
points = list(zip(sensor_location["Latitude"], sensor_location["Longitude"]))
geodesic_distance_matrix(points)

In [None]:
import os
import pandas as pd
import plotly.express as px
import geopy
# [latitude, longitude] of the first two rows of the sensor table.
coords_1, coords_2 = (
    sensor_location.iloc[row][['Latitude', 'Longitude']].tolist()
    for row in (0, 1)
)
# Geodesic distance between them in kilometres (cell output).
geopy.distance.geodesic(coords_1, coords_2).km

In [None]:
# Index of the file used as an example.
i = 300
# NOTE(review): `all_files_list` is not defined in any cell visible above -
# confirm where it comes from, otherwise this cell fails on a fresh kernel.
example_file_name = all_files_list[i]

# Load the file, parse the timestamp column and switch to snake_case names.
example_df = (
    pd.read_csv(os.path.join(input_path, example_file_name))
    .assign(LocalTime=lambda frame: pd.to_datetime(frame['LocalTime']))
    .rename(columns={"Power(MW)": "power_mw", "LocalTime": "local_time"})
)
example_df

In [None]:
# Line chart of power over local time for the example file.
px.line(
    x=example_df["local_time"],
    y=example_df["power_mw"],
).show()