In [1]:
#Top 500 Wind Solar Combination marks goes to all Norway Locations
import pandas as pd
from scipy.spatial import cKDTree
import numpy as np

# Input paths
SOLAR_CSV = r"C:\GISDataManipulation\Merged_Denmark_Norway_SolarPVOUT\Merged_SolarPVOUT_WithAverage.csv"
WIND_TXT = r"C:\GISDataManipulation\NorwayDenmarkMergedWindSpeed100m\NorwayDenmarkMergedWindSpeed100mFromTXT.txt"
OUTPUT_TOP500 = r"C:\GISDataManipulation\TopLocation\Top500_High_Wind_Solar_Locations.csv"

# Load solar and wind data
df_solar = pd.read_csv(SOLAR_CSV)
pvout_month_columns = [col for col in df_solar.columns if col.startswith('PVOUT_Month_')]
if 'PVOUT_Average' not in df_solar.columns:
    df_solar['PVOUT_Average'] = df_solar[pvout_month_columns].mean(axis=1)

df_wind = pd.read_csv(WIND_TXT, delimiter=",")
wind_col_candidates = [col for col in df_wind.columns if 'wind' in col.lower() or 'value' in col.lower()]
wind_col = wind_col_candidates[0]
df_wind = df_wind.rename(columns={wind_col: 'Wind_Speed_m_s'})

# KDTree matching
solar_coords = df_solar[['Latitude', 'Longitude']].values
wind_coords = df_wind[['Latitude', 'Longitude']].values

tree = cKDTree(wind_coords)
distances, indices = tree.query(solar_coords, k=1, distance_upper_bound=0.01)  # ~1km tolerance

# Filter out invalid matches (indices equal to df_wind length or distance = inf)
valid_mask = (indices < len(df_wind)) & np.isfinite(distances)
df_valid_solar = df_solar[valid_mask].reset_index(drop=True)
valid_indices = indices[valid_mask]

# Extract corresponding wind speeds
matched_wind = df_wind.iloc[valid_indices].reset_index(drop=True)
df_combined = df_valid_solar.copy()
df_combined['Wind_Speed_m_s'] = matched_wind['Wind_Speed_m_s'].values

# Compute combined score
df_combined['Combined_Score'] = df_combined['PVOUT_Average'] * df_combined['Wind_Speed_m_s']

# Sort by combined score and select top 500
df_top500 = df_combined.sort_values(by='Combined_Score', ascending=False).head(500)

# Save top 500 locations
df_top500.to_csv(OUTPUT_TOP500, index=False)
print(f"✅ Top 500 locations saved at: {OUTPUT_TOP500}")


✅ Top 500 locations saved at: C:\GISDataManipulation\TopLocation\Top500_High_Wind_Solar_Locations.csv


In [2]:
# Top 10 wind and solar locations both for Norway and for Denmark
import pandas as pd
from scipy.spatial import cKDTree
import numpy as np

# Input paths
SOLAR_CSV = r"C:\GISDataManipulation\Merged_Denmark_Norway_SolarPVOUT\Merged_SolarPVOUT_WithAverage.csv"
WIND_TXT = r"C:\GISDataManipulation\NorwayDenmarkMergedWindSpeed100m\NorwayDenmarkMergedWindSpeed100mFromTXT.txt"
OUTPUT_TOP10_NORWAY = r"C:\GISDataManipulation\TopLocation\Norway_Top10_High_Wind_Solar.csv"
OUTPUT_TOP10_DENMARK = r"C:\GISDataManipulation\TopLocation\Denmark_Top10_High_Wind_Solar.csv"

# Load solar data
df_solar = pd.read_csv(SOLAR_CSV)
pvout_month_columns = [col for col in df_solar.columns if col.startswith('PVOUT_Month_')]
if 'PVOUT_Average' not in df_solar.columns:
    df_solar['PVOUT_Average'] = df_solar[pvout_month_columns].mean(axis=1)

# Load wind data
df_wind = pd.read_csv(WIND_TXT, delimiter=",")
wind_col_candidates = [col for col in df_wind.columns if 'wind' in col.lower() or 'value' in col.lower()]
wind_col = wind_col_candidates[0]
df_wind = df_wind.rename(columns={wind_col: 'Wind_Speed_m_s'})

# KDTree matching
solar_coords = df_solar[['Latitude', 'Longitude']].values
wind_coords = df_wind[['Latitude', 'Longitude']].values

tree = cKDTree(wind_coords)
distances, indices = tree.query(solar_coords, k=1, distance_upper_bound=0.01)  # 1 km tolerance

# Filter out invalid matches
valid_mask = (indices < len(df_wind)) & np.isfinite(distances)
df_valid_solar = df_solar[valid_mask].reset_index(drop=True)
valid_indices = indices[valid_mask]

# Extract corresponding wind speeds and merge
matched_wind = df_wind.iloc[valid_indices].reset_index(drop=True)
df_combined = df_valid_solar.copy()
df_combined['Wind_Speed_m_s'] = matched_wind['Wind_Speed_m_s'].values

# Compute combined score
df_combined['Combined_Score'] = df_combined['PVOUT_Average'] * df_combined['Wind_Speed_m_s']

# Separate top 10 for Norway and Denmark
df_norway = df_combined[df_combined['Country'] == 'Norway']
df_denmark = df_combined[df_combined['Country'] == 'Denmark']

top10_norway = df_norway.sort_values(by='Combined_Score', ascending=False).head(10)
top10_denmark = df_denmark.sort_values(by='Combined_Score', ascending=False).head(10)

# Save top 10 locations for each country
top10_norway.to_csv(OUTPUT_TOP10_NORWAY, index=False)
top10_denmark.to_csv(OUTPUT_TOP10_DENMARK, index=False)

print(f"Top 10 best locations for Norway saved at: {OUTPUT_TOP10_NORWAY}")
print(f"Top 10 best locations for Denmark saved at: {OUTPUT_TOP10_DENMARK}")

# Display top 10 results
print("\n Top 10 locations in Norway:")
print(top10_norway[['Latitude', 'Longitude', 'PVOUT_Average', 'Wind_Speed_m_s', 'Combined_Score']])

print("\n Top 10 locations in Denmark:")
print(top10_denmark[['Latitude', 'Longitude', 'PVOUT_Average', 'Wind_Speed_m_s', 'Combined_Score']])


Top 10 best locations for Norway saved at: C:\GISDataManipulation\TopLocation\Norway_Top10_High_Wind_Solar.csv
Top 10 best locations for Denmark saved at: C:\GISDataManipulation\TopLocation\Denmark_Top10_High_Wind_Solar.csv

 Top 10 locations in Norway:
         Latitude  Longitude  PVOUT_Average  Wind_Speed_m_s  Combined_Score
285444  59.945833   7.145833      94.583750       13.004805     1230.043188
281067  59.904167   7.162500      94.512917       12.316573     1164.075251
285443  59.945833   7.137500      94.362750       12.298849     1160.553223
243030  59.520833   6.862500      88.329667       13.102022     1157.297251
284560  59.937500   7.120833      94.554333       12.070889     1141.354907
276150  59.854167   8.645833      86.698667       13.152931     1140.341599
174954  58.754167   6.270833      78.694667       14.442714     1136.564543
286324  59.954167   7.112500      94.110833       12.068942     1135.818196
281068  59.904167   7.170833      93.178333       12.180452   

In [3]:
#100 Norway and Denmark Locations
import pandas as pd
from scipy.spatial import cKDTree
from sklearn.cluster import DBSCAN
import numpy as np

# Input paths
SOLAR_CSV = r"C:\GISDataManipulation\Merged_Denmark_Norway_SolarPVOUT\Merged_SolarPVOUT_WithAverage.csv"
WIND_TXT = r"C:\GISDataManipulation\NorwayDenmarkMergedWindSpeed100m\NorwayDenmarkMergedWindSpeed100mFromTXT.txt"
OUTPUT_TOP20_NORWAY_CLUSTERED = r"C:\GISDataManipulation\TopLocation\Norway_Top20_Clustered_High_Wind_Solar.csv"
OUTPUT_TOP20_DENMARK_CLUSTERED = r"C:\GISDataManipulation\TopLocation\Denmark_Top20_Clustered_High_Wind_Solar.csv"

# Load solar data
df_solar = pd.read_csv(SOLAR_CSV)
pvout_month_columns = [col for col in df_solar.columns if col.startswith('PVOUT_Month_')]
if 'PVOUT_Average' not in df_solar.columns:
    df_solar['PVOUT_Average'] = df_solar[pvout_month_columns].mean(axis=1)

# Load wind data
df_wind = pd.read_csv(WIND_TXT, delimiter=",")
wind_col_candidates = [col for col in df_wind.columns if 'wind' in col.lower() or 'value' in col.lower()]
wind_col = wind_col_candidates[0]
df_wind = df_wind.rename(columns={wind_col: 'Wind_Speed_m_s'})

# KDTree matching to align solar and wind points
solar_coords = df_solar[['Latitude', 'Longitude']].values
wind_coords = df_wind[['Latitude', 'Longitude']].values
tree = cKDTree(wind_coords)
distances, indices = tree.query(solar_coords, k=1, distance_upper_bound=0.01)  # ~1 km tolerance

# Filter only valid matches
valid_mask = (indices < len(df_wind)) & np.isfinite(distances)
df_valid_solar = df_solar[valid_mask].reset_index(drop=True)
valid_indices = indices[valid_mask]

matched_wind = df_wind.iloc[valid_indices].reset_index(drop=True)
df_combined = df_valid_solar.copy()
df_combined['Wind_Speed_m_s'] = matched_wind['Wind_Speed_m_s'].values
df_combined['Combined_Score'] = df_combined['PVOUT_Average'] * df_combined['Wind_Speed_m_s']

# Function to cluster and manually select best locations without warnings
def cluster_and_select_top(df_country, eps=0.005, top_n=100):
    df_country = df_country.copy()
    coords = df_country[['Latitude', 'Longitude']].values
    clustering = DBSCAN(eps=eps, min_samples=1).fit(coords)
    df_country.loc[:, 'Cluster'] = clustering.labels_

    best_points = []
    for cluster_id in df_country['Cluster'].unique():
        cluster_data = df_country[df_country['Cluster'] == cluster_id]
        best_location = cluster_data.sort_values(by='Combined_Score', ascending=False).iloc[0]
        best_points.append(best_location)

    best_points_df = pd.DataFrame(best_points)
    return best_points_df.sort_values(by='Combined_Score', ascending=False).head(top_n)

# Separate by country and process
df_norway = df_combined[df_combined['Country'] == 'Norway']
df_denmark = df_combined[df_combined['Country'] == 'Denmark']

top20_norway_clustered = cluster_and_select_top(df_norway, eps=0.005, top_n=100)
top20_denmark_clustered = cluster_and_select_top(df_denmark, eps=0.005, top_n=100)

# Save results
top20_norway_clustered.to_csv(OUTPUT_TOP20_NORWAY_CLUSTERED, index=False)
top20_denmark_clustered.to_csv(OUTPUT_TOP20_DENMARK_CLUSTERED, index=False)

print(f"Top 20 clustered locations for Norway saved at: {OUTPUT_TOP20_NORWAY_CLUSTERED}")
print(f"Top 20 clustered locations for Denmark saved at: {OUTPUT_TOP20_DENMARK_CLUSTERED}")

# Display results
print("\n Top 20 clustered best locations in Norway:")
print(top20_norway_clustered[['Latitude', 'Longitude', 'PVOUT_Average', 'Wind_Speed_m_s', 'Combined_Score']])

print("\n Top 20 clustered best locations in Denmark:")
print(top20_denmark_clustered[['Latitude', 'Longitude', 'PVOUT_Average', 'Wind_Speed_m_s', 'Combined_Score']])


Top 20 clustered locations for Norway saved at: C:\GISDataManipulation\TopLocation\Norway_Top20_Clustered_High_Wind_Solar.csv
Top 20 clustered locations for Denmark saved at: C:\GISDataManipulation\TopLocation\Denmark_Top20_Clustered_High_Wind_Solar.csv

 Top 20 clustered best locations in Norway:
         Latitude  Longitude  PVOUT_Average  Wind_Speed_m_s  Combined_Score
285444  59.945833   7.145833      94.583750       13.004805     1230.043188
281067  59.904167   7.162500      94.512917       12.316573     1164.075251
285443  59.945833   7.137500      94.362750       12.298849     1160.553223
243030  59.520833   6.862500      88.329667       13.102022     1157.297251
284560  59.937500   7.120833      94.554333       12.070889     1141.354907
...           ...        ...            ...             ...             ...
277725  59.870833   7.895833      92.543250       11.094862     1026.754586
243029  59.520833   6.854167      87.599000       11.719531     1026.619201
286322  59.954167

In [4]:
# Implementation of Cluster Size
import pandas as pd
from scipy.spatial import cKDTree
from sklearn.cluster import DBSCAN
import numpy as np

# Input paths
SOLAR_CSV = r"C:\GISDataManipulation\Merged_Denmark_Norway_SolarPVOUT\Merged_SolarPVOUT_WithAverage.csv"
WIND_TXT = r"C:\GISDataManipulation\NorwayDenmarkMergedWindSpeed100m\NorwayDenmarkMergedWindSpeed100mFromTXT.txt"
OUTPUT_TOP20_NORWAY_CLUSTERED = r"C:\GISDataManipulation\TopLocation\Norway_Top20_Clustered_High_Wind_Solar_wClusterSize.csv"
OUTPUT_TOP20_DENMARK_CLUSTERED = r"C:\GISDataManipulation\TopLocation\Denmark_Top20_Clustered_High_Wind_Solar_wClusterSize.csv"

# Load solar data
df_solar = pd.read_csv(SOLAR_CSV)
pvout_month_columns = [col for col in df_solar.columns if col.startswith('PVOUT_Month_')]
if 'PVOUT_Average' not in df_solar.columns:
    df_solar['PVOUT_Average'] = df_solar[pvout_month_columns].mean(axis=1)

# Load wind data
df_wind = pd.read_csv(WIND_TXT, delimiter=",")
wind_col_candidates = [col for col in df_wind.columns if 'wind' in col.lower() or 'value' in col.lower()]
wind_col = wind_col_candidates[0]
df_wind = df_wind.rename(columns={wind_col: 'Wind_Speed_m_s'})

# KDTree spatial matching between solar and wind datasets
solar_coords = df_solar[['Latitude', 'Longitude']].values
wind_coords = df_wind[['Latitude', 'Longitude']].values
tree = cKDTree(wind_coords)
distances, indices = tree.query(solar_coords, k=1, distance_upper_bound=0.05)  # ~1 km tolerance

# Keep only valid matches
valid_mask = (indices < len(df_wind)) & np.isfinite(distances)
df_valid_solar = df_solar[valid_mask].reset_index(drop=True)
valid_indices = indices[valid_mask]

matched_wind = df_wind.iloc[valid_indices].reset_index(drop=True)
df_combined = df_valid_solar.copy()
df_combined['Wind_Speed_m_s'] = matched_wind['Wind_Speed_m_s'].values
df_combined['Combined_Score'] = df_combined['PVOUT_Average'] * df_combined['Wind_Speed_m_s']

# Function to cluster locations and select best point per cluster along with cluster size
def cluster_and_select_top(df_country, eps=0.01, top_n=50):
    df_country = df_country.copy()
    coords = df_country[['Latitude', 'Longitude']].values
    clustering = DBSCAN(eps=eps, min_samples=1).fit(coords)
    df_country.loc[:, 'Cluster'] = clustering.labels_

    # Calculate cluster sizes
    cluster_sizes = df_country['Cluster'].value_counts().to_dict()

    best_points = []
    for cluster_id in df_country['Cluster'].unique():
        cluster_data = df_country[df_country['Cluster'] == cluster_id]
        best_location = cluster_data.sort_values(by='Combined_Score', ascending=False).iloc[0].copy()
        best_location['Cluster_Size'] = cluster_sizes[cluster_id]
        best_points.append(best_location)

    best_points_df = pd.DataFrame(best_points)
    return best_points_df.sort_values(by='Combined_Score', ascending=False).head(top_n)

# Separate by country
df_norway = df_combined[df_combined['Country'] == 'Norway']
df_denmark = df_combined[df_combined['Country'] == 'Denmark']

# Cluster and select top 20 for each country
top20_norway_clustered = cluster_and_select_top(df_norway, eps=0.01, top_n=50)
top20_denmark_clustered = cluster_and_select_top(df_denmark, eps=0.01, top_n=50)

# Save final output with Cluster Size column
top20_norway_clustered.to_csv(OUTPUT_TOP20_NORWAY_CLUSTERED, index=False)
top20_denmark_clustered.to_csv(OUTPUT_TOP20_DENMARK_CLUSTERED, index=False)

print(f"Top 20 clustered locations for Norway saved at: {OUTPUT_TOP20_NORWAY_CLUSTERED}")
print(f"Top 20 clustered locations for Denmark saved at: {OUTPUT_TOP20_DENMARK_CLUSTERED}")

# Display results preview
print("\n Top 20 clustered best locations in Norway (with cluster size):")
print(top20_norway_clustered[['Latitude', 'Longitude', 'PVOUT_Average', 'Wind_Speed_m_s', 'Combined_Score', 'Cluster_Size']])

print("\n Top 20 clustered best locations in Denmark (with cluster size):")
print(top20_denmark_clustered[['Latitude', 'Longitude', 'PVOUT_Average', 'Wind_Speed_m_s', 'Combined_Score', 'Cluster_Size']])


Top 20 clustered locations for Norway saved at: C:\GISDataManipulation\TopLocation\Norway_Top20_Clustered_High_Wind_Solar_wClusterSize.csv
Top 20 clustered locations for Denmark saved at: C:\GISDataManipulation\TopLocation\Denmark_Top20_Clustered_High_Wind_Solar_wClusterSize.csv

 Top 20 clustered best locations in Norway (with cluster size):
         Latitude  Longitude  PVOUT_Average  Wind_Speed_m_s  Combined_Score  \
288521  59.945833   7.145833       94.58375       13.004805     1230.043188   

        Cluster_Size  
288521        150530  

 Top 20 clustered best locations in Denmark (with cluster size):
         Latitude  Longitude  PVOUT_Average  Wind_Speed_m_s  Combined_Score  \
36157   55.287500  14.762500      92.272500       10.040957      926.504246   
65973   55.737500  10.904167      90.686667       10.104390      916.333461   
116118  56.712500  11.537500      91.895083        9.769578      897.776183   
94260   56.195833  11.704167      90.877000        9.772202      888

In [5]:
# Best 5 Optimal Locations
import pandas as pd
from sklearn.cluster import DBSCAN

# Paths: Replace these with your actual top 100 output files if separate
INPUT_TOP100_NORWAY = r"C:\GISDataManipulation\TopLocation\Norway_Top20_Clustered_High_Wind_Solar_wClusterSize.csv"
INPUT_TOP100_DENMARK = r"C:\GISDataManipulation\TopLocation\Denmark_Top20_Clustered_High_Wind_Solar_wClusterSize.csv"
OUTPUT_NORWAY_5_OPTIMAL = r"C:\GISDataManipulation\TopLocation\Norway_5_Optimal_Clustered_Locations.csv"
OUTPUT_DENMARK_5_OPTIMAL = r"C:\GISDataManipulation\TopLocation\Denmark_5_Optimal_Clustered_Locations.csv"

def select_top_5_clusters(file_path, output_path, eps=0.05):
    df = pd.read_csv(file_path)
    df = df.copy()

    # Re-cluster to find densest areas
    coords = df[['Latitude', 'Longitude']].values
    clustering = DBSCAN(eps=eps, min_samples=1).fit(coords)
    df.loc[:, 'Cluster'] = clustering.labels_

    # Count cluster sizes
    cluster_sizes = df['Cluster'].value_counts().reset_index()
    cluster_sizes.columns = ['Cluster', 'Size']

    # Merge cluster sizes into the dataframe
    df = df.merge(cluster_sizes, on='Cluster', how='left')

    # Select the top 5 largest clusters
    top_5_clusters = cluster_sizes.head(5)['Cluster'].tolist()

    optimal_points = []
    for cluster_id in top_5_clusters:
        cluster_data = df[df['Cluster'] == cluster_id]
        # Pick the best location by combined score from that cluster
        best_location = cluster_data.sort_values(by='Combined_Score', ascending=False).iloc[0]
        best_location['Cluster_Size'] = cluster_data.shape[0]
        optimal_points.append(best_location)

    df_optimal = pd.DataFrame(optimal_points)
    df_optimal.to_csv(output_path, index=False)

    print(f"Saved 5 optimal clustered locations to: {output_path}")
    print(df_optimal[['Latitude', 'Longitude', 'Combined_Score', 'Cluster_Size']])

# Process Norway & Denmark
select_top_5_clusters(INPUT_TOP100_NORWAY, OUTPUT_NORWAY_5_OPTIMAL, eps=0.05)
select_top_5_clusters(INPUT_TOP100_DENMARK, OUTPUT_DENMARK_5_OPTIMAL, eps=0.05)


Saved 5 optimal clustered locations to: C:\GISDataManipulation\TopLocation\Norway_5_Optimal_Clustered_Locations.csv
    Latitude  Longitude  Combined_Score  Cluster_Size
0  59.945833   7.145833     1230.043188             1
Saved 5 optimal clustered locations to: C:\GISDataManipulation\TopLocation\Denmark_5_Optimal_Clustered_Locations.csv
    Latitude  Longitude  Combined_Score  Cluster_Size
0  55.287500  14.762500      926.504246             1
1  55.737500  10.904167      916.333461             1
2  56.712500  11.537500      897.776183             1
3  56.195833  11.704167      888.068359             1
4  57.137500  11.012500      883.020963             1


In [6]:
#Combined Norway & Denmark clustered optimal locations map
import pandas as pd
import folium
import random
from folium.plugins import MarkerCluster

# File paths
NORWAY_CLUSTERED_FILE = r"C:\GISDataManipulation\TopLocation\Norway_5_Optimal_Clustered_Locations.csv"
DENMARK_CLUSTERED_FILE = r"C:\GISDataManipulation\TopLocation\Denmark_5_Optimal_Clustered_Locations.csv"
OUTPUT_COMBINED_MAP = r"C:\GISDataManipulation\TopLocation\Norway_Denmark_Clustered_OptimalLocations_Map.html"

def random_color():
    return "#{:06x}".format(random.randint(0, 0xFFFFFF))

# Load data for both countries
df_norway = pd.read_csv(NORWAY_CLUSTERED_FILE)
df_norway['Country'] = 'Norway'
df_denmark = pd.read_csv(DENMARK_CLUSTERED_FILE)
df_denmark['Country'] = 'Denmark'

# Combine both dataframes
df_combined = pd.concat([df_norway, df_denmark], ignore_index=True)

# Create combined folium map
combined_map = folium.Map(location=[60, 10], zoom_start=5)

# Assign random colors for each cluster number
cluster_colors = {cluster_id: random_color() for cluster_id in df_combined['Cluster'].unique()}
marker_cluster = MarkerCluster().add_to(combined_map)

# Plot each location on the map
for _, row in df_combined.iterrows():
    cluster_id = row['Cluster']
    color = cluster_colors[cluster_id]
    popup_text = f"""
    <b>Country:</b> {row['Country']}<br>
    <b>Cluster Number:</b> {cluster_id}<br>
    <b>Combined Score:</b> {row['Combined_Score']:.2f}<br>
    <b>PVOUT Average:</b> {row['PVOUT_Average']:.2f} kWh/m²/day<br>
    <b>Wind Speed:</b> {row['Wind_Speed_m_s']:.2f} m/s<br>
    <b>Latitude:</b> {row['Latitude']}<br>
    <b>Longitude:</b> {row['Longitude']}<br>
    <b>Cluster Size:</b> {row.get('Cluster_Size', 'N/A')}
    """

    marker_style = dict(
        location=[row['Latitude'], row['Longitude']],
        radius=7,
        color='black' if row['Country'] == 'Norway' else 'darkblue',
        fill=True,
        fill_color=color,
        fill_opacity=0.9,
        popup=folium.Popup(popup_text, max_width=300)
    )
    folium.CircleMarker(**marker_style).add_to(marker_cluster)

# Save the combined interactive map
combined_map.save(OUTPUT_COMBINED_MAP)
print(f"Combined Norway & Denmark clustered optimal locations map saved at: {OUTPUT_COMBINED_MAP}")


Combined Norway & Denmark clustered optimal locations map saved at: C:\GISDataManipulation\TopLocation\Norway_Denmark_Clustered_OptimalLocations_Map.html
