In [76]:
import pandas as pd
import plotly.graph_objects as go

In [77]:
# UK station metadata: one row per site with its name, latitude, longitude and
# capacity in kW (the file also lists endpoints and substations).
uk_metadata_csv = "data/UKPowerNetworks2013-2014/UK_Solar_Stations_Location_Capacity.csv"
uk_power_networks_metadata = pd.read_csv(uk_metadata_csv)
# Preview up to the first 100 rows
uk_power_networks_metadata.head(100)


Unnamed: 0,name,latitude,longitude,capacity(kw)
0,ALVERSTON CLOSE,50.812063,0.301925,24
1,BANCROFT CLOSE,52.191134,0.143331,49
2,BANKFIELD WAY,51.110636,0.456658,86
3,CARTERS MEAD,51.759948,0.129666,83
4,CHAPEL LN GT GLEMHAM,52.21292,1.423789,15
5,EAST HILLS ROAD COSTESSEY,52.647664,1.209022,116
6,ELM CRES COLCHESTER,51.891382,0.933065,92
7,FAIRVIEW ROAD,51.910623,-0.213771,56
8,FOREST ROAD,51.887576,0.932686,85
9,MAPLE DRIVE EAST,51.152506,0.088442,63


In [79]:
# load uk solar power networks hourly data
uk_power_networks_data = pd.read_csv("data/UKPowerNetworks2013-2014/HourlyDataCustomerEndpoints.csv")
# Unique substation names present in the hourly data, upper-cased so they can
# be compared with the upper-case "name" column of the metadata file.
# The vectorised .str.upper() leaves missing entries as NaN (instead of raising
# AttributeError like a per-element .upper() call would); dropna() then
# discards them so the set only ever contains strings.
unique_solar_plants = set(uk_power_networks_data["Substation"].str.upper().dropna())

{'ALVERSTON CLOSE',
 'BANCROFT CLOSE',
 'FOREST ROAD',
 'MAPLE DRIVE EAST',
 'SUFFOLK ROAD',
 'YMCA'}

In [80]:
# Drop sites with unclear locations or capacities from the hourly data.
# The exclusion list is written in upper case while "Substation" values in the
# CSV are mixed-case (e.g. 'Alverston Close'), so a plain isin() would only
# remove names that already happen to be all caps. Compare on an upper-cased
# view of the column instead; the original spelling is kept in filtered_df.
sites_to_remove = ["ROOKERY FARM BESTHORPE", "RAMPLING COURT", "UPPER STAPLEFIELD COMMON", "YMCA"]
substation_upper = uk_power_networks_data["Substation"].str.upper()
filtered_df = uk_power_networks_data.loc[~substation_upper.isin(sites_to_remove)]

# Show which substations remain after the exclusion
set(filtered_df["Substation"])

{'Alverston Close',
 'Bancroft Close',
 'Forest Road',
 'Maple Drive East',
 'Suffolk Road'}

In [84]:
# Keep only the metadata rows whose station name also appears among the
# (upper-cased) substation names of the hourly dataset.
has_hourly_data = uk_power_networks_metadata["name"].isin(unique_solar_plants)
filtered_uk_power_networks_metadata = uk_power_networks_metadata.loc[has_hourly_data]

# Preview the retained stations
filtered_uk_power_networks_metadata.head()

Unnamed: 0,name,latitude,longitude,capacity(kw)
0,ALVERSTON CLOSE,50.812063,0.301925,24
1,BANCROFT CLOSE,52.191134,0.143331,49
8,FOREST ROAD,51.887576,0.932686,85
9,MAPLE DRIVE EAST,51.152506,0.088442,63
15,SUFFOLK ROAD,50.823093,-0.931087,24


In [81]:
# Portuguese PV plants: metadata workbook (serial number, location,
# coordinates, date range and installed/connection power per plant).
file_path = 'data/PortugalPohotovolaicDataset/PV Plants Metadata.xlsx'

# With no sheet specified, pandas reads the first sheet of the workbook
portugal_metadata = pd.read_excel(io=file_path)

# Preview the first rows
portugal_metadata.head()

Unnamed: 0,PV Serial Number,Location,Latitude,Longitude,From date,To date,Installed Power (kWp),Connection Power (kWn)
0,84071567,Lisbon,38.728,-9.138,2019-01-01,2022-12-31,46.0,40.0
1,84071569,Lisbon,38.833,-9.191,2019-01-01,2022-12-31,16.32,15.0
2,84071568,Setubal,38.577,-8.872,2019-01-01,2022-12-31,23.52,20.0
3,84071570,Lisbon,38.725,-9.12,2019-01-01,2022-12-31,30.0,27.0
4,84071566,Faro,37.031,-7.893,2019-01-01,2022-12-31,7.0,6.6


In [82]:
# load portugal dataset
file_path = 'data/PortugalPohotovolaicDataset/PV Plants Datasets.xlsx'

# Map each readable plant name to the workbook sheet(s) that hold its data.
# The sheet names look like the plants' serial numbers, so each sheet name is
# also stored in the `serial` column below.
# NOTE(review): `serial` is a str here, while "PV Serial Number" in the
# metadata frame may be parsed as an integer - cast one side before merging.
correspondence = {
    'Lisbon_1': ['84071567'],
    'Lisbon_2': ['84071569'],
    'Lisbon_3': ['84071570'],
    'Lisbon_4': ['62032213'],
    'Setubal': ['84071568'],
    'Faro': ['84071566'],
    'Braga': ['62030198'],
    'Tavira': ['73060645'],
    'Loule': ['73061935']
}

# Open the workbook once and parse every sheet from the same handle; calling
# pd.read_excel(file_path, ...) inside the loop would reopen and re-read the
# whole file for every sheet.
data_frames = []
with pd.ExcelFile(file_path) as workbook:
    for real_name, sheet_names in correspondence.items():
        for sheet_name in sheet_names:
            df = workbook.parse(sheet_name=sheet_name)
            # Tag every row with the sheet it came from and the plant's name
            df['serial'] = sheet_name
            df['name'] = real_name
            data_frames.append(df)

# Stack the per-sheet frames into one long table with a fresh 0..n-1 index
portugal_df = pd.concat(data_frames, ignore_index=True)
portugal_df.head()

Unnamed: 0,Date,Produced Energy (kWh),Specific Energy (kWh/kWp),CO2 Avoided (tons),serial
0,2019-01-01 00:00:00,0.0,0.0,,84071567
1,2019-01-01 01:00:00,0.0,0.0,,84071567
2,2019-01-01 02:00:00,0.0,0.0,,84071567
3,2019-01-01 03:00:00,0.0,0.0,,84071567
4,2019-01-01 04:00:00,0.0,0.0,,84071567


In [96]:
# Marker size is capacity multiplied by this factor, so larger plants are
# drawn as larger dots.
scale_factor = 0.2

fig = go.Figure()

# First trace: UK stations (blue). Naming the trace replaces the default
# "trace 0" label in the legend and in the hover box.
fig.add_trace(
    go.Scattermapbox(
        name="UK",
        lat=filtered_uk_power_networks_metadata["latitude"],
        lon=filtered_uk_power_networks_metadata["longitude"],
        mode='markers',
        marker=dict(
            size=filtered_uk_power_networks_metadata["capacity(kw)"] * scale_factor,
            color='blue'
        ),
        text=filtered_uk_power_networks_metadata["name"],
        # hovertemplate overrides hoverinfo, so only the template is set
        customdata=filtered_uk_power_networks_metadata["capacity(kw)"],
        hovertemplate="<b>%{text}</b><br>Capacity: %{customdata} kW"
    )
)

# Second trace: Portuguese PV plants (fuchsia), labelled by serial number
fig.add_trace(
    go.Scattermapbox(
        name="Portugal",
        lat=portugal_metadata["Latitude"],
        lon=portugal_metadata["Longitude"],
        mode='markers',
        marker=dict(
            size=portugal_metadata["Installed Power (kWp)"] * scale_factor,
            color='fuchsia',
        ),
        text=portugal_metadata["PV Serial Number"],
        # Two custom columns: [0] installed power, [1] location
        customdata=portugal_metadata[["Installed Power (kWp)", "Location"]],
        hovertemplate="<b>%{text}</b><br>Installed Power: %{customdata[0]} kWp<br>Location: %{customdata[1]}"
    )
)

# Map layout: OpenStreetMap tiles, no outer margins.
# NOTE(review): the initial view is centred at lat 51 / lon -0.5; the
# Portuguese sites (lat ~37-39) may be outside the initial viewport at
# zoom 5 - confirm, or pan/zoom out to see them.
fig.update_layout(
    mapbox=dict(
        style="open-street-map",
        zoom=5,
        center=dict(lat=51, lon=-0.5)
    ),
    margin={"r":0,"t":0,"l":0,"b":0}
)

fig.show()

In [ ]:
uk_power_networks_data_preprocessed = 