In [None]:
import time

import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import requests
import json
import datetime

# Load Metadata

In [None]:
# Read the UK station metadata table (per its author, it also lists endpoints and substations).
uk_metadata_csv = "data/UKPowerNetworks2013-2014/UK_Solar_Stations_Location_Capacity.csv"
uk_power_networks_metadata = pd.read_csv(uk_metadata_csv)
uk_power_networks_metadata.head()

In [None]:
# Load the Portuguese PV plant metadata workbook and preview the first rows.
portugal_metadata_xlsx = "data/PortugalPhotovoltaicDataset/PV Plants Metadata.xlsx"
portugal_metadata = pd.read_excel(portugal_metadata_xlsx)
portugal_metadata.head()

# Load Datasets

In [None]:
# Read the hourly UK Power Networks customer-endpoint measurements.
uk_hourly_csv = "data/UKPowerNetworks2013-2014/HourlyDataCustomerEndpoints.csv"
uk_power_networks_df = pd.read_csv(uk_hourly_csv)

In [None]:
# load portugal dataset
file_path = 'data/PortugalPhotovoltaicDataset/PV Plants Datasets.xlsx'

# Map each human-readable plant name to the workbook sheet(s) holding its data.
# The sheet name is also stored as the plant's `serial` below.
correspondence = {
    'Lisbon_1': ['84071567'],
    'Lisbon_2': ['84071569'],
    'Lisbon_3': ['84071570'],
    'Lisbon_4': ['62032213'],
    'Setubal': ['84071568'],
    'Faro': ['84071566'],
    'Braga': ['62030198'],
    'Tavira': ['73060645'],
    'Loule': ['73061935']
}

# Parse the workbook once for all required sheets instead of re-opening it per sheet.
# With a list for `sheet_name`, read_excel returns a dict {sheet name: DataFrame}.
requested_sheets = [
    sheet_name
    for sheet_names in correspondence.values()
    for sheet_name in sheet_names
]
sheets = pd.read_excel(file_path, sheet_name=requested_sheets)

# Tag every sheet with its serial (sheet name) and plant name, then stack them.
data_frames = [
    sheets[sheet_name].assign(serial=sheet_name, name=real_name)
    for real_name, sheet_names in correspondence.items()
    for sheet_name in sheet_names
]

portugal_df = pd.concat(data_frames, ignore_index=True)
portugal_df.head()

# Dataset Overview

## UK dataset

In [None]:
# Column dtypes, non-null counts and memory usage of the UK hourly measurements.
uk_power_networks_df.info()


In [None]:
# Number of missing values per column in the UK measurements.
uk_power_networks_df.isna().sum()

## Portuguese dataset

In [None]:
# Column dtypes, non-null counts and memory usage of the stacked Portuguese measurements.
portugal_df.info()

In [None]:
# Number of missing values per column in the Portuguese measurements.
portugal_df.isna().sum()

# Data Preparation

## UK

In [None]:
# Drop the sites whose location or capacity is unclear.
sites_to_remove = ["ROOKERY FARM BESTHORPE", "RAMPLING COURT", "UPPER STAPLEFIELD COMMON", "YMCA"]

# .copy() makes the filtered frame independent of the raw one, so the column
# assignments in later cells do not write into a slice of the original frame.
uk_power_networks_df = uk_power_networks_df.loc[
    ~uk_power_networks_df["Substation"].isin(sites_to_remove)
].copy()

# Show the substations that remain after filtering.
set(uk_power_networks_df["Substation"])

In [None]:
# Keep only the metadata rows whose title-cased name also occurs as a
# `Substation` in the measurement data.
uk_power_networks_metadata['name'] = uk_power_networks_metadata['name'].str.title()
unique_solar_plants = set(uk_power_networks_df["Substation"])
is_measured_plant = uk_power_networks_metadata["name"].isin(unique_solar_plants)
uk_power_networks_metadata = uk_power_networks_metadata[is_measured_plant]

uk_power_networks_metadata.head(n=1000)

In [None]:
# P_GEN_AVERAGE is the midpoint of P_GEN_MIN and P_GEN_MAX, rounded to 2 decimals.
p_gen_midpoint = (uk_power_networks_df["P_GEN_MIN"] + uk_power_networks_df["P_GEN_MAX"]) / 2
uk_power_networks_df["P_GEN_AVERAGE"] = p_gen_midpoint.round(2)

In [None]:
# Keep only the columns used downstream and give them dataset-neutral names.
# Chaining .rename() (rather than inplace=True on a column subset) returns a
# fresh frame and avoids mutating a slice of the previous one.
uk_power_networks_df = (
    uk_power_networks_df[["SerialNo", "Substation", "datetime", "P_GEN_AVERAGE"]]
    .rename(
        columns={
            'SerialNo': 'serial',
            'Substation': 'name',
            'P_GEN_AVERAGE': 'produced energy'
        }
    )
)
uk_power_networks_df.info()

In [None]:
# Left-join each station's capacity onto its measurements by station name;
# measurements without a metadata match get NaN capacity.
capacity_by_name = uk_power_networks_metadata[["name", "capacity(kwp)"]]
uk_power_networks_df = uk_power_networks_df.merge(capacity_by_name, on="name", how="left")

In [None]:
# Discard rows with any missing value and renumber the index from 0.
uk_power_networks_df = uk_power_networks_df.dropna().reset_index(drop=True)

In [None]:
# Inspect the UK frame after the capacity merge and NaN removal.
uk_power_networks_df

In [None]:
# Parse the timestamp column into real datetimes before aggregating on it.
uk_power_networks_df['datetime'] = pd.to_datetime(uk_power_networks_df['datetime'])

# First and last timestamp recorded for every station.
station_timestamps = uk_power_networks_df.groupby('name')['datetime']
uk_date_range_per_station = station_timestamps.agg(start_date='min', end_date='max').reset_index()

# Attach the measurement period to the station metadata (left join on station name).
uk_power_networks_metadata = uk_power_networks_metadata.merge(
    uk_date_range_per_station, how='left', on='name'
)

# Show the metadata including the new start_date / end_date columns.
uk_power_networks_metadata

In [None]:
# Convert both period columns to 'YYYY-MM-DD' strings (the time of day is dropped).
for date_column in ("start_date", "end_date"):
    uk_power_networks_metadata[date_column] = pd.to_datetime(uk_power_networks_metadata[date_column]).dt.strftime('%Y-%m-%d')

## Portugal

In [None]:
# Convert both period columns to 'YYYY-MM-DD' strings (the time of day is dropped).
for date_column in ("From date", "To date"):
    portugal_metadata[date_column] = pd.to_datetime(portugal_metadata[date_column]).dt.strftime('%Y-%m-%d')

In [None]:
# Keep only the columns used downstream and rename them to the same
# 'datetime' / 'produced energy' names used for the UK frame.
# Chaining .rename() (rather than inplace=True on a column subset) returns a
# fresh frame and avoids mutating a slice of the previous one.
portugal_df = (
    portugal_df[["serial", "name", "Date", "Produced Energy (kWh)"]]
    .rename(
        columns={
            "Date": "datetime",
            "Produced Energy (kWh)": "produced energy"
        }
    )
)
portugal_df.head()

In [None]:
# Cast both join keys to str so serial numbers compare equal regardless of
# how each file was parsed.
portugal_df['serial'] = portugal_df['serial'].astype(str)
portugal_metadata['PV Serial Number'] = portugal_metadata['PV Serial Number'].astype(str)

# Left-join the installed power onto every measurement row by serial number.
installed_power_by_serial = portugal_metadata[["PV Serial Number", "Installed Power (kWp)"]]
portugal_df = portugal_df.merge(
    installed_power_by_serial,
    how="left",
    left_on="serial",
    right_on="PV Serial Number",
)

# Data Visualization

In [None]:
# Marker size is the station capacity multiplied by this factor.
scale_factor = 0.2 

# UK stations: blue markers, hover shows name and capacity.
uk_trace = go.Scattermapbox(
    lat=uk_power_networks_metadata["latitude"],
    lon=uk_power_networks_metadata["longitude"],
    mode='markers',
    marker={
        "size": uk_power_networks_metadata["capacity(kwp)"] * scale_factor,
        "color": 'blue',
    },
    text=uk_power_networks_metadata["name"],
    hoverinfo='text',
    customdata=uk_power_networks_metadata["capacity(kwp)"],
    hovertemplate="<b>%{text}</b><br>Capacity: %{customdata} kW",
)

# Portuguese plants: fuchsia markers, hover shows serial, installed power and location.
portugal_trace = go.Scattermapbox(
    lat=portugal_metadata["Latitude"],
    lon=portugal_metadata["Longitude"],
    mode='markers',
    marker={
        "size": portugal_metadata["Installed Power (kWp)"] * scale_factor,
        "color": 'fuchsia',
    },
    text=portugal_metadata["PV Serial Number"],
    hoverinfo='text',
    customdata=portugal_metadata[["Installed Power (kWp)", "Location"]],
    hovertemplate="<b>%{text}</b><br>Installed Power: %{customdata[0]} kWp<br>Location: %{customdata[1]}",
)

# Both traces share one OpenStreetMap-styled map with no outer margins.
fig = go.Figure(data=[uk_trace, portugal_trace])
fig.update_layout(
    mapbox={
        "style": "open-street-map",
        "zoom": 5,
        "center": {"lat": 51, "lon": -0.5},
    },
    margin=dict(r=0, t=0, l=0, b=0),
)

fig.show()

In [None]:
# Stack the Portuguese and UK measurements so stations from both datasets
# appear in one chart.
combined_df = pd.concat([portugal_df, uk_power_networks_df])

# Number of rows (records) per station.
# NOTE: despite its name, `energy_sum` holds record counts (.size()), not summed energy.
energy_sum = combined_df.groupby('name').size()

# Create exactly one figure; a second plt.figure() call would leave an empty
# extra figure in the cell output.
plt.figure(figsize=(10, 8))
energy_sum.plot(kind='barh', color='skyblue')  # Horizontal bar plot

plt.title('Total Number of Records per Station')
plt.xlabel('Number of Records')
plt.ylabel('Station Name')
plt.show()

# Fetch Weather Data

In [None]:
# # Base URL for the Open Meteo API
# open_meteo_base_url = "https://archive-api.open-meteo.com/v1/archive"
# 
# # Template for fixed parameters
# fixed_params = {
#     "hourly": ",".join([
#         "temperature_2m",
#         "relative_humidity_2m",
#         "dew_point_2m",
#         "pressure_msl",
#         "cloud_cover",
#         "et0_fao_evapotranspiration",
#         "wind_speed_10m",
#         "wind_direction_10m",
#         "shortwave_radiation",
#         "diffuse_radiation",
#         "direct_normal_irradiance",
#         "global_tilted_irradiance",
#         "terrestrial_radiation"
#     ])
# }
# 
# def fetch_weather_data(latitude, longitude, start_date, end_date):
#     # Create a copy of the fixed parameters and add location and date-specific parameters
#     params = fixed_params.copy()
#     params.update({
#         "latitude": latitude,
#         "longitude": longitude,
#         "start_date": start_date,
#         "end_date": end_date
#     })
#     
#     # Send the request to the Open Meteo API
#     response = requests.get(open_meteo_base_url, params)
#     
#     # Check if the request was successful
#     if response.status_code == 200:
#         # Convert response to JSON
#         data = response.json()
#         return data  # You could print or process the data as needed
#     else:
#         # Print an error message if the request failed
#         print(f"Error: {response.status_code} - {response.text}")
#         return None

In [None]:
# uk_power_networks_metadata.head()

In [None]:
# # fetch portugal weather
# portugal_weather_df = []
# 
# for _, solar_plant in portugal_metadata.iterrows():
#     weather_data = fetch_weather_data(latitude=solar_plant["Latitude"], longitude=solar_plant["Longitude"], start_date=solar_plant["From date"], end_date=solar_plant["To date"])
#     hourly_data = weather_data["hourly"]
#     hourly_data["serial"] = solar_plant["PV Serial Number"]
#     portugal_weather_df.append(pd.DataFrame(hourly_data))
#     time.sleep(30) # to avoid minutely limit
# 
# portugal_weather_df = pd.concat(portugal_weather_df, ignore_index=True)
# portugal_weather_df.to_csv("data/PortugalPhotovoltaicDataset/weather.csv", index=False)

In [None]:
# # fetch uk weather
# uk_weather_df = []
# 
# for _, solar_plant in uk_power_networks_metadata.iterrows():
#     weather_data = fetch_weather_data(latitude=solar_plant["latitude"], longitude=solar_plant["longitude"], start_date=solar_plant["start_date"], end_date=solar_plant["end_date"])
#     hourly_data = weather_data["hourly"]
#     hourly_data["serial"] = solar_plant["name"]
#     uk_weather_df.append(pd.DataFrame(hourly_data))
#     time.sleep(30) # to avoid minutely limit
# 
# uk_weather_df = pd.concat(uk_weather_df, ignore_index=True)
# uk_weather_df.to_csv("data/UKPowerNetworks2013-2014/weather.csv", index=False)

In [None]:
# extract weather by timestamp

In [None]:
# combine solar dataset and weather

In [None]:
# Load the cached hourly weather for the Portuguese plants and preview 100 rows.
portugal_weather_csv = "data/PortugalPhotovoltaicDataset/weather.csv"
portugal_weather_df = pd.read_csv(portugal_weather_csv)
portugal_weather_df.head(100)

In [None]:
# Load the cached hourly weather for the UK stations and preview 100 rows.
uk_weather_csv = "data/UKPowerNetworks2013-2014/weather.csv"
uk_weather_df = pd.read_csv(uk_weather_csv)
uk_weather_df.head(100)

# Merge Solar Data with Weather Data

In [None]:
# PORTUGAL: join the hourly weather onto the solar measurements.
# NOTE: solar_data / weather_data are aliases, so the conversions below also
# modify portugal_df and portugal_weather_df.
solar_data = portugal_df
weather_data = portugal_weather_df

# Both frames are joined on `serial`, so cast it to str on each side.
for frame in (solar_data, weather_data):
    frame['serial'] = frame['serial'].astype(str)

# Timestamps: solar side is made timezone-naive, weather side is parsed as-is.
solar_data['datetime'] = pd.to_datetime(solar_data['datetime']).dt.tz_localize(None)
weather_data['time'] = pd.to_datetime(weather_data['time'])

# Inner join keeps only (serial, timestamp) pairs present in both frames;
# `time` duplicates `datetime` after the join, so it is dropped.
combined_data = solar_data.merge(
    weather_data,
    how='inner',
    left_on=['serial', 'datetime'],
    right_on=['serial', 'time'],
).drop(columns=['time'])

combined_data.to_csv("data/PortugalPhotovoltaicDataset/combined_dataset.csv", index=False)

In [None]:
# UK: join the hourly weather onto the solar measurements.
# NOTE: solar_data / weather_data are aliases, so the changes below also
# modify uk_power_networks_df and uk_weather_df.
solar_data = uk_power_networks_df
weather_data = uk_weather_df

# The weather file stores the station name under `serial`; rename it so both
# frames join on `name`.
weather_data.rename(columns={"serial":"name"}, inplace=True)

# Cast the join key to str on each side.
for frame in (solar_data, weather_data):
    frame['name'] = frame['name'].astype(str)

# Timestamps: solar side is made timezone-naive, weather side is parsed as-is.
solar_data['datetime'] = pd.to_datetime(solar_data['datetime']).dt.tz_localize(None)
weather_data['time'] = pd.to_datetime(weather_data['time'])

# Inner join keeps only (name, timestamp) pairs present in both frames;
# `time` duplicates `datetime` after the join, so it is dropped.
combined_data = solar_data.merge(
    weather_data,
    how='inner',
    left_on=['name', 'datetime'],
    right_on=['name', 'time'],
).drop(columns=['time'])

combined_data.to_csv("data/UKPowerNetworks2013-2014/combined_dataset.csv", index=False)

In [None]:
# Inspect the most recently built `combined_data` (the UK solar + weather join
# when the cells are run in order).
combined_data

# Fetch PV Estimates from the EU PVGIS API

In [None]:
# Plant metadata (coordinates, installed power) that feeds the PVGIS requests below.
portugal_metadata

In [None]:
import requests

def fetch_pvgis_data(lat, lon, peakpower, loss, outputformat="json", startyear=None, endyear=None,  pvcalculation=0, timeout=120):
    """
    Fetch an hourly time series from the PVGIS `seriescalc` API.

    Parameters:
        lat (float): Latitude in decimal degrees.
        lon (float): Longitude in decimal degrees.
        peakpower (float): Nominal power of the PV system in kW.
        loss (float): Sum of system losses in percent.
        outputformat (str, optional): Output format, "csv" or "json". Defaults to "json".
        startyear (int, optional): First year of the output. Defaults to None.
        endyear (int, optional): Last year of the output. Defaults to None.
        pvcalculation (int, optional): 1 for PV production estimation, 0 for solar radiation only. Defaults to 0.
        timeout (float or tuple, optional): Seconds to wait for the server before
            giving up (passed to `requests.get`). Defaults to 120.

    Returns:
        dict or str or None: Parsed JSON if `outputformat` is "json", otherwise the
        raw response text. None if the request fails, times out, or the JSON body
        cannot be decoded (the error is printed).
    """
    base_url = "https://re.jrc.ec.europa.eu/api/seriescalc"
    params = {
        "lat": lat,
        "lon": lon,
        "peakpower": peakpower,
        "loss": loss,
        "outputformat": outputformat,
        "startyear": startyear,
        "endyear": endyear,
        "pvcalculation": pvcalculation,
    }

    # Remove None values to keep the request clean
    params = {key: value for key, value in params.items() if value is not None}

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise exception for HTTP errors
        if outputformat == "json":
            return response.json()
        return response.text
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions where the
        # decode error is not a RequestException subclass.
        print(f"An error occurred: {e}")
        return None

In [385]:
# Display the Portuguese plant metadata.
portugal_metadata

Unnamed: 0,PV Serial Number,Location,Latitude,Longitude,From date,To date,Installed Power (kWp),Connection Power (kWn)
0,84071567,Lisbon,38.728,-9.138,2019-01-01,2022-12-31,46.0,40.0
1,84071569,Lisbon,38.833,-9.191,2019-01-01,2022-12-31,16.32,15.0
2,84071568,Setubal,38.577,-8.872,2019-01-01,2022-12-31,23.52,20.0
3,84071570,Lisbon,38.725,-9.12,2019-01-01,2022-12-31,30.0,27.0
4,84071566,Faro,37.031,-7.893,2019-01-01,2022-12-31,7.0,6.6
5,62030198,Braga,41.493,-8.496,2019-01-01,2022-12-31,64.93,60.0
6,62032213,Lisbon,38.701,-9.236,2019-01-01,2022-12-31,22.54,20.0
7,73060645,Tavira,37.131,-7.645,2019-01-01,2022-12-31,46.0,40.0
8,73061935,Loule,37.131,-8.038,2019-01-01,2022-12-31,46.25,40.0


In [408]:
# One hourly PVGIS frame per Portuguese plant is collected here.
all_data = []

for _, solar_plant in portugal_metadata.iterrows():
    # Request 2019-2022 with PV production enabled and an assumed 10% system loss.
    data = fetch_pvgis_data(
        lat=solar_plant["Latitude"],
        lon=solar_plant["Longitude"],
        peakpower=solar_plant["Installed Power (kWp)"],
        loss=10,
        outputformat="json",
        startyear=2019,
        endyear=2022,
        pvcalculation=1
    )

    # fetch_pvgis_data returns None on failure: report and move on.
    if not data:
        print(f"Failed to fetch data for station {solar_plant['PV Serial Number']}")
        continue

    # The hourly records sit under outputs -> hourly in the JSON payload.
    hourly_data = pd.DataFrame(data.get("outputs", {}).get("hourly", []))
    if hourly_data.empty:
        print(f"No hourly data found for station {solar_plant['PV Serial Number']}")
        continue

    # Tag every row with the plant it belongs to.
    hourly_data["Serial"] = solar_plant["PV Serial Number"]
    for metadata_column in ("Latitude", "Longitude", "Installed Power (kWp)"):
        hourly_data[metadata_column] = solar_plant[metadata_column]

    # PVGIS timestamps look like "20190101:0010"; parse them into datetimes.
    hourly_data["time"] = pd.to_datetime(hourly_data["time"], format="%Y%m%d:%H%M")

    all_data.append(hourly_data)

# Stack all plants into one frame and persist it as a single CSV.
combined_data = pd.concat(all_data, ignore_index=True)

combined_file_name = "data/PortugalPhotovoltaicDataset/combined_pvgis_data.csv"
combined_data.to_csv(combined_file_name, index=False)
print(f"Combined data saved to {combined_file_name}")

Combined data saved to data/PortugalPhotovoltaicDataset/combined_pvgis_data.csv


In [407]:
# Display the Portuguese plant metadata.
portugal_metadata

Unnamed: 0,PV Serial Number,Location,Latitude,Longitude,From date,To date,Installed Power (kWp),Connection Power (kWn)
0,84071567,Lisbon,38.728,-9.138,2019-01-01,2022-12-31,46.0,40.0
1,84071569,Lisbon,38.833,-9.191,2019-01-01,2022-12-31,16.32,15.0
2,84071568,Setubal,38.577,-8.872,2019-01-01,2022-12-31,23.52,20.0
3,84071570,Lisbon,38.725,-9.12,2019-01-01,2022-12-31,30.0,27.0
4,84071566,Faro,37.031,-7.893,2019-01-01,2022-12-31,7.0,6.6
5,62030198,Braga,41.493,-8.496,2019-01-01,2022-12-31,64.93,60.0
6,62032213,Lisbon,38.701,-9.236,2019-01-01,2022-12-31,22.54,20.0
7,73060645,Tavira,37.131,-7.645,2019-01-01,2022-12-31,46.0,40.0
8,73061935,Loule,37.131,-8.038,2019-01-01,2022-12-31,46.25,40.0


In [399]:
import pandas as pd

# One hourly PVGIS frame per UK station is collected here.
uk_data = []

# Iterate over the UK station metadata frame built earlier in this notebook
# (`uk_metadata` is never defined, so it fails on a fresh kernel).
for _, solar_plant in uk_power_networks_metadata.iterrows():
    # start_date / end_date are 'YYYY-MM-DD' strings; the first four characters
    # give the years to request. 10% system loss is assumed.
    data = fetch_pvgis_data(
        lat=solar_plant["latitude"],
        lon=solar_plant["longitude"],
        peakpower=solar_plant["capacity(kwp)"],
        loss=10,
        outputformat="json",
        startyear=int(solar_plant["start_date"][:4]),
        endyear=int(solar_plant["end_date"][:4]),
        pvcalculation=1
    )

    # fetch_pvgis_data returns None on failure: report and move on.
    if not data:
        print(f"Failed to fetch data for station {solar_plant['name']}")
        continue

    # The hourly records sit under outputs -> hourly in the JSON payload.
    hourly_data = pd.DataFrame(data.get("outputs", {}).get("hourly", []))
    if hourly_data.empty:
        print(f"No hourly data found for station {solar_plant['name']}")
        continue

    # PVGIS timestamps look like "20190101:0010"; parse them into datetimes.
    hourly_data["time"] = pd.to_datetime(hourly_data["time"], format="%Y%m%d:%H%M")

    # Restrict to the station's measurement period. end_date is a date without
    # a time, so use an exclusive bound one day later to keep the whole last day
    # (a plain `<= end_date` would cut everything after midnight).
    start_date = pd.to_datetime(solar_plant["start_date"])
    end_date = pd.to_datetime(solar_plant["end_date"])
    in_period = (hourly_data["time"] >= start_date) & (hourly_data["time"] < end_date + pd.Timedelta(days=1))
    # .copy() so the column assignments below do not write into a slice.
    hourly_data = hourly_data.loc[in_period].copy()

    # Tag every row with the station it belongs to.
    hourly_data["Name"] = solar_plant["name"]
    hourly_data["Latitude"] = solar_plant["latitude"]
    hourly_data["Longitude"] = solar_plant["longitude"]
    hourly_data["Capacity (kWp)"] = solar_plant["capacity(kwp)"]

    uk_data.append(hourly_data)

# Stack all stations into one frame and persist it as a single CSV.
combined_data = pd.concat(uk_data, ignore_index=True)

combined_file_name = "data/UKPowerNetworks2013-2014/combined_pvgis_data.csv"
combined_data.to_csv(combined_file_name, index=False)
print(f"Combined data saved to {combined_file_name}")

Combined data saved to data/UKPowerNetworks2013-2014/combined_pvgis_data.csv


In [None]:
import pandas as pd
import plotly.express as px
import json

# NOTE: `data` must already hold a PVGIS JSON response. When the notebook is run
# top to bottom it is the last response fetched by the PVGIS loops above.
# Alternatively, load a saved response from disk:
# with open("data/Timeseries SA3 46kWp 2019.json", "r") as file:  # Replace with your file path
#     data = json.load(file)

# Load hourly data into a DataFrame
hourly_data = pd.DataFrame(data["outputs"]["hourly"])

# Convert time strings such as "20190101:0010" to datetime
hourly_data["time"] = pd.to_datetime(hourly_data["time"], format="%Y%m%d:%H%M")

# Optional: restrict the plot to a sub-period (adjust for your needs)
# hourly_data = hourly_data[(hourly_data["time"] >= "2010-01-01") & (hourly_data["time"] <= "2019-01-10")]

# Interactive line plot of the PV power series.
# The title no longer claims "January 2005": no such filter is applied.
# PVGIS reports P as power in W, not energy in kWh.
fig = px.line(
    hourly_data,
    x="time",
    y="P",
    title="Hourly PV Power Output (PVGIS)",
    labels={"time": "Time", "P": "Power (P) [W]"},
    template="plotly_dark"  # Optional: Dark theme
)

# Customize the plot
fig.update_traces(line=dict(width=2), marker=dict(size=4))
fig.update_layout(
    title_font=dict(size=20),
    # Axis title fonts are set through title.font; the legacy `titlefont`
    # attribute is deprecated and rejected by newer Plotly releases.
    xaxis=dict(title=dict(text="Time", font=dict(size=16)), tickfont=dict(size=12)),
    yaxis=dict(title=dict(text="Power (P) [W]", font=dict(size=16)), tickfont=dict(size=12)),
)

# Show the interactive plot
fig.show()

In [409]:
# Re-read the saved Portuguese PVGIS CSV to check what was written to disk.
temp = pd.read_csv("data/PortugalPhotovoltaicDataset/combined_pvgis_data.csv")
temp

Unnamed: 0,time,P,G(i),H_sun,T2m,WS10m,Int,Serial,Latitude,Longitude,Installed Power (kWp)
0,2019-01-01 00:10:00,0.0,0.0,0.0,10.22,2.21,0.0,84071567,38.728,-9.138,46.00
1,2019-01-01 01:10:00,0.0,0.0,0.0,9.78,2.28,0.0,84071567,38.728,-9.138,46.00
2,2019-01-01 02:10:00,0.0,0.0,0.0,9.37,2.34,0.0,84071567,38.728,-9.138,46.00
3,2019-01-01 03:10:00,0.0,0.0,0.0,9.04,2.48,0.0,84071567,38.728,-9.138,46.00
4,2019-01-01 04:10:00,0.0,0.0,0.0,8.72,2.55,0.0,84071567,38.728,-9.138,46.00
...,...,...,...,...,...,...,...,...,...,...,...
315571,2022-12-31 19:09:00,0.0,0.0,0.0,16.07,3.72,0.0,73061935,37.131,-8.038,46.25
315572,2022-12-31 20:09:00,0.0,0.0,0.0,15.90,3.52,0.0,73061935,37.131,-8.038,46.25
315573,2022-12-31 21:09:00,0.0,0.0,0.0,15.78,3.45,0.0,73061935,37.131,-8.038,46.25
315574,2022-12-31 22:09:00,0.0,0.0,0.0,15.76,3.86,0.0,73061935,37.131,-8.038,46.25


In [411]:
# Column dtypes and row count of the reloaded PVGIS data.
temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 315576 entries, 0 to 315575
Data columns (total 11 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   time                   315576 non-null  object 
 1   P                      315576 non-null  float64
 2   G(i)                   315576 non-null  float64
 3   H_sun                  315576 non-null  float64
 4   T2m                    315576 non-null  float64
 5   WS10m                  315576 non-null  float64
 6   Int                    315576 non-null  float64
 7   Serial                 315576 non-null  int64  
 8   Latitude               315576 non-null  float64
 9   Longitude              315576 non-null  float64
 10  Installed Power (kWp)  315576 non-null  float64
dtypes: float64(9), int64(1), object(1)
memory usage: 26.5+ MB


In [412]:
# Column dtypes and row count of the measured Portuguese data, for comparison
# with the PVGIS frame.
portugal_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 315567 entries, 0 to 315566
Data columns (total 6 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   serial                 315567 non-null  object        
 1   name                   315567 non-null  object        
 2   datetime               315567 non-null  datetime64[ns]
 3   produced energy        315567 non-null  float64       
 4   PV Serial Number       315567 non-null  object        
 5   Installed Power (kWp)  315567 non-null  float64       
dtypes: datetime64[ns](1), float64(2), object(3)
memory usage: 14.4+ MB
