In [None]:
from datetime import datetime
# Output CSV path; the ISO date of the current run is embedded in the name.
DATA_FILE = f"news_data_{datetime.now().date()}.csv"


In [None]:
import requests
from bs4 import BeautifulSoup
import re
from tqdm import tqdm  # For progress bar

# Paginated tag listing; "{}" is filled with the page number.
base_url = "http://www.cubadebate.cu/etiqueta/union-electrica-une/page/{}/"

# Inclusive range of listing pages to visit.
start_page, end_page = 1, 102

# Rows collected by the scraping loop (one dict each).
news_data = list()

# A title is kept only if it contains one of these substrings.
keywords = ["afectación", "déficit"]

def extract_deficit(title):
    """
    Pull a single integer figure out of a headline.

    Digit groups written with a thousands space ("1 500") are merged first.
    Then, depending on how many standalone numbers the title contains:

    * none  -> ``None``
    * one   -> that number as an ``int`` (no upper bound applied)
    * many  -> if the last number is <= 31 (presumably a day of the month --
      TODO confirm) and the remaining digits joined together are < 3000, the
      joined remainder is returned; otherwise all digits are joined and
      returned when that value is < 3000; otherwise ``None``.
    """
    # "1 500" -> "1500": glue a digit to a following block of three digits.
    merged = re.sub(r'(\d)\s+(\d{3})', r'\1\2', title)
    tokens = re.findall(r'\b\d+\b', merged)

    if not tokens:
        return None
    if len(tokens) == 1:
        return int(tokens[0])

    # Several numbers: first try ignoring the trailing one.
    head = int("".join(tokens[:-1]))
    if int(tokens[-1]) <= 31 and head < 3000:
        return head

    combined = int("".join(tokens))
    return combined if combined < 3000 else None

def extract_availability(text):
    """
    Extract the available generation in MW from article text.

    Looks for "disponibilidad de <number> MW" (case-insensitive). The number
    may be written with whitespace between thousands groups ("2 500 MW"), the
    same notation extract_max_demand accepts; that whitespace is removed
    before conversion.

    Returns the figure as an ``int``, or ``None`` when the phrase is absent.
    """
    match = re.search(r'disponibilidad de (\d+(?:\s\d{3})*)\s*MW', text, re.IGNORECASE)
    if not match:
        return None
    # \s covers non-breaking spaces as well, which are common in scraped HTML.
    return int(re.sub(r'\s', '', match.group(1)))

def extract_max_demand(text):
    """
    Extract the maximum demand in MW from article text.

    Looks for "demanda máxima de <number> MW" (case-insensitive). The number
    may contain whitespace between thousands groups ("3 250 MW"); every
    whitespace character the pattern can match (including non-breaking
    spaces) is stripped before conversion.

    A comma inside the number is converted to a decimal point, so "3,250"
    yields 3.25.
    NOTE(review): the pattern only allows a comma followed by exactly three
    digits, which looks like a thousands separator -- confirm the intent.

    Returns the figure as a ``float``, or ``None`` when the phrase is absent
    or the captured text cannot be converted.
    """
    match = re.search(r'demanda máxima de (\d+(?:[\s,]\d{3})*)\s*MW', text, re.IGNORECASE)
    if not match:
        return None

    # Remove *all* whitespace, not just the ASCII space: the pattern's \s also
    # matches e.g. "\xa0", which float() would otherwise reject.
    demand_str = re.sub(r'\s', '', match.group(1)).replace(",", ".")
    try:
        return float(demand_str)
    except ValueError:
        return None


# Seconds to wait for any single HTTP response before giving up on it.
REQUEST_TIMEOUT = 30

# Walk every listing page, showing a tqdm progress bar.
for page in tqdm(range(start_page, end_page + 1), desc="Scraping pages"):
    url = base_url.format(page)

    # A timeout or connection failure on one page must not abort the whole run.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        continue
    if response.status_code != 200:
        continue  # Skip listing pages that did not load

    soup = BeautifulSoup(response.text, 'html.parser')

    for article in soup.select("div.noticias"):
        title_tag = article.select_one("div.title > a")
        date_tag = article.select_one("time[datetime]")
        if not (title_tag and date_tag):
            continue

        title = title_tag.text.strip()

        # First keyword found in the title. Taking only one keeps an article
        # whose title contains both keywords from being stored (and its page
        # fetched) twice.
        keyword = next((kw for kw in keywords if kw in title), None)
        if keyword is None:
            continue

        raw_date = date_tag["datetime"].strip()
        try:
            date = datetime.fromisoformat(raw_date)
        except ValueError:
            date = None  # Unparseable timestamp: keep the row, leave the date empty

        # Figure taken from the headline; rows without one (None or 0) are dropped.
        deficit = extract_deficit(title)
        if not deficit:
            continue

        availability = None
        max_demand = None

        # The article body is searched for the availability and demand figures.
        article_url = title_tag['href']
        try:
            article_response = requests.get(article_url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException:
            article_response = None  # Row is still stored, without body figures

        if article_response is not None and article_response.status_code == 200:
            article_soup = BeautifulSoup(article_response.text, 'html.parser')
            article_body = article_soup.select_one('div.entry')
            if article_body:
                availability = extract_availability(article_body.text)
                max_demand = extract_max_demand(article_body.text)

        news_data.append({
            "date": date.strftime('%Y-%m-%d %H:%M:%S') if date else None,
            "deficit_MW": deficit,
            "type": keyword,
            "availability_MW": availability,
            "max_demand_MW": max_demand
        })



In [None]:
import csv

# Persist the scraped rows; the column order is fixed by `fieldnames`.
with open(DATA_FILE, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["date", "deficit_MW", "type", "availability_MW", "max_demand_MW"])
    writer.writeheader()
    writer.writerows(news_data)

# Report the file that was actually written (DATA_FILE carries the run date).
print(f"{len(news_data)} news items have been saved to '{DATA_FILE}'.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the scraped rows back from disk and parse the timestamp column.
df = pd.read_csv(DATA_FILE)
df['date'] = pd.to_datetime(df['date'])

# Keep deficits above 100 MW, one row per timestamp, in chronological order.
filtered_df = (
    df[df['deficit_MW'] > 100]
    .drop_duplicates(subset='date')
    .sort_values('date')
)

# Mean deficit over the retained rows; drawn as a reference line below.
average_deficit = filtered_df['deficit_MW'].mean()

# First and last month covered (abbreviated month / year) for the title.
start_date = filtered_df['date'].min().strftime('%b/%Y')
end_date = filtered_df['date'].max().strftime('%b/%Y')
date_range = f'From {start_date} to {end_date}'

# One red bar per retained row.
plt.figure(figsize=(10, 6))
plt.bar(
    filtered_df['date'],
    filtered_df['deficit_MW'],
    color='red',
    label='Energy Deficit > 100 MW',
)

# Dashed reference line at the mean.
plt.axhline(
    average_deficit,
    color='blue',
    linestyle='--',
    label=f'Average Energy Deficit: {average_deficit:.2f} MW',
)

# Title and axis labels.
plt.title(f'Daily Energy Deficit Over Time (Above 100 MW) {date_range}', fontsize=14)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Deficit (MW)', fontsize=12)

# Slanted tick labels keep the dates legible.
plt.xticks(rotation=45)

plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()

plt.legend()
plt.show()


In [None]:
# Mean deficit (MW) of the filtered rows, shown as this cell's output.
filtered_df.loc[:, 'deficit_MW'].mean()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

termoelectricas = 2608 #MW

# Load the CSV and parse the timestamp column.
df = pd.read_csv(DATA_FILE)
df['date'] = pd.to_datetime(df['date'])

# Rows with a deficit above 100 MW; copied so new columns can be added safely.
filtered_df = df[df['deficit_MW'] > 100].copy()

# Average deficit per calendar month.
filtered_df['year_month'] = filtered_df['date'].dt.to_period('M')
monthly_avg = filtered_df.groupby('year_month')['deficit_MW'].mean().reset_index()

# Period covered by the filtered rows. Computed here so the title does not
# depend on a variable left behind by another cell.
date_range = (
    f"From {filtered_df['date'].min().strftime('%b/%Y')} "
    f"to {filtered_df['date'].max().strftime('%b/%Y')}"
)

# Overall means are taken over *all* rows of df, not only the filtered ones.
mean_availability = df['availability_MW'].mean()
mean_deficit = df['deficit_MW'].mean()

percentage_deficit = (mean_deficit / mean_availability) * 100

print(f"Promedio de disponibilidad (MW): {mean_availability}")
print(f"Promedio de déficit (MW): {mean_deficit}")
print(f"El porcentaje medio que representa el déficit de la disponibilidad es: {percentage_deficit:.2f}%")

# Monthly averages as bars, with the `termoelectricas` value as a reference line.
plt.figure(figsize=(10, 6))
plt.bar(monthly_avg['year_month'].astype(str), monthly_avg['deficit_MW'], color='blue', label='Average Monthly Deficit')
plt.axhline(y=termoelectricas, color='red', linestyle='--', label=f'Thermal power generation ({termoelectricas} MW)')

# Title and axis labels.
plt.title(f'Average Monthly Energy Deficit (Above 100 MW) {date_range}', fontsize=14)
plt.xlabel('Month/Year', fontsize=12)
plt.ylabel('Average Deficit (MW)', fontsize=12)

# Slanted tick labels keep the months legible.
plt.xticks(rotation=45)

plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()

# The bar and line labels are only visible once the legend is drawn.
plt.legend()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV and parse the timestamp column.
df = pd.read_csv(DATA_FILE)
df['date'] = pd.to_datetime(df['date'])

# Reference capacities. Only `termoelectricas` is used as the denominator
# below; `total_generation` is defined but not used in this cell.
total_generation = 6499 #MW
termoelectricas = 2608 #MW

# Rows with a deficit above 100 MW, plus two derived columns:
#   deficit_percentage - deficit as a percentage of `termoelectricas`
#   year_month         - calendar month of the row
filtered_df = df[df['deficit_MW'] > 100].copy()
filtered_df = filtered_df.assign(
    deficit_percentage=(filtered_df['deficit_MW'] / termoelectricas) * 100,
    year_month=filtered_df['date'].dt.to_period('M'),
)

# Mean percentage per month, then the mean of those monthly means.
monthly_avg = filtered_df.groupby('year_month')['deficit_percentage'].mean().reset_index()
average_deficit_percentage = monthly_avg['deficit_percentage'].mean()

# Monthly bars.
plt.figure(figsize=(10, 6))
plt.bar(
    monthly_avg['year_month'].astype(str),
    monthly_avg['deficit_percentage'],
    color='blue',
    label='Average Monthly Deficit Percentage',
)

# Dashed reference line at the overall average.
plt.axhline(
    y=average_deficit_percentage,
    color='red',
    linestyle='--',
    label=f'Average Deficit Percentage ({average_deficit_percentage:.2f}%)',
)

# Title and axis labels.
plt.title('Average Monthly Energy Deficit Percentage (Above 100 MW)', fontsize=14)
plt.xlabel('Month/Year', fontsize=12)
plt.ylabel('Average Deficit Percentage (%)', fontsize=12)

# Slanted tick labels keep the months legible.
plt.xticks(rotation=45)

plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()

plt.legend()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the CSV and parse the timestamp column.
df = pd.read_csv(DATA_FILE)
df['date'] = pd.to_datetime(df['date'])

# Keep rows with a maximum demand above 1000 MW and a known deficit.
# NaN fails the `>` comparison, so rows without a demand figure drop out too.
# The copy is taken after *all* filtering so the column added below lands on
# an independent frame instead of a slice (avoids SettingWithCopyWarning).
df = df[(df['max_demand_MW'] > 1000) & df['deficit_MW'].notna()].copy()

# Calendar month of each row.
df['year_month'] = df['date'].dt.to_period('M')

# Monthly means of maximum demand and deficit.
df_grouped_monthly = df.groupby('year_month').agg({'max_demand_MW': 'mean', 'deficit_MW': 'mean'}).reset_index()

# Availability is derived here as maximum demand minus deficit.
df_grouped_monthly['availability_MW'] = df_grouped_monthly['max_demand_MW'] - df_grouped_monthly['deficit_MW']

# Integer x positions, one per month.
x = np.arange(len(df_grouped_monthly))

plt.figure(figsize=(12, 6))

# Stacked bars: availability (green) with the deficit (red) on top.
plt.bar(x, df_grouped_monthly['availability_MW'], label='Disponibilidad (MW)', color='green')
plt.bar(x, df_grouped_monthly['deficit_MW'], bottom=df_grouped_monthly['availability_MW'], label='Déficit (MW)', color='red')

# Maximum demand as a line.
plt.plot(x, df_grouped_monthly['max_demand_MW'], label='Demanda Máxima (MW)', color='blue', marker='o', linestyle='-', linewidth=2)

# Horizontal reference at 6499 MW.
plt.axhline(y=6499, color='purple', linestyle='--', label='Capacidad Máxima de Generación (6499 MW)')

# Shade the gap between availability and the 6499 MW reference.
plt.fill_between(x, df_grouped_monthly['availability_MW'], 6499, where=(df_grouped_monthly['availability_MW'] < 6499),
                 interpolate=True, color='yellow', alpha=0.5, label='Capacidad instalada sin produccion de energia')

# Title and axis labels.
plt.title('Disponibilidad, Déficit, Demanda Máxima y Capacidad Máxima de Generación Promedio Mensual', fontsize=14)
plt.xlabel('Mes', fontsize=12)
plt.ylabel('MW', fontsize=12)

# Month labels on the x axis.
plt.xticks(x, df_grouped_monthly['year_month'].astype(str), rotation=45)

plt.legend()

plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the CSV and parse the timestamp column.
df = pd.read_csv(DATA_FILE)
df['date'] = pd.to_datetime(df['date'])

# Keep rows with a maximum demand above 1000 MW and a known deficit.
# NaN fails the `>` comparison, so rows without a demand figure drop out too.
# The copy is taken after *all* filtering so the column added below lands on
# an independent frame instead of a slice (avoids SettingWithCopyWarning).
df = df[(df['max_demand_MW'] > 1000) & df['deficit_MW'].notna()].copy()

# Calendar month of each row.
df['year_month'] = df['date'].dt.to_period('M')

# Monthly means of maximum demand and deficit.
df_grouped_monthly = df.groupby('year_month').agg({'max_demand_MW': 'mean', 'deficit_MW': 'mean'}).reset_index()

# Availability is derived here as maximum demand minus deficit.
df_grouped_monthly['availability_MW'] = df_grouped_monthly['max_demand_MW'] - df_grouped_monthly['deficit_MW']

# Integer x positions, one per month.
x = np.arange(len(df_grouped_monthly))

plt.figure(figsize=(12, 6))

# Shaded gap up to the 6499 MW reference, drawn first so the bars sit on top.
plt.fill_between(x, df_grouped_monthly['availability_MW'], 6499, where=(df_grouped_monthly['availability_MW'] < 6499),
                 interpolate=True, color='yellow', alpha=0.5, label='Área bajo la Capacidad Máxima')

# Stacked bars: availability (green) with the deficit (red) on top.
plt.bar(x, df_grouped_monthly['availability_MW'], label='Disponibilidad (MW)', color='green')
plt.bar(x, df_grouped_monthly['deficit_MW'], bottom=df_grouped_monthly['availability_MW'], label='Déficit (MW)', color='red')

# Maximum demand as a line.
plt.plot(x, df_grouped_monthly['max_demand_MW'], label='Demanda Máxima (MW)', color='blue', marker='o', linestyle='-', linewidth=2)

# Horizontal reference at 6499 MW.
plt.axhline(y=6499, color='purple', linestyle='--', label='Capacidad Máxima de Generación (6499 MW)')

# Title and axis labels.
plt.title('Disponibilidad, Déficit, Demanda Máxima y Capacidad Máxima de Generación Promedio Mensual', fontsize=14)
plt.xlabel('Mes', fontsize=12)
plt.ylabel('MW', fontsize=12)

# Month labels on the x axis.
plt.xticks(x, df_grouped_monthly['year_month'].astype(str), rotation=45)

plt.legend()

plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the CSV and parse the timestamp column.
df = pd.read_csv(DATA_FILE)
df['date'] = pd.to_datetime(df['date'])

# Period covered by the whole file (before any filtering), for the x-axis label.
start_date = df['date'].min().strftime('%b/%Y')  # Format: Month/Year
end_date = df['date'].max().strftime('%b/%Y')  # Format: Month/Year
date_range = f'From {start_date} to {end_date}'

# Keep rows with a maximum demand above 1000 MW and a known deficit.
# NaN fails the `>` comparison, so rows without a demand figure drop out too.
# The copy is taken after *all* filtering so the column added below lands on
# an independent frame instead of a slice (avoids SettingWithCopyWarning).
df = df[(df['max_demand_MW'] > 1000) & df['deficit_MW'].notna()].copy()

# Calendar month of each row.
df['year_month'] = df['date'].dt.to_period('M')

# Monthly means of maximum demand and deficit.
df_grouped_monthly = df.groupby('year_month').agg({'max_demand_MW': 'mean', 'deficit_MW': 'mean'}).reset_index()

# Availability is derived here as maximum demand minus deficit.
df_grouped_monthly['availability_MW'] = df_grouped_monthly['max_demand_MW'] - df_grouped_monthly['deficit_MW']

# Integer x positions, one per month.
x = np.arange(len(df_grouped_monthly))

plt.figure(figsize=(12, 6))

# Availability: from zero up to the derived availability.
plt.fill_between(x, 0, df_grouped_monthly['availability_MW'], color='green', alpha=0.5, label='Availability (MW)')

# Deficit: stacked on top of availability.
plt.fill_between(x, df_grouped_monthly['availability_MW'], df_grouped_monthly['availability_MW'] + df_grouped_monthly['deficit_MW'],
                 color='red', alpha=0.5, label='Deficit (MW)')

# Remainder up to the 6499 MW reference.
plt.fill_between(x, df_grouped_monthly['availability_MW'] + df_grouped_monthly['deficit_MW'], 6499,
                 color='yellow', alpha=0.5, label='Unused Installed Capacity')

# Maximum demand as a line.
plt.plot(x, df_grouped_monthly['max_demand_MW'], label='Maximum Demand (MW)', color='blue', marker='o', linestyle='-', linewidth=2)

# Horizontal reference at 6499 MW.
plt.axhline(y=6499, color='purple', linestyle='--', label='Maximum Generation Capacity (6499 MW)')

# No title is set on this figure; the covered period goes in the x-axis label.
# plt.title(f'Availability, Deficit, and Unused Installed Capacity (Energy Production) {date_range}', fontsize=14)
plt.xlabel(f'Monthly average. {date_range}', fontsize=12)
plt.ylabel('Power (MW)', fontsize=12)

# Month labels on the x axis.
plt.xticks(x, df_grouped_monthly['year_month'].astype(str), rotation=45)

plt.legend()

plt.grid(True)
plt.tight_layout()

# Write the figure to disk before showing it.
plt.savefig('energy_performance_chart_monthly.png', dpi=300)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the CSV and parse the timestamp column.
df = pd.read_csv(DATA_FILE)
df['date'] = pd.to_datetime(df['date'])

# Keep rows with a maximum demand above 1000 MW and a known deficit.
# NaN fails the `>` comparison, so rows without a demand figure drop out too.
# The copy is taken after *all* filtering so the column assigned below lands
# on an independent frame instead of a slice (avoids SettingWithCopyWarning).
df = df[(df['max_demand_MW'] > 1000) & df['deficit_MW'].notna()].copy()

# Availability is derived here as maximum demand minus deficit
# (this replaces the availability column read from the CSV).
df['availability_MW'] = df['max_demand_MW'] - df['deficit_MW']

# Chronological order for plotting.
df = df.sort_values(by='date')

# Integer x positions, one per row.
x = np.arange(len(df))

plt.figure(figsize=(15, 7))

# Availability: from zero up to the derived availability.
plt.fill_between(x, 0, df['availability_MW'], color='green', alpha=0.5, label='Available Energy (MW)')

# Deficit: stacked on top of availability.
plt.fill_between(x, df['availability_MW'], df['availability_MW'] + df['deficit_MW'],
                 color='red', alpha=0.5, label='Energy Deficit (MW)')

# Remainder up to the 6499 MW reference.
plt.fill_between(x, df['availability_MW'] + df['deficit_MW'], 6499,
                 color='yellow', alpha=0.5, label='Installed Capacity without Production')

# Maximum demand as a line.
plt.plot(x, df['max_demand_MW'], label='Maximum Demand (MW)', color='blue', linestyle='-', linewidth=1)

# Horizontal reference at 6499 MW.
plt.axhline(y=6499, color='purple', linestyle='--', label='Maximum Generation Capacity (6499 MW)')

# Title (with the period covered by the filtered rows) and axis labels.
date_range = f"{df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}"
plt.title(f'Available Energy, Energy Deficit, and Installed Capacity without Production ({date_range})', fontsize=14)
plt.xlabel('Days', fontsize=12)
plt.ylabel('Electrical Energy (MW)', fontsize=12)

# Label only ten evenly spaced positions to keep the axis readable.
tick_positions = np.linspace(0, len(x) - 1, num=10, dtype=int)
plt.xticks(tick_positions, df['date'].iloc[tick_positions].dt.strftime('%Y-%m-%d'), rotation=45)

plt.legend()

plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the CSV and parse the timestamp column.
df = pd.read_csv(DATA_FILE)
df['date'] = pd.to_datetime(df['date'])

# Keep 2024 rows with a maximum demand above 1000 MW and a known deficit.
# NaN fails the `>` comparison, so rows without a demand figure drop out too.
# The copy is taken after *all* filtering so the column assigned below lands
# on an independent frame instead of a slice (avoids SettingWithCopyWarning).
df = df[
    (df['date'].dt.year == 2024)
    & (df['max_demand_MW'] > 1000)
    & df['deficit_MW'].notna()
].copy()

# Availability is derived here as maximum demand minus deficit
# (this replaces the availability column read from the CSV).
df['availability_MW'] = df['max_demand_MW'] - df['deficit_MW']

# Chronological order for plotting.
df = df.sort_values(by='date')

# Integer x positions, one per row.
x = np.arange(len(df))

plt.figure(figsize=(15, 7))

# Availability: from zero up to the derived availability.
plt.fill_between(x, 0, df['availability_MW'], color='green', alpha=0.5, label='Available Energy (MW)')

# Deficit: stacked on top of availability.
plt.fill_between(x, df['availability_MW'], df['availability_MW'] + df['deficit_MW'],
                 color='red', alpha=0.5, label='Energy Deficit (MW)')

# Remainder up to the 6499 MW reference.
plt.fill_between(x, df['availability_MW'] + df['deficit_MW'], 6499,
                 color='yellow', alpha=0.5, label='Installed Capacity without Production')

# Maximum demand as a line.
plt.plot(x, df['max_demand_MW'], label='Maximum Demand (MW)', color='blue', linestyle='-', linewidth=2)

# Horizontal reference at 6499 MW.
plt.axhline(y=6499, color='purple', linestyle='--', label='Maximum Generation Capacity (6499 MW)', linewidth=2)

# Title and axis labels.
plt.title('Available Energy, Energy Deficit, and Installed Capacity without Production (2024)', fontsize=14)
plt.xlabel('Days', fontsize=12)
plt.ylabel('Electrical Energy (MW)', fontsize=12)

# Label only ten evenly spaced positions to keep the axis readable.
tick_positions = np.linspace(0, len(x) - 1, num=10, dtype=int)
plt.xticks(tick_positions, df['date'].iloc[tick_positions].dt.strftime('%Y-%m-%d'), rotation=45)

plt.legend()

plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import numpy as np

# Load the CSV and parse the timestamp column.
df = pd.read_csv(DATA_FILE)
df['date'] = pd.to_datetime(df['date'])

# Keep 2024 rows with a maximum demand above 1000 MW and a known deficit.
# NaN fails the `>` comparison, so rows without a demand figure drop out too.
# The copy is taken after *all* filtering so the column assigned below lands
# on an independent frame instead of a slice (avoids SettingWithCopyWarning).
df = df[
    (df['date'].dt.year == 2024)
    & (df['max_demand_MW'] > 1000)
    & df['deficit_MW'].notna()
].copy()

# Availability is derived here as maximum demand minus deficit
# (this replaces the availability column read from the CSV).
df['availability_MW'] = df['max_demand_MW'] - df['deficit_MW']

# Chronological order.
df = df.sort_values(by='date')

# Blackout dates to insert as placeholder rows.
blackouts = ['2024-10-18', '2024-11-17', '2024-12-04']
blackout_dates = pd.to_datetime(blackouts)

# One row per blackout date, with NaN in every numeric column.
blackout_df = pd.DataFrame({
    'date': blackout_dates,
    'max_demand_MW': np.nan,
    'deficit_MW': np.nan,
    'availability_MW': np.nan
})

# Append the placeholders and restore chronological order.
df = pd.concat([df, blackout_df], ignore_index=True)
df = df.sort_values(by='date')

# Fill the placeholder rows by linear interpolation between their neighbours.
df['max_demand_MW'] = df['max_demand_MW'].interpolate(method='linear')
df['deficit_MW'] = df['deficit_MW'].interpolate(method='linear')
df['availability_MW'] = df['availability_MW'].interpolate(method='linear')

# Show the resulting frame.
print(df)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the CSV, discard the keyword column and parse the timestamps.
df = pd.read_csv(DATA_FILE)
df = df.drop(columns=['type'], errors='ignore')
df['date'] = pd.to_datetime(df['date'])

# Keep 2024 rows with a maximum demand above 1000 MW and a known deficit.
# NaN fails the `>` comparison, so rows without a demand figure drop out too.
# The copy is taken after all filtering so later assignments act on an
# independent frame.
df = df[
    (df['date'].dt.year == 2024)
    & (df['max_demand_MW'] > 1000)
    & df['deficit_MW'].notna()
].copy()

# Availability is derived here as maximum demand minus deficit
# (this replaces the availability column read from the CSV).
df['availability_MW'] = df['max_demand_MW'] - df['deficit_MW']

# Blackout dates to insert as placeholder rows.
blackouts = ['2024-10-18', '2024-11-17', '2024-12-04']
blackout_dates = pd.to_datetime(blackouts)

# Placeholder rows are appended with concat rather than `df.loc[len(df) + i]`:
# after filtering the index labels are no longer 0..n-1, so a computed label
# could collide with, and overwrite, a real row. NaN (not NaT) keeps the
# numeric columns as floats so they can be interpolated.
blackout_df = pd.DataFrame({
    'date': blackout_dates,
    'deficit_MW': np.nan,
    'availability_MW': np.nan,
    'max_demand_MW': np.nan,
})
df = pd.concat([df, blackout_df], ignore_index=True)

# Chronological order, one row per timestamp.
df = df.sort_values(by='date').drop_duplicates(subset=['date']).reset_index(drop=True)

# Fill the placeholder rows by linear interpolation between their neighbours.
df['max_demand_MW'] = df['max_demand_MW'].interpolate(method='linear')
df['deficit_MW'] = df['deficit_MW'].interpolate(method='linear')
df['availability_MW'] = df['availability_MW'].interpolate(method='linear')

# Index by date; rows that are still incomplete (e.g. a placeholder with no
# earlier data to interpolate from) are left out of the plotted frame.
df = df.set_index('date')
df_clean = df.dropna(subset=['max_demand_MW', 'deficit_MW', 'availability_MW']).copy()

# Integer x positions, one per plotted row.
x = np.arange(len(df_clean))

plt.figure(figsize=(15, 7))

# Availability: from zero up to the derived availability.
plt.fill_between(x, 0, df_clean['availability_MW'], color='green', alpha=0.5, label='Availability (MW)')

# Deficit: stacked on top of availability.
plt.fill_between(x, df_clean['availability_MW'], df_clean['availability_MW'] + df_clean['deficit_MW'],
                 color='red', alpha=0.5, label='Deficit (MW)')

# Remainder up to the 6499 MW reference.
plt.fill_between(x, df_clean['availability_MW'] + df_clean['deficit_MW'], 6499,
                 color='yellow', alpha=0.5, label='Installed Capacity without Production')

# Maximum demand as a line.
plt.plot(x, df_clean['max_demand_MW'], label='Maximum Demand (MW)', color='blue', linestyle='-', linewidth=2)

# Horizontal reference at 6499 MW.
plt.axhline(y=6499, color='purple', linestyle='--', label='Maximum Generation Capacity (6499 MW)')

# Axis labels.
plt.xlabel('Daily (2024)', fontsize=12)
plt.ylabel('Power (MW)', fontsize=12)

# Label only ten evenly spaced positions. Labels are read from df_clean, the
# same frame the x positions were built from, so they cannot drift apart.
tick_positions = np.linspace(0, len(x) - 1, num=10, dtype=int)
plt.xticks(tick_positions, df_clean.index[tick_positions].strftime('%Y-%m-%d'), rotation=45)

# Vertical marker for each blackout that is present in the plotted frame.
# Only the first marker drawn gets a legend entry.
blackout_label = 'General Blackout'
for blackout in blackout_dates:
    if blackout not in df_clean.index:
        continue
    blackout_index = df_clean.index.get_loc(blackout)  # Position on the x axis
    plt.axvline(x=blackout_index, color='black', linestyle='--', linewidth=2, label=blackout_label)
    blackout_label = '_nolegend_'
    plt.text(blackout_index - 2, 6500 * 0.8, blackout.strftime('%Y-%m-%d'), rotation=90, ha='center', va='bottom', color='black')

plt.legend()

plt.grid(True)
plt.tight_layout()
plt.savefig('energy_performance_chart_2024.png', dpi=300)
plt.show()

# Summary figures over all 2024 rows, including the interpolated ones.
mean_availability = df['availability_MW'].mean()
mean_deficit = df['deficit_MW'].mean()

percentage_deficit = (mean_deficit / mean_availability) * 100

print(f"Promedio de disponibilidad (MW): {mean_availability}")
print(f"Promedio de déficit (MW): {mean_deficit}")
print(f"El porcentaje medio que representa el déficit de la disponibilidad es: {percentage_deficit:.2f}%")




| Tipos de Energía             | Matriz energética actual | Matriz Energética futura para el 2030 |
|------------------------------|--------------------------|---------------------------------------|
| Crudo                        | 45 %                     | 32 %                                  |
| Fuel Térmicas                | 15,1 %                   | 5 %                                   |
| Fuel Motores                 | 18 %                     | 9 %                                   |
| Gas Acompañante              | 14,1 %                   | 8 %                                   |
| Diésel                       | 3,3 %                    | 1 %                                   |
| Biomasa                      | 3,7 %                    | 14 %                                  |
| Hidráulica                   | 0,5 %                    | 1 %                                   |
| Eólica                        | 0,1 %                    | 6 %                                   |
| Solar                        | 0,2 %                    | 3 %                                   |
| Otros combustibles Fósiles   | -                        | 21 %                                  |

Fuente: http://scielo.sld.cu/scielo.php?pid=S2223-48612023000100012&script=sci_arttext

In [None]:
import matplotlib.pyplot as plt

# Energy sources and their shares (%) in the current and the 2030 matrix.
tipos_energia = [
    'Crudo', 'Fuel Térmicas', 'Fuel Motores', 'Gas Acompañante', 'Diésel',
    'Biomasa', 'Hidráulica', 'Eólica', 'Solar', 'Otros combustibles Fósiles'
]
matriz_actual = [45, 15.1, 18, 14.1, 3.3, 3.7, 0.5, 0.1, 0.2, 0]
matriz_futura = [32, 5, 9, 8, 1, 14, 1, 6, 3, 21]

# Keyword arguments shared by both pies and by both legends.
pie_style = dict(
    labels=tipos_energia,
    startangle=90,
    colors=plt.cm.Paired.colors,
    wedgeprops={'edgecolor': 'black'},
)
legend_style = dict(
    loc="center left",
    bbox_to_anchor=(1, 0.5),
    fontsize=10,
    markerscale=1,
    handlelength=2,
)

# Two pies stacked vertically: current matrix on top, 2030 matrix below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 7))
wedges1, texts1 = ax1.pie(matriz_actual, **pie_style)
wedges2, texts2 = ax2.pie(matriz_futura, **pie_style)

# Legends to the right of each pie, listing "<source> - <share>%".
ax1.legend(
    wedges1,
    [f'{label} - {pct}%' for label, pct in zip(tipos_energia, matriz_actual)],
    title="Matriz Energética Actual",
    **legend_style,
)
ax2.legend(
    wedges2,
    [f'{label} - {pct}%' for label, pct in zip(tipos_energia, matriz_futura)],
    title="Matriz Energética Futura (2030)",
    **legend_style,
)

# Titles.
ax1.set_title('Matriz Energética Actual')
ax2.set_title('Matriz Energética Futura (2030)')

plt.tight_layout()
plt.show()


In [None]:
import requests
import numpy as np

# Data-file URL per height label ("10m" ... "100m"); the "ae" files are the
# ones read as wind speeds below.
data_wind_speed = {
    f'{height}m': f"http://www.insmet.cu/west/datos/ae{height}m_3x1.txt"
    for height in (10, 30, 50, 100)
}

def calculate_wind_potential(wind_speed):
    """
    Wind power density: 0.5 * rho * v**3, with rho fixed at 1.225 kg/m³.

    The result is in W/m² when ``wind_speed`` is given in m/s.
    """
    air_density = 1.225  # kg/m³
    return 0.5 * air_density * wind_speed ** 3

def get_max_wind_speed(url, timeout=30):
    """
    Download a whitespace-separated text file and return its largest number.

    Parameters
    ----------
    url : str
        Address of the data file.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    The maximum of all tokens that parse as ``float`` (via ``np.max``), or
    ``None`` when the download fails, the status is not 200, or no token is
    numeric. Tokens that cannot be converted are reported with a printed
    warning and skipped.
    """
    # A hung or unreachable server is reported like any other failed download
    # instead of blocking or raising.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        print(f"Error al descargar el archivo: {url}")
        return None

    if response.status_code != 200:
        print(f"Error al descargar el archivo: {url}")
        return None

    data = []
    for value in response.text.split():
        try:
            data.append(float(value))
        except ValueError:
            # Non-numeric token: warn and keep going.
            print(f"Advertencia: No se pudo convertir '{value}' a float.")

    return np.max(data) if data else None

# Largest value found per height, plus the power density derived from it.
# Heights whose download or parsing produced nothing are left out of both dicts.
max_wind_speeds = {}
wind_potentials = {}

for key, url in data_wind_speed.items():
    max_speed = get_max_wind_speed(url)
    if max_speed is None:
        continue
    max_wind_speeds[key] = max_speed
    wind_potentials[key] = calculate_wind_potential(max_speed)

# Report the maximum found at each height
for key in max_wind_speeds:
    max_speed = max_wind_speeds[key]
    print(f"Máxima velocidad de viento a {key}: {max_speed} m/s")


In [None]:
import requests
import numpy as np

# Data-file URLs keyed by a height label ('10m' ... '100m').
# NOTE(review): the variable is still called data_wind_speed, but the maxima of
# these 'ap…' files are printed as W/m² below — presumably they hold wind power
# density rather than speed; confirm against the data source.
data_wind_speed = {
    '10m': "http://www.insmet.cu/west/datos/ap10m_3x1.txt",
    '30m': "http://www.insmet.cu/west/datos/ap30m_3x1.txt",
    '50m': "http://www.insmet.cu/west/datos/ap50m_3x1.txt",
    '100m': "http://www.insmet.cu/west/datos/ap100m_3x1.txt"
}

# Download a whitespace-separated data file and return its largest numeric value
def get_max_wind_speed(url, timeout=30):
    """Fetch `url` and return the maximum of all tokens that parse as floats.

    Tokens that are not numbers are skipped silently.

    Args:
        url: Address of a text file made of whitespace-separated values.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests.get` can block indefinitely on a stalled server.

    Returns:
        The maximum parsed value, or None when the download fails (network
        error or non-200 status) or when no token could be parsed.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat connection problems like an HTTP error: report and move on,
        # so one unreachable file does not abort the whole series.
        print(f"Error al descargar el archivo: {url} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Error al descargar el archivo: {url}")
        return None

    data = []
    for value in response.text.split():
        try:
            data.append(float(value))
        except ValueError:
            # Non-numeric token (e.g. header text): deliberately ignored
            continue

    # np.max raises on an empty sequence, so report "no data" explicitly
    return np.max(data) if data else None

# Maximum value per height; an entry is None when its file could not be read
max_wind_speeds = {}
for key in data_wind_speed:
    url = data_wind_speed[key]
    max_wind_speeds[key] = get_max_wind_speed(url)

# Report the maximum found at each height
for key in max_wind_speeds:
    max_speed = max_wind_speeds[key]
    print(f"Máxima Potencial eolico {key}: {max_speed} W/m²")


In [None]:
from gearbox.transmission.gears import Transmission, Lubricant, Material, Gear, Tool
from gearbox.standards.iso import Pitting as isoPitting
from gearbox.standards.iso import Bending as isoBending
from gearbox.standards.agma import Pitting as agmaPitting
from gearbox.standards.agma import Bending as agmaBending
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the rotor load history: whitespace-separated columns, no header row.
# Columns are the time stamp followed by total, pressure and viscous
# components along x, y and z.
rotor = pd.read_csv(
    "torque_rotor",
    sep=r"\s+",  # replaces delim_whitespace=True, which pandas deprecated in 2.2
    header=None,
    names=[
        "Time",
        "total_x",
        "total_y",
        "total_z",
        "pressure_x",
        "pressure_y",
        "pressure_z",
        "viscous_x",
        "viscous_y",
        "viscous_z",
    ],
)
# Angular speed of the rotor and number of blades
omega = 0.7916813478  # rad/s
n_blades = 3

# Rotation period derived from the angular speed
f = omega / (2 * np.pi)  # frequency in Hz
T = 1 / f  # period in seconds
T_blade = T / n_blades  # time per blade

# Keep only the samples recorded after the first full rotation period
# (presumably to drop the start-up transient — confirm).
rotor = rotor[rotor["Time"] > T]

# Series used by the gearbox calculation below
time_step = rotor["Time"]
# |total_x * omega| scaled by 1e-3: presumably a torque in N·m turned into
# power in kW — confirm the units of the input file.
power = np.abs(rotor["total_x"] * omega) / 1e3


# Reference: https://onlinelibrary.wiley.com/doi/full/10.1002/we.1884
# Gear geometry shared by the pinion and the gear defined below
module = 4.5  # module, passed to Gear as `m` (presumably mm — TODO confirm)
helix_angle = 0.0  # helix angle, passed as `beta`; 0 is used here for spur (straight-tooth) gears
pressure_angle = 20.0  # pressure angle, passed as `alpha` (presumably degrees — TODO confirm)

# Lubricant handed to every Transmission built below
lubricant = Lubricant(
    name='Kiruna',
    v40=160  # viscosity at 40 °C (original note says cP — TODO confirm the unit the library expects)
)

# Material shared by both gears. Parameter meanings below are inferred from
# their names and the original notes; verify against the gearbox library.
material = Material(
    name='AISI 2010',
    classification='NV(nitrocar)',
    sh_limit=1500.,  # presumably the allowable contact (pitting) stress in MPa — TODO confirm
    sf_limit=460.,   # presumably the allowable tooth-root (bending) stress in MPa — TODO confirm
    e=206000.,       # modulus of elasticity (MPa per the original note)
    poisson=0.3,     # Poisson's ratio
    density=7.83e-6, # NOTE(review): original note said kg/m^3, but 7.83e-6 matches steel in kg/mm^3 — confirm
    brinell=286.6667  # Brinell hardness
)

# Cutting-tool / basic-rack profile shared by both gears.
# NOTE(review): names follow basic-rack notation (ha_p addendum, hf_p dedendum,
# rho_fp root fillet radius, presumably as multiples of the module) — confirm.
tool = Tool(
    ha_p=1,
    hf_p=1.25,
    rho_fp=0.38,
    x=0,
    rho_ao=0,
    delta_ao=0,
    nc=10.
)

# Pinion (the smaller, 17-tooth gear of the pair).
# Several original notes did not match the parameter names; where the meaning
# cannot be read from this file the comment is hedged — verify against the
# gearbox library documentation.
pinion = Gear(
    profile=tool,
    material=material,
    z=17.,  # number of teeth
    beta=helix_angle,  # helix angle
    alpha=pressure_angle,  # pressure angle
    m=module,  # module
    x=0.8,  # profile shift (addendum modification) coefficient
    b=491,  # face width (mm per the original note)
    bs=491,  # NOTE(review): original note repeats "gear width (mm)"; presumably a web thickness — confirm
    sr=0.0,  # NOTE(review): original note said "contact ratio"; the name suggests rim thickness — confirm
    rz=3.67,  # NOTE(review): original note said "radius ratio"; the name suggests surface roughness Rz — confirm
    precision_grade=6.0,  # accuracy (quality) grade
    shaft_diameter=35.0,  # shaft diameter (mm per the original note)
    schema=3.0,  # NOTE(review): original note said "gear type"; presumably a mounting-arrangement code — confirm
    l=60.0,  # NOTE(review): original note said "total gear length (mm)"; presumably the bearing span — confirm
    s=15.0,  # NOTE(review): original note said "material strength (MPa)"; presumably an offset distance in mm — confirm
    backlash=0.017,  # backlash between teeth (mm per the original note)
    gear_crown=1,  # crowning option code
    helix_modification=1,  # helix modification option code
    favorable_contact=True,  # favourable contact pattern flag
    gear_condition=1  # gear condition code
)

# Gear (the larger, 56-tooth wheel of the pair); shares tool, material, module
# and angles with the pinion.
# Several original notes did not match the parameter names; where the meaning
# cannot be read from this file the comment is hedged — verify against the
# gearbox library documentation.
gear = Gear(
    profile=tool,
    material=material,
    z=56.,  # number of teeth
    m=module,  # module
    beta=helix_angle,  # helix angle
    alpha=pressure_angle,  # pressure angle
    x=-0.501,  # profile shift (addendum modification) coefficient
    b=491,  # face width (mm per the original note)
    bs=491,  # NOTE(review): original note repeats "gear width (mm)"; presumably a web thickness — confirm
    sr=0.0,  # NOTE(review): original note said "contact ratio"; the name suggests rim thickness — confirm
    rz=3.67,  # NOTE(review): original note said "radius ratio"; the name suggests surface roughness Rz — confirm
    precision_grade=6.0,  # accuracy (quality) grade
    shaft_diameter=50.0,  # shaft diameter (mm per the original note)
    schema=3.0,  # NOTE(review): original note said "gear type"; presumably a mounting-arrangement code — confirm
    l=60.0,  # NOTE(review): original note said "total gear length (mm)"; presumably the bearing span — confirm
    s=35.0,  # NOTE(review): original note said "material strength (MPa)"; presumably an offset distance in mm — confirm
    backlash=-0.017,  # backlash between teeth (mm per the original note); sign is opposite to the pinion's
    gear_crown=1,  # crowning option code
    helix_modification=1,  # helix modification option code
    favorable_contact=True,  # favourable contact pattern flag
    gear_condition=1  # gear condition code
)

pair = [pinion, gear]

# Transmission settings that stay the same for every sample; only the input
# power `p` changes from one sample to the next.
fixed_settings = dict(
    gears=pair,
    lubricant=lubricant,
    rpm_in=7.56,       # input speed (RPM)
    l=863,             # centre distance per the original note (mm) — TODO confirm
    gear_box_type=2,   # gearbox type code
    ka=1.3,            # application (load) factor
    sh_min=1,          # minimum safety factor, pitting
    sf_min=1,          # minimum safety factor, bending
)

# One row per sample: (time, power, AGMA contact stress, AGMA bending stress)
data = []
for i, p in enumerate(power):
    # `power` was divided by 1e3 where it was computed, so `p` is presumably
    # in kW — confirm the unit Transmission expects.
    transmission = Transmission(p=p, **fixed_settings)
    t = time_step.iloc[i]
    sigma_h = agmaPitting(transmission=transmission).calculate()["sigmaH"]
    sigma_f = agmaBending(transmission=transmission).calculate()["sigmaftwo"]
    data.append((t, p, sigma_h, sigma_f))

data = np.array(data)
# Split the result rows into one array per column
_time = data[:, 0]           # column 0: sample time
torque = data[:, 1]          # column 1: input power per sample (legacy name kept; this is not a torque)
contact_stress = data[:, 2]  # column 2: AGMA pitting result "sigmaH"
mending_stress = data[:, 3]  # column 3: AGMA bending result "sigmaftwo" (legacy name kept; read as "bending")

# Spread (max - min) of the contact stress over the analysed window
print(contact_stress.max()-contact_stress.min())

# Bending stress over time. The title and axis labels previously described
# torque vs. contact stress, which is not what is drawn here.
plt.figure(figsize=(8, 6))
plt.plot(_time, mending_stress,  linestyle='-',  label='Tensiones de flexion')
# To inspect the other series, plot contact_stress or torque against _time
# in the same way (and adjust the labels accordingly).

# Title and axis labels (time is compared against a period in seconds when the
# samples are filtered; MPa follows the unit the original label used for stress)
plt.title("Tensiones de Flexión en el Tiempo", fontsize=14)
plt.xlabel("Tiempo (s)", fontsize=12)
plt.ylabel("Tensión de Flexión (MPa)", fontsize=12)

# Grid and legend
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend(fontsize=12)

# Render the figure
plt.tight_layout()
plt.show()

# Definir la transmisión con los parámetros para turbina eólica de 5 MW
# pair = [pinion, gear]
# transmission = Transmission(
#     gears=pair,
#     lubricant=lubricant,
#     rpm_in=7.56,  # Velocidad de entrada típica para turbina eólica (RPM)
#     p=5000.0,  # Potencia de entrada en W (5 MW)
#     l=863,  # Distancia entre los centros de los engranajes (mm)
#     gear_box_type=2,  # Tipo de caja de engranajes
#     ka=1.3,  # Factor de carga
#     sh_min=1,  # Factor de seguridad mínimo
#     sf_min=1  # Factor de seguridad mínimo en flexión
# )


# # Calcular y mostrar los resultados de Pitting y Bending según AGMA
# print('========================================')
# print('AGMA Pitting')
# print(agmaPitting(transmission=transmission).calculate())  # Cálculo de Pitting con AGMA
# print('========================================')

# print('========================================')
# print('AGMA Bending')
# print(agmaBending(transmission=transmission).calculate())  # Cálculo de Bending con AGMA
# print('========================================')

# Descomentar este bloque si deseas calcular Pitting y Bending con ISO
# print('========================================')
# print('ISO Pitting')
# print(isoPitting(transmission=transmission).calculate())  # Cálculo de Pitting con ISO
# print('========================================')

# print('========================================')
# print('ISO Bending')
# print(isoBending(transmission=transmission).calculate())  # Cálculo de Bending con ISO
# print('========================================')


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Yearly table of [year, count] rows, newest first (2023 down to 2000).
# The second column is what the plot below labels "Publications".
data = np.array([
    [2023, 15975],
    [2022, 14707],
    [2021, 12833],
    [2020, 12085],
    [2019, 11853],
    [2018, 11269],
    [2017, 10086],
    [2016, 10021],
    [2015, 8997],
    [2014, 9489],
    [2013, 8496],
    [2012, 7822],
    [2011, 6399],
    [2010, 5521],
    [2009, 4551],
    [2008, 3194],
    [2007, 2582],
    [2006, 1964],
    [2005, 1487],
    [2004, 1196],
    [2003, 981],
    [2002, 718],
    [2001, 484],
    [2000, 416],
])

# Yearly table of [year, value] rows in the same year order as `data`.
# The second column is what the plot below labels "Wind Power Generation (TWh)".
wind_power = np.array([
    [2023, 2304.44],
    [2022, 2098.52],
    [2021, 1849.47],
    [2020, 1590.68],
    [2019, 1419.8],
    [2018, 1267.89],
    [2017, 1138.96],
    [2016, 960.0],
    [2015, 829.57],
    [2014, 706.01],
    [2013, 634.05],
    [2012, 529.18],
    [2011, 439.88],
    [2010, 345.92],
    [2009, 276.21],
    [2008, 220.8],
    [2007, 171.11],
    [2006, 133.16],
    [2005, 104.37],
    [2004, 85.45],
    [2003, 63.18],
    [2002, 52.21],
    [2001, 38.17],
    [2000, 31.14],
])

# Pair the two tables row by row. hstack does not look at the year column, so
# check first that both tables list the same years in the same order;
# otherwise values from different years would be plotted against each other.
if not np.array_equal(data[:, 0], wind_power[:, 0]):
    raise ValueError("data and wind_power must list the same years in the same order")

# Columns of combined_data: 0 = year, 1 = publications, 2 = wind power generation
combined_data = np.hstack((data, wind_power[:, 1].reshape(-1, 1)))

# One figure with two y-axes that share the year axis
fig, ax1 = plt.subplots(figsize=(10, 6))

# Primary Y-axis: Wind Power Generation
ax1.set_xlabel("Year")
ax1.set_ylabel("Wind Power Generation (TWh)", color="tab:blue")
ax1.plot(combined_data[:, 0], combined_data[:, 2], color="tab:blue", label="Wind Power Generation (TWh)")
ax1.tick_params(axis="y", labelcolor="tab:blue")
ax1.legend(loc="upper left")

# Secondary Y-axis: Publications
ax2 = ax1.twinx()
ax2.set_ylabel("Publications", color="tab:orange")
ax2.plot(combined_data[:, 0], combined_data[:, 1], color="tab:orange", label="Publications")
ax2.tick_params(axis="y", labelcolor="tab:orange")
ax2.legend(loc="upper right")

# Title and layout adjustments (plt.* acts on the current axes, i.e. ax2)
plt.title("Wind Power Generation and Related Publications Over Time")
fig.tight_layout()
plt.grid(axis="x", linestyle="--", alpha=0.7)
plt.show()