In [2]:
import pandas as pd
import os
from datetime import datetime, timedelta
from jinja2 import Environment, FileSystemLoader
import pdfkit
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.pyplot as plt
import base64

In [3]:
def generate_faulty_panel_summary(crawl_data_folder, start_date, end_date, threshold):
    """Collect panels whose energy is far below the average of their CSV file.

    For every weekly (Monday-anchored) date from 2024-08-26 up to today, the
    folder ``<crawl_data_folder>/<YYYY-MM-DD>`` is scanned. Each ``.csv`` in it
    is summed per column and scaled with ``/ 4 / 1000``; a column is flagged
    when its value is lower than ``mean - threshold * std`` of all columns in
    that same file.

    Args:
        crawl_data_folder: Root directory containing the dated sub-folders.
        start_date: Not referenced by this function.
        end_date: Not referenced by this function.
        threshold: Number of standard deviations below the mean at which a
            column counts as faulty.

    Returns:
        dict keyed by column name. Each value is a dict with
        ``first_detected_date`` (the weekly timestamp of the first flag),
        ``resolved_status`` (``'No'`` or ``'Yes'``) and ``kWh_loss`` (sum of
        ``mean - value`` over every file in which the column was flagged).
    """
    summary = {}
    today = datetime.now().date()

    # The scan window always opens on 2024-08-26
    first_week = pd.to_datetime('2024-08-26')

    # One folder per weekly step
    for monday in pd.date_range(start=first_week, end=today, freq='W-Mon'):
        folder = os.path.join(crawl_data_folder, monday.strftime('%Y-%m-%d'))
        if not os.path.exists(folder):
            continue

        # Every CSV file found in that folder
        for csv_name in os.listdir(folder):
            if not csv_name.endswith('.csv'):
                continue

            readings = pd.read_csv(os.path.join(folder, csv_name), parse_dates=[0], index_col=[0])

            # Per-column total; original note: assumes the result is each module's energy in kWh
            energy = readings.sum() / 4 / 1000

            # Reference statistics across all columns of this file
            mean_energy = energy.mean()
            spread = energy.std()

            for panel in readings.columns:
                panel_energy = energy[panel]
                if panel_energy < mean_energy - threshold * spread:
                    record = summary.setdefault(panel, {
                        'first_detected_date': monday,
                        'resolved_status': 'No',
                        'kWh_loss': 0
                    })
                    # Accumulate the shortfall against the file average
                    record['kWh_loss'] += (mean_energy - panel_energy)

    # Update the resolved status.
    # NOTE(review): this looks for "<panel>.csv" in a folder named after today's
    # date, while the scan above reads every CSV of weekly folders — confirm
    # that such per-panel files actually exist.
    latest_folder = os.path.join(crawl_data_folder, today.strftime('%Y-%m-%d'))
    for panel, record in summary.items():
        if not os.path.exists(latest_folder):
            continue
        panel_csv = os.path.join(latest_folder, f'{panel}.csv')
        if not os.path.exists(panel_csv):
            continue

        latest = pd.read_csv(panel_csv, parse_dates=[0], index_col=[0])
        latest_energy = latest.sum() / 4 / 1000
        latest_mean = latest_energy.mean()
        latest_spread = latest_energy.std()

        if latest_energy[panel] >= latest_mean - threshold * latest_spread:
            record['resolved_status'] = 'Yes'

    return summary

In [4]:
def generate_status_table(low_energy_modules, start_date, end_date, data_folder=None):
    """Build the per-module weekly fault ('F') / normal ('N') table for the report.

    For every weekly (Monday-anchored) date from 2024-08-26 to ``end_date`` the
    file ``<data_folder>/<YYYY-MM-DD>/<module>.csv`` is looked up. A week is
    marked ``'F'`` when the file is missing or when the module's column summed
    and scaled with ``/ 4 / 1000`` is below 1; otherwise it is ``'N'``.

    Args:
        low_energy_modules: Iterable of module names, or of
            ``(module_name, percentage)`` pairs; for pairs only the name is
            used (previously the whole tuple was formatted into the file name,
            so no file was ever found).
        start_date: Not referenced by this function.
        end_date: Last date of the weekly range.
        data_folder: Root folder of the dated sub-folders. Defaults to the
            module-level ``crawl_data_folder``.

    Returns:
        dict whose ``'weeks'`` entry holds the four column headers and which
        has one row per module that had at least one ``'F'`` week, in input
        order. A row is the last two weekly statuses, then either the number
        of consecutive ``'F'`` weeks ending at the latest week or
        ``'resolved'``, then the total number of ``'F'`` weeks. When the
        weekly range is empty only the header entry is returned.
    """
    root = crawl_data_folder if data_folder is None else data_folder
    weeks = pd.date_range(start='2024-08-26', end=end_date, freq='W-Mon')

    table = {
        'weeks': ['Last Week', 'This Week', 'Total Lasting Weeks', 'Number of weeks detected fault during the past quarter']
    }

    # Nothing to evaluate: avoid indexing into an empty status list below
    if len(weeks) == 0:
        return table

    for entry in low_energy_modules:
        # Accept both plain names and (name, percentage) pairs
        module = entry[0] if isinstance(entry, (tuple, list)) else entry

        statuses = []
        for week in weeks:
            file_path = os.path.join(root, week.strftime('%Y-%m-%d'), f'{module}.csv')
            if os.path.exists(file_path):
                df = pd.read_csv(file_path, parse_dates=[0], index_col=[0])
                energy = df[module].sum() / 4 / 1000
                # Below 1 (after scaling) counts as a fault
                statuses.append('F' if energy < 1 else 'N')
            else:
                # A missing file is treated as a fault week
                statuses.append('F')

        fault_count = statuses.count('F')
        if fault_count == 0:
            # Only modules with at least one fault week are reported
            continue

        if statuses[-1] == 'F':
            # Length of the unbroken run of faults ending at the latest week
            lasting = 0
            for status in reversed(statuses):
                if status != 'F':
                    break
                lasting += 1
        else:
            lasting = 'resolved'

        # Keep the last two weekly statuses, the lasting value and the fault total
        table[module] = (statuses + [lasting])[-3:] + [fault_count]

    return table

In [5]:
# Root folder of the crawled data; the helpers below look for one sub-folder
# per date (YYYY-MM-DD) inside it. The PV_CRAWL_DATA_FOLDER environment
# variable overrides the default so the notebook is not tied to one machine.
crawl_data_folder = os.environ.get(
    'PV_CRAWL_DATA_FOLDER',
    r'C:\Users\Oscar\Desktop\文档集合\hkust\PV dead detection\Datatesting',
)

# Per-module energy of a single CSV file
def calculate_daily_energy(file_path):
    """Sum every column of one CSV file and return the totals as a table.

    The first CSV column is parsed as dates and used as the index; every
    remaining column is summed and scaled with ``/ 4 / 1000``.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        DataFrame with a fresh 0..n-1 index and the columns ``'Module'``
        (the CSV column names) and ``'PV generation (kWh)'`` (the scaled sums).
    """
    readings = pd.read_csv(file_path, parse_dates=[0], index_col=[0])
    energy_per_module = readings.sum() / 4 / 1000
    table = pd.DataFrame({
        'Module': readings.columns,
        'PV generation (kWh)': energy_per_module,
    })
    # Drop the column-name index inherited from the sums
    table = table.reset_index(drop=True)
    return table

def sum_daily_energy(target_station, start_date, end_date):
    """Total per-module energy of one station over a date window.

    Every sub-folder of ``crawl_data_folder`` whose name parses as a date
    inside ``[start_date, end_date]`` is scanned; each ``.csv`` whose file
    name contains ``target_station`` is converted with
    ``calculate_daily_energy`` and the results are summed per module.

    Args:
        target_station: Text that must occur in the CSV file name.
            NOTE(review): this is a substring match, so a station whose name
            is contained in another station's name also picks up that
            station's files — confirm against the real file naming.
        start_date: First date of the window (anything ``pd.to_datetime`` accepts).
        end_date: Last date of the window, inclusive.

    Returns:
        DataFrame with the columns ``'Module'`` and ``'PV generation (kWh)'``,
        one row per module. When no matching file exists the frame is empty
        (same columns) instead of ``pd.concat`` raising on an empty list.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    frames = []
    for folder_name in os.listdir(crawl_data_folder):
        folder_path = os.path.join(crawl_data_folder, folder_name)
        if not os.path.isdir(folder_path):
            continue

        try:
            folder_date = pd.to_datetime(folder_name)
            in_window = start_date <= folder_date <= end_date
        except ValueError:
            # Folder name is not a date, so it holds no daily data
            continue
        if not in_window:
            continue

        for file_name in os.listdir(folder_path):
            if file_name.endswith('.csv') and target_station in file_name:
                frames.append(calculate_daily_energy(os.path.join(folder_path, file_name)))

    if not frames:
        # No data for this station in the window: return an empty, correctly
        # shaped table so callers get "no modules" rather than an exception
        return pd.DataFrame({
            'Module': pd.Series(dtype='object'),
            'PV generation (kWh)': pd.Series(dtype='float64'),
        })

    return pd.concat(frames).groupby('Module').sum().reset_index()

def find_low_energy_pv(target_station, start_date, end_date, threshold):
    """List the modules of a station whose total energy is unusually low.

    Totals come from ``sum_daily_energy``; a module is selected when its total
    is lower than ``mean - threshold * std`` of all module totals.

    Args:
        target_station: Station identifier forwarded to ``sum_daily_energy``.
        start_date: First date of the window.
        end_date: Last date of the window.
        threshold: Number of standard deviations below the mean.

    Returns:
        List of ``(module, percentage)`` tuples, where ``percentage`` is the
        module's deviation from the mean in percent, rounded to two decimals.
    """
    totals = sum_daily_energy(target_station, start_date, end_date)
    mean_energy = totals['PV generation (kWh)'].mean()
    std_energy = totals['PV generation (kWh)'].std()

    cutoff = mean_energy - threshold * std_energy
    outliers = totals[totals['PV generation (kWh)'] < cutoff]

    return [
        (
            row['Module'],
            round(((row['PV generation (kWh)'] - mean_energy) / mean_energy) * 100, 2),
        )
        for _, row in outliers.iterrows()
    ]

def get_pv_panel_module(station_name):
    """Look up the 'PV Panel Module' value of a site in the site workbook.

    Args:
        station_name: Value to match against the ``'Site Name'`` column.

    Returns:
        The ``'PV Panel Module'`` entry of the first matching row, or ``None``
        when no row matches.
    """
    sites = pd.read_excel(r'C:\Users\Oscar\Desktop\API solaredge.xlsx')
    is_station = sites['Site Name'] == station_name
    matches = sites.loc[is_station, 'PV Panel Module'].values
    if len(matches) > 0:
        return matches[0]
    return None


In [6]:
def generate_heatmap(low_energy_data, start_date, end_date):
    """Render the station-by-day heatmap of low-generation modules.

    Each cell is the mean, over the station's low-energy modules, of the
    module's percentage deviation from the station average on that day. Days
    on which the station average is not positive contribute 0.

    Args:
        low_energy_data: dict mapping station name to a dict whose
            ``'low_energy_modules'`` entry is a list of ``(module, percentage)``
            tuples.
        start_date: First day of the heatmap.
        end_date: Last day of the heatmap, inclusive.

    Returns:
        The saved PNG as a Base64 string, or ``None`` (after printing an error
        message) when there is no station or no day to plot.
    """
    overall_heatmap_path = r'C:\Users\Oscar\Downloads\lowenergyheatmap.png'

    # One column per calendar day
    date_range = pd.date_range(start=start_date, end=end_date)

    heatmap_data = []
    for station, details in low_energy_data.items():
        low_energy_modules = details['low_energy_modules']
        station_data = []

        for date in date_range:
            # The station average depends only on the date, so read it once
            # instead of once per module
            avg_energy = get_average_energy(station, date)

            daily_percentages = []
            if avg_energy > 0:
                for module, _ in low_energy_modules:
                    daily_energy = get_daily_energy(station, module, date)
                    percentage_below_average = ((daily_energy - avg_energy) / avg_energy) * 100
                    daily_percentages.append(percentage_below_average)

            # Mean deviation of the day, 0 when nothing could be computed
            if daily_percentages:
                station_data.append(sum(daily_percentages) / len(daily_percentages))
            else:
                station_data.append(0)

        heatmap_data.append(station_data)

    # Nothing to draw (no station, no day, or ragged rows): the min()/max()
    # calls below would fail on empty data
    if (not heatmap_data or len(date_range) == 0
            or not all(len(row) == len(date_range) for row in heatmap_data)):
        print("Error: heatmap_data is not properly filled.")
        return

    # Draw the heatmap
    plt.figure(figsize=(7, 8))
    sns.heatmap(heatmap_data, annot=True, cmap='Blues_r',
                xticklabels=date_range.strftime('%m-%d'),
                yticklabels=[station for station in low_energy_data.keys()],  # station names only
                cbar_kws={"orientation": "vertical", "label": "Percentage(%) Below Average"},
                vmin=min(min(row) for row in heatmap_data),  # colour bar lower bound
                vmax=max(max(row) for row in heatmap_data))  # colour bar upper bound
    plt.title('Mean Percentage Below Average (%) of Low Generation Modules')
    plt.xlabel('Date')
    plt.xticks(rotation=45, fontsize=8)  # x tick labels rotated 45 degrees, font size 8
    plt.yticks(rotation=0, fontsize=8)   # y tick labels, font size 8
    plt.tight_layout()
    plt.savefig(overall_heatmap_path)
    plt.close()

    # Re-read the saved image and return it Base64-encoded
    with open(overall_heatmap_path, "rb") as img_file:
        overall_heatmap_base64 = base64.b64encode(img_file.read()).decode('utf-8')

    return overall_heatmap_base64

def generate_daily_heatmap_for_stations(low_energy_data, start_date, end_date):
    """Save one module-by-day energy heatmap per station as a PNG file.

    Rows are the station's low-energy modules followed by an ``'Average'`` row
    (the station average from ``get_average_energy``); columns are the days
    from ``start_date`` to ``end_date``. Each image is written to
    ``<images folder>/<station>_daily_heatmap.png``.

    Cell labels are drawn once by this function (integers without decimals,
    everything else with two decimals); seaborn's own annotation is switched
    off so the labels are not rendered twice on top of each other.

    Args:
        low_energy_data: dict mapping station name to a dict whose
            ``'low_energy_modules'`` entry is a list of ``(module, percentage)``
            tuples.
        start_date: First day of the heatmap.
        end_date: Last day of the heatmap, inclusive.
    """
    date_range = pd.date_range(start=start_date, end=end_date)

    image_folder = r'C:\Users\Oscar\Desktop\文档集合\hkust\PV dead detection\images'
    # savefig does not create missing directories
    os.makedirs(image_folder, exist_ok=True)

    for station, details in low_energy_data.items():
        low_energy_modules = details['low_energy_modules']

        # One row of daily energy values per low-energy module
        heatmap_data = [
            [get_daily_energy(station, module, date) for date in date_range]
            for module, _ in low_energy_modules
        ]

        # Append the station's daily average as the last row
        if heatmap_data:
            average_daily_energy = [get_average_energy(station, date) for date in date_range]
            heatmap_data.append(average_daily_energy)

        # Figure height grows with the number of rows: 0.5 inch each, at least 2
        num_rows = len(heatmap_data)
        fig_height = max(num_rows * 0.5, 2)
        plt.figure(figsize=(7, fig_height))  # fixed width of 7 inches
        plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.2)

        ax = sns.heatmap(heatmap_data, annot=False, cmap='YlOrRd',
                         xticklabels=date_range.strftime('%m-%d'),
                         yticklabels=[module for module, _ in low_energy_modules] + ['Average'],
                         cbar_kws={"orientation": "vertical", "label": "Energy (kWh)", "shrink": 0.5})

        # Cell labels: whole numbers without decimals, otherwise two decimals
        for i, row in enumerate(heatmap_data):
            for j, value in enumerate(row):
                if isinstance(value, float) and value.is_integer():
                    text = f"{int(value)}"
                else:
                    text = f"{value:.2f}"
                ax.text(j + 0.5, i + 0.5, text, ha='center', va='center', fontsize=8, color='black')

        plt.title(f'Daily Energy of Low Generation Modules for {station}', fontsize=12)
        plt.xlabel('Date', fontsize=10)
        plt.xticks(rotation=45, fontsize=8)
        plt.yticks(rotation=0, fontsize=8)
        plt.tight_layout()

        # Save heatmap image
        heatmap_path = os.path.join(image_folder, f'{station}_daily_heatmap.png')
        plt.savefig(heatmap_path)
        plt.close()

def get_daily_energy(station, module, date):
    """Energy of one module of one station on one date.

    Reads ``<crawl_data_folder>/<YYYY-MM-DD>/<station>.csv`` and returns the
    module's column summed and scaled with ``/ 4 / 1000`` (kWh according to
    the original comment).

    Returns:
        The scaled sum, or ``0`` when the file does not exist or has no
        column named ``module``.
    """
    csv_path = os.path.join(crawl_data_folder, date.strftime('%Y-%m-%d'), f'{station}.csv')
    if not os.path.exists(csv_path):
        return 0  # no file for that day

    readings = pd.read_csv(csv_path, parse_dates=[0], index_col=[0])
    if module not in readings.columns:
        return 0  # module not present in the data

    return readings[module].sum() / 4 / 1000

def get_average_energy(station, date):
    """Average per-module energy of one station on one date.

    Reads ``<crawl_data_folder>/<YYYY-MM-DD>/<station>.csv``, sums every
    column, scales with ``/ 4 / 1000`` and returns the mean of those values.

    Returns:
        The mean over all columns, or ``0`` when the file does not exist.
    """
    csv_path = os.path.join(crawl_data_folder, date.strftime('%Y-%m-%d'), f'{station}.csv')
    if not os.path.exists(csv_path):
        return 0  # no file for that day

    readings = pd.read_csv(csv_path, parse_dates=[0], index_col=[0])
    per_module = readings.sum() / 4 / 1000  # energy of every module
    return per_module.mean()

In [7]:
def main():
    """Run the weekly low-generation analysis and produce the report.

    Reads the site workbook, finds the low-energy modules of every site for
    the current week (Monday up to today), renders the per-station daily
    heatmaps and then calls ``generate_report``.
    """
    # Read the site workbook once; it provides both the site list and the
    # total panel count per site
    site_info = pd.read_excel(r'C:\Users\Oscar\Desktop\API solaredge.xlsx')
    site_list = site_info['Site Name']
    total_panels_dict = dict(zip(site_info['Site Name'], site_info['Number']))

    today = datetime.now()
    start_date = (today - timedelta(days=today.weekday())).date()  # Monday of the current week
    end_date = today.date()  # today, i.e. the week to date

    low_energy_data = {}
    threshold = 2  # number of standard deviations; adjust as needed

    for target_station in site_list:
        low_energy_modules = find_low_energy_pv(target_station, start_date, end_date, threshold)
        if low_energy_modules:  # only stations that have low-energy panels
            pv_module = get_pv_panel_module(target_station)
            low_energy_data[target_station] = {
                'module': pv_module,
                'low_energy_modules': sorted(low_energy_modules, key=lambda x: x[1])  # most severe first
            }

    # The overall heatmap is rendered by generate_report itself, so it is not
    # generated a second time here
    generate_daily_heatmap_for_stations(low_energy_data, start_date, end_date)  # per-station daily heatmaps
    generate_report(low_energy_data, start_date, end_date, total_panels_dict, threshold)  # HTML and PDF report

In [8]:
def generate_report(low_energy_data, start_date, end_date, total_panels_dict, threshold):
    """Render the weekly report to ``weekly_report.html`` and ``weekly_report.pdf``.

    Loads ``report_template.html`` from the current directory, gathers the
    overall heatmap, the faulty-panel summary, per-station statistics, the
    per-station daily heatmap images and status tables, renders the template
    and converts the HTML file to PDF with wkhtmltopdf.

    Each entry of ``low_energy_data`` is extended in place with the keys
    ``'heatmap_path'``, ``'daily_heatmap_base64'`` and ``'status_table'``.

    Args:
        low_energy_data: dict mapping station name to a dict with the keys
            ``'module'`` and ``'low_energy_modules'`` (list of
            ``(module, percentage)`` tuples).
        start_date: First day of the reporting window.
        end_date: Last day of the reporting window.
        total_panels_dict: dict mapping station name to its total panel count.
        threshold: Standard-deviation threshold forwarded to
            ``generate_faulty_panel_summary``.
    """
    env = Environment(loader=FileSystemLoader('.'))
    template = env.get_template('report_template.html')

    # Overall heatmap as Base64
    overall_heatmap_base64 = generate_heatmap(low_energy_data, start_date, end_date)

    # Faulty panel summary
    faulty_panel_summary = generate_faulty_panel_summary(crawl_data_folder, start_date, end_date, threshold)

    # Total energy loss over all faulty panels
    kWh_losses = [details['kWh_loss'] for details in faulty_panel_summary.values()]
    total_kWh_loss = sum(kWh_losses)

    # Printed as a sanity check
    print(f"Total kWh Loss: {total_kWh_loss}")

    # Low-energy panel counts per panel model and per station
    module_count = {}
    station_summary = {}

    for station, details in low_energy_data.items():
        low_energy_count = len(details['low_energy_modules'])
        total_panels = total_panels_dict.get(station, 0)

        low_energy_percentage = (low_energy_count / total_panels) * 100 if total_panels > 0 else 0

        station_summary[station] = {
            'low_energy_count': low_energy_count,
            'total_panels': total_panels,
            'low_energy_percentage': low_energy_percentage
        }

        module_name = details['module']
        module_count[module_name] = module_count.get(module_name, 0) + low_energy_count

        details['heatmap_path'] = os.path.abspath(os.path.join(r'C:\Users\Oscar\Desktop\文档集合\hkust\PV dead detection\images', f'{station}_daily_heatmap.png'))

        with open(details['heatmap_path'], "rb") as img_file:
            details['daily_heatmap_base64'] = base64.b64encode(img_file.read()).decode('utf-8')

        # Status table: pass the module names only. The stored entries are
        # (module, percentage) tuples, and handing those over unchanged makes
        # the table look up files named after the whole tuple.
        module_names = [module for module, _ in details['low_energy_modules']]
        details['status_table'] = generate_status_table(module_names, start_date, end_date)

    # Render the HTML content
    html_content = template.render(
        low_energy_data=low_energy_data,
        start_date=start_date,
        end_date=end_date,
        station_summary=station_summary,
        module_count=module_count,
        overall_heatmap_base64=overall_heatmap_base64,
        faulty_panel_summary=faulty_panel_summary,
        total_kWh_loss=total_kWh_loss
    )

    with open('weekly_report.html', 'w', encoding='utf-8') as f:
        f.write(html_content)

    # Location of the wkhtmltopdf executable; must match the local installation
    path_to_wkhtmltopdf = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
    config = pdfkit.configuration(wkhtmltopdf=path_to_wkhtmltopdf)

    options = {
        'disable-smart-shrinking': None,
        'no-stop-slow-scripts': None
    }

    # Convert the HTML file to PDF
    pdfkit.from_file('weekly_report.html', 'weekly_report.pdf', configuration=config, options=options)

In [11]:
# Entry point: run the whole weekly analysis and report generation when this
# cell (or the exported script) is executed directly.
if __name__ == "__main__":
    main()

Total kWh Loss: 284.2902741826768
