In [None]:
import pandas as pd
import numpy as np
from google.cloud import bigquery
import google.auth
import plotly.express as px
import plotly.graph_objects as go
import os
from typing import List

In [None]:
# Point Application Default Credentials at the local key file before resolving them.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="./storage.json"
credentials, your_project_id = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
# Give the client the scoped credentials and project resolved above; previously
# they were computed and then ignored, and the client resolved its own defaults.
# A project of None still lets the client fall back to its own project detection.
bq = bigquery.Client(credentials=credentials, project=your_project_id)

In [None]:
class performance_check_plant_id:
    """Per-plant performance check for one resource category's prediction results.

    Bundles the steps the notebook runs in sequence: load a result CSV, build a
    per-plant NMAE table, compare the train file's ``solar_radiation`` with the
    predict file's ``total_shortwave``, join plant/station metadata from BigQuery,
    and write plotly figures as HTML files.

    Relies on names defined in earlier notebook cells: ``pd``, ``np``, ``os``,
    ``px`` and the BigQuery client ``bq``.
    """

    # Inclusive base_hour window used when averaging the radiation error per plant.
    RAD_HOUR_START = 8
    RAD_HOUR_END = 17

    def __init__(self,resource_category,filename,pred_time,check_start_date,check_end_date,train_file,predict_file):
        """Store the run configuration.

        Args:
            resource_category : name used in the result-file path and the output folder
            filename : result-file name prefix (pred_time and '.csv' are appended)
            pred_time : appended to the file name and compared, as an int, with the ``pred_time`` column
            check_start_date : lowest base_date kept (inclusive)
            check_end_date : highest base_date kept (inclusive)
            train_file : CSV path read for the ``solar_radiation`` column
            predict_file : CSV path read for ``total_shortwave`` / ``net_shortwave``
        """
        self.resource_category = resource_category
        self.filename = filename
        self.pred_time = pred_time
        self.check_start_date = check_start_date
        self.check_end_date = check_end_date
        self.train_file = train_file
        self.predict_file = predict_file

    def _ensure_output_dir(self):
        """Create this category's HTML output folder if missing and return its path."""
        output_dir = './plotly_image_plant_ids/'+self.resource_category
        # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
        os.makedirs(output_dir, exist_ok=True)
        return output_dir

    def read_result_file(self):
        """Load the result CSV, keep rows inside the check window, add per-capacity columns.

        Returns:
            DataFrame restricted to check_start_date <= base_date <= check_end_date with
            ``gen_hour`` (pv_amount / kpx_capacity) and ``gen_hour_pred``
            (pv_amount_prediction / kpx_capacity) added.
        """
        # str() lets pred_time be given as either '1' or 1; the path text is unchanged for str input.
        result_path = "../"+self.resource_category+"_연구/결과파일/"+self.filename+str(self.pred_time)+'.csv'
        result_df = pd.read_csv(result_path,index_col=0)
        # NOTE(review): base_date is compared directly with the configured bounds;
        # presumably both are ISO 'YYYY-MM-DD' strings so string ordering matches date ordering - confirm.
        in_window = (result_df.base_date>=self.check_start_date)&(result_df.base_date<=self.check_end_date)
        result_df = result_df[in_window].reset_index(drop=True)
        result_df['gen_hour'] = result_df['pv_amount'] / result_df['kpx_capacity']
        result_df['gen_hour_pred'] = result_df['pv_amount_prediction'] / result_df['kpx_capacity']
        return result_df

    def show_plotly_plant_id_result(self, plant_ids : List, result_df : pd.DataFrame,solar_ts_ns_df : pd.DataFrame):
        """
        Save one line-chart HTML file per plant id.

        Args:
            plant_ids : iterable of plant ids (each is converted with int() for filtering)
            result_df : full result DataFrame
            solar_ts_ns_df : radiation DataFrame merged onto each plant's rows
        return
            None; writes ./plotly_image_plant_ids/<resource_category>/<plant_id>.html
        """
        output_dir = self._ensure_output_dir()
        for plant_id in plant_ids:
            plant_result = result_df[result_df.plant_id==int(plant_id)].reset_index(drop=True)
            # No `on=` is given, so pandas joins on every column name the two frames share.
            plant_result_with_rad = pd.merge(plant_result,solar_ts_ns_df)
            fig = px.line(
                plant_result_with_rad,
                x='base_time',
                y=['solar_radiation_rate','total_shortwave_rate','gen_hour','gen_hour_pred'],
                title=str(plant_id),
            )
            fig.write_html(output_dir+'/'+str(plant_id)+'.html')

    def show_plotly_station_plant_id(self, performance_plant_result_id : pd.DataFrame):
        """
        Plot plant locations (coloured by nmae_plant_id) and station locations on one map.

        Args:
            performance_plant_result_id : per-plant DataFrame holding the result columns plus
                plant and station latitude/longitude
        return :
            None; writes ./plotly_image_plant_ids/<resource_category>/<resource_category>_information.html
        """
        output_dir = self._ensure_output_dir()

        station_information = (
            performance_plant_result_id[['station_id','station_latitude','station_longitude']]
            .drop_duplicates()
            .reset_index(drop=True)
        )
        fig = px.scatter_mapbox(performance_plant_result_id,
                        lat='plant_latitude',lon='plant_longitude',
                        hover_data=['nmae_plant_id','rad_nmae'],color='nmae_plant_id',
                        width=1000,height=800,zoom=10,mapbox_style='open-street-map')
        station_fig = px.scatter_mapbox(station_information,lat='station_latitude',lon='station_longitude',
                                hover_data=['station_id'],
                                width=1000,height=800,zoom=10,mapbox_style='open-street-map')
        # Stations are drawn as larger red markers and overlaid on the plant map.
        station_fig.update_traces(marker={'size':12,'color':'red'})
        fig.add_trace(station_fig.data[0])
        fig.write_html(output_dir+'/'+self.resource_category+'_information.html')

    def make_radiation_df(self):
        """Join train-file radiation with predict-file shortwave and summarise the error per plant.

        Returns:
            (solar_ts_ns_rad, plant_rad_nmae_df):
            solar_ts_ns_rad - rows matched on plant_id/base_date/base_hour with the
                ``*_rate`` columns (value / 1000) and ``rad_nmae``
                (|total_shortwave - solar_radiation| / 1000);
            plant_rad_nmae_df - one row per plant_id with the mean ``rad_nmae`` over
                RAD_HOUR_START <= base_hour <= RAD_HOUR_END, rounded to 4 decimals
                (NaN when a plant has no rows in that window).
        """
        key_cols = ['plant_id','base_date','base_hour']
        train_df = pd.read_csv(self.train_file,index_col=0)
        prediction_df = pd.read_csv(self.predict_file,index_col=0)
        prediction_df = prediction_df[prediction_df.pred_time==int(self.pred_time)].reset_index(drop=True)

        # .copy() so the new column is written to an independent frame, not a slice of train_df.
        solar_rad = train_df[key_cols+['solar_radiation']].copy()
        solar_rad['solar_radiation_rate'] = solar_rad['solar_radiation']/1000
        prediction_rad = prediction_df[key_cols+['total_shortwave','net_shortwave']].reset_index(drop=True)
        prediction_rad['total_shortwave_rate'] = prediction_rad['total_shortwave']/1000
        prediction_rad['net_shortwave_rate'] = prediction_rad['net_shortwave']/1000

        solar_ts_ns_rad = pd.merge(solar_rad,prediction_rad,on=key_cols)
        solar_ts_ns_rad['rad_nmae'] = (solar_ts_ns_rad['total_shortwave']-solar_ts_ns_rad['solar_radiation']).abs()/1000

        # One grouped pass instead of re-filtering the whole frame for every plant.
        in_hours = solar_ts_ns_rad.base_hour.between(self.RAD_HOUR_START,self.RAD_HOUR_END)
        mean_by_plant = solar_ts_ns_rad[in_hours].groupby('plant_id')['rad_nmae'].mean()
        # Reindex on every plant in the merged frame so plants without in-window rows stay listed (as NaN).
        plant_ids = solar_ts_ns_rad.plant_id.unique()
        plant_rad_nmae_df = pd.DataFrame({
            'plant_id': plant_ids,
            'rad_nmae': mean_by_plant.reindex(plant_ids).round(4).to_numpy(),
        })
        return solar_ts_ns_rad,plant_rad_nmae_df

    @staticmethod
    def get_nmae(result_merge : pd.DataFrame, min_use : float = 0.1):
        """Mean of the ``nmae`` column over rows whose ``use`` is at least ``min_use``.

        Args:
            result_merge : DataFrame with ``nmae`` and ``use`` columns
            min_use : rows with ``use`` below this are excluded (default 0.1, the original cut-off)
        Returns:
            the mean as a float (NaN when no row qualifies)
        """
        return result_merge.loc[result_merge.use>=min_use,'nmae'].mean()

    @classmethod
    def monthly_result_report(cls, monthly_result_df:pd.DataFrame):
        """Build a per-plant table of ``get_nmae`` values rounded to 4 decimals.

        A classmethod so ``get_nmae`` is reached through ``cls`` rather than the global
        class name, which notebook code may rebind. Callable from the class or an instance.

        Args:
            monthly_result_df : DataFrame with ``plant_id``, ``nmae`` and ``use`` columns
        Returns:
            DataFrame with columns ['plant_id', 'nmae_plant_id'], plants in first-appearance order
        """
        # sort=False keeps the first-appearance order that iterating .unique() gave.
        monthly_plant_result = [
            [plant_id, np.round(cls.get_nmae(plant_rows),4)]
            for plant_id, plant_rows in monthly_result_df.groupby('plant_id', sort=False)
        ]
        return pd.DataFrame(monthly_plant_result,columns=['plant_id','nmae_plant_id'])

    @staticmethod
    def plant_information(monthly_result_df : pd.DataFrame,solar_ts_ns_df : pd.DataFrame):
        """Attach plant/station metadata from BigQuery and the per-plant radiation error.

        Uses the notebook-level BigQuery client ``bq``. The query keeps only mapping rows
        whose station_id equals icsr_station_id. Neither input frame is modified.

        Args:
            monthly_result_df : per-plant table with ``plant_id`` and ``nmae_plant_id``
            solar_ts_ns_df : per-plant table with ``plant_id`` and ``rad_nmae``
        Returns:
            (DataFrame sorted by nmae_plant_id with the selected metadata columns,
             tuple of its unique plant ids)
        """
        plant_information_query = """
            WITH A as (
                SELECT CAST(id as string)plant_id, name as plant_name
                FROM sc-data-mart.mysql_tables.plant_master
            )
            select A.*,B.pv_capacity,B.station_id,B.icsr_station_id,B.icsr_distance_km,B.plant_latitude,B.plant_longitude,C.station_latitude,C.station_longitude
            from A
            inner join sc-data-mart.feature_store.plant_mapping_table as B 
                on A.plant_id = B.plant_id
            inner join sc-data-mart.mysql_tables.climate_location as C 
                on B.station_id = C.station_id where B.station_id = B.icsr_station_id
        """
        plant_information_df = bq.query(plant_information_query).to_dataframe()

        # The query casts plant ids to strings, so join on a string key. assign() builds
        # new frames; the original cast the callers' columns in place.
        with_info = (
            monthly_result_df
            .assign(plant_id=lambda frame: frame.plant_id.astype(str))
            .merge(plant_information_df,on=['plant_id'])
            .sort_values(by=['nmae_plant_id'])
            .reset_index(drop=True)
            .assign(plant_id=lambda frame: frame.plant_id.astype(int))
        )
        rad_by_plant = solar_ts_ns_df.assign(plant_id=lambda frame: frame.plant_id.astype(int))
        combined = pd.merge(with_info,rad_by_plant,on=['plant_id'])
        combined = combined[[
            'plant_id', 'nmae_plant_id','rad_nmae','plant_name', 'pv_capacity', 'station_id',
            'icsr_station_id', 'icsr_distance_km', 'plant_latitude',
            'plant_longitude', 'station_latitude', 'station_longitude']]
        return combined,tuple(combined.plant_id.unique())
        

In [None]:
performance_check_plant_id = performance_check_plant_id(
    resource_category = "fSLC003",
    filename = "result_ldaps_",
    pred_time = '1',
    check_start_date = "2022-11-01",
    check_end_date = "2022-12-26",
    train_file = '../fSLC003_연구/fSLC003_train.csv',
    predict_file = '../fSLC003_연구/fSLC003_val.csv'
)

In [None]:
result_df = performance_check_plant_id.read_result_file()
monthly_result = performance_check_plant_id.monthly_result_report(result_df)
rad_solar_pred,plant_rad_nmae = performance_check_plant_id.make_radiation_df()
performance_result_plant_id,plant_ids = performance_check_plant_id.plant_information(monthly_result,plant_rad_nmae)
# ## plant_id 별 결과 요약
performance_check_plant_id.show_plotly_plant_id_result(plant_ids,result_df,rad_solar_pred)
performance_check_plant_id.show_plotly_station_plant_id(performance_result_plant_id)