In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [2]:

def transform_rad(data, power=6):
    """Apply the irradiance transform ``log1p(data) ** power``.

    Parameters
    ----------
    data : array-like
        Numeric values, e.g. one DataFrame column or its ``.values`` array.
    power : int or float, default 6
        Exponent applied after the logarithm. The default reproduces the
        previously hard-coded value.

    Returns
    -------
    Same shape as ``data`` with the transform applied element-wise.
    """
    # log1p (= log(1 + x)) is used so that zero readings map to 0 instead of -inf.
    return np.power(np.log1p(data), power)

def transform_intensity(data):
    """Return the intensity values unchanged.

    This is an identity hook: it exists so the intensity side of the fit has
    a single place where a transform could be plugged in.
    """
    return data

In [3]:
def head_tail_filter(df, upper_frac=0.98):
    """Drop non-positive rows and rows near the maximum of the first two columns.

    A row is kept only if, for BOTH the first and the second column, its value
    is strictly greater than 0 and at most ``upper_frac`` times that column's
    maximum. Both maxima are taken over the unfiltered input.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first two columns (by position) are filtered on.
    upper_frac : float, default 0.98
        Fraction of each column's maximum used as the inclusive upper bound.

    Returns
    -------
    pandas.DataFrame
        The retained rows of ``df`` (original index preserved). An empty
        input yields an empty result.
    """
    col_a = df.iloc[:, 0]
    col_b = df.iloc[:, 1]
    max_a = col_a.max()
    max_b = col_b.max()

    # Single combined mask; equivalent to filtering on column 0 and then on
    # column 1 because both thresholds come from the unfiltered frame.
    keep = (
        (col_a > 0) & (col_a <= upper_frac * max_a)
        & (col_b > 0) & (col_b <= upper_frac * max_b)
    )
    return df[keep]

In [4]:
def linear_fit(rad_current_df):
    """Fit ``y = a*x + b`` on the transformed columns and return the slope ``a``.

    The first column (by position) is passed through ``transform_rad`` and used
    as X; the second column is passed through ``transform_intensity`` and used
    as y. An empty frame short-circuits to 0 without fitting.
    """
    if rad_current_df.empty:
        return 0

    # Both inputs are reshaped to column vectors of shape (n, 1).
    rad_values = rad_current_df.iloc[:, 0].values.reshape(-1, 1)
    intensity_values = rad_current_df.iloc[:, 1].values.reshape(-1, 1)
    features = transform_rad(rad_values)
    targets = transform_intensity(intensity_values)

    regressor = LinearRegression().fit(features, targets)

    # With a 2-D target, coef_ is 2-D; [0][0] is the single slope.
    slope = regressor.coef_[0][0]
    return slope

In [5]:
def calc_degradation_score(set, max_drop=0.5):
    """Score the relative drop from the first to the last value of a sequence.

    Parameters
    ----------
    set : sequence of numbers
        Ordered values (the caller passes one fitted slope per year). The
        parameter name shadows the builtin ``set``; it is kept unchanged for
        compatibility with existing callers.
    max_drop : float, default 0.5
        Relative drops at or above this value score 0.

    Returns
    -------
    ``(first - last) / first`` when every value is positive, the last value
    does not exceed the first, and the drop is below ``max_drop``; otherwise 0.
    An empty sequence also scores 0.
    """
    # Nothing to compare; also avoids an IndexError on set[-1] below.
    if len(set) == 0:
        return 0
    # Any zero or negative entry invalidates the whole sequence.
    if any(x <= 0 for x in set):
        return 0

    first, last = set[0], set[-1]
    if last > first:
        return 0

    drop = (first - last) / first
    if drop >= max_drop:
        return 0
    return drop

In [6]:

# --- Configuration ---------------------------------------------------------
folder_path = "2023_2024_csv_data"
env_file_name = "env.csv"
rad_variable = 'irradiance0'  # column used as regression X; 'avg' (computed below) is also available
start_hour = 9
end_hour = 15  # inclusive upper bound of the hour-of-day filter

months = [1,2,3]
years = [2023, 2024, 2025]

# --- Environment table, joined onto every data file by 'time' ---------------
env_df = pd.read_csv(os.path.join(folder_path, env_file_name))
env_df['avg'] = (env_df["irradiance0"] + env_df["irradiance1"]) / 2

# --- Score every intensity column of every matching file --------------------
result = []
# sorted() makes the processing order (and so the row order of result.csv)
# independent of the filesystem's listing order.
for file in sorted(os.listdir(folder_path)):
    # Only files whose name contains both '-' and '038' are processed.
    if '-' not in file or '038' not in file:
        continue

    df = pd.read_csv(os.path.join(folder_path, file))
    df = df.merge(env_df, on='time', how='left')
    df = df.fillna(0)

    # Parse the timestamps once and derive all calendar fields from them.
    timestamps = pd.to_datetime(df['time'])
    df['year'] = timestamps.dt.year
    df['month'] = timestamps.dt.month
    df['hour'] = timestamps.dt.hour
    df = df[(df['hour'] >= start_hour) & (df['hour'] <= end_hour)]

    intensity_cols = [col for col in df.columns if 'intensity' in col.lower()]
    for col in intensity_cols:
        degrad_set = []
        for month in months:
            df_month = df[df['month'] == month]
            # One fitted slope per year, in the order given by `years`.
            degradation_curve = []
            for year in years:
                df_year = df_month[df_month['year'] == year]
                new_df = head_tail_filter(df_year[[rad_variable, col]])
                degradation_curve.append(linear_fit(new_df))
            degrad_set.append(calc_degradation_score(degradation_curve))

        # `pid` rather than `id`, so the builtin id() is not rebound in the kernel.
        pid = col.replace('-intensity', '')
        # The final score is the sum of the per-month scores.
        result.append({'pid': pid, 'score': sum(degrad_set)})

# Explicit columns keep the CSV header present even when nothing matched.
result = pd.DataFrame(result, columns=['pid', 'score'])
result.to_csv('result.csv', index=False)




In [7]:
# Reload the saved scores together with the pid list and the merged report.
result, all_pids, report = (
    pd.read_csv(csv_name)
    for csv_name in ('result.csv', 'all_pids.csv', 'report_merged.csv')
)

In [8]:
# Keep only pids listed in all_pids, rank by score (highest first) and attach
# the report columns with a left join on 'pid'.
# NOTE: `result` is overwritten here, so re-running this cell without first
# re-running the loading cell would merge `report` a second time.
known_pid_mask = result['pid'].isin(all_pids['pid'])
result = (
    result[known_pid_mask]
    .sort_values(by='score', ascending=False)
    .merge(report, on='pid', how='left')
)

# Save the 20 highest-scoring rows before any flag filtering.
aligned_result = result.head(20)
aligned_result.to_csv('aligned_result.csv', index=False)

# Of those top rows, keep the ones whose flag is not NaN and print how many there are.
has_flag = aligned_result['flag'].notna()
aligned_result = aligned_result[has_flag]
print(len(aligned_result))







11


In [9]:
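# Experiment log of earlier parameter sweeps. Each row appears to read:
#   <irradiance variable> <hour window> <transform_rad exponent> (<printed match count>) [<head_tail_filter upper fraction>]
# TODO: confirm the column meanings with the author.
# irradiance0 10-16 5 (13)
# irradiance0 10-16 6 (14)
# irradiance0 9-17 5 (14)
# avg         9-16 6 (14)
# irradiance0 9-16 5 (15)
# irradiance0 9-16 6 (15)
# irradiance0 9-16 6 (15) 0.95
# irradiance0 9-16 6 (16) 0.98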
