In [None]:
# Libraries

import pandas as pd
import numpy as np
import pickle
from os import path

In [None]:
# Load the data

# Read the raw measurements. The CSV must provide the columns
# `ghi`, `ghi_cs`, `kt_mod` and `delta_kt_mod`, which are accessed below.
data = pd.read_csv('data.csv')

# Keep only the four series used downstream, under the column names
# that calculate_features() looks up.
df = pd.DataFrame({
    'GHI': data.ghi,
    'GHI_cs': data.ghi_cs,
    'kt_mod': data.kt_mod,
    'delta_kt_mod': data.delta_kt_mod
})

# Replace the index with a 1-minute date range (1440 periods in a day).
# 'XXXX-XX-XX' is a placeholder: set it to the measurement date before running.
# 'min' is used instead of the deprecated 'T' alias for minutes.
# NOTE(review): a 1440-entry index presumably requires exactly 1440 rows in
# data.csv (a different length should fail here) — confirm for partial days.
df = df.set_index(pd.date_range(start='XXXX-XX-XX', periods=1440, freq='min'))

# Drop every minute with at least one missing value; the index keeps the
# original timestamps, so gaps may appear in the time axis.
df = df.dropna()

In [None]:
# Calculate the features

def calculate_features(window):
    """Compute the summary features of one time window.

    Parameters
    ----------
    window : pandas.DataFrame
        Rows of the window. Must contain the columns 'GHI', 'GHI_cs',
        'kt_mod' and 'delta_kt_mod'.

    Returns
    -------
    pandas.Series
        One value per feature, indexed by 'MEAN_KT', 'STD_KT', 'MAX_KT',
        'KURT_KT', 'SKEW_KT', 'STD_VI', 'RMSD' and 'AVG_VARIABILITY'
        (in that order). Entries may be NaN when the window is too short
        for the corresponding statistic.
    """
    ghi = window['GHI']
    ghi_cs = window['GHI_cs']
    kt = window['kt_mod']
    # Bound under the same name the AVG_VARIABILITY expression uses; the
    # previous mismatch between the two names raised NameError on every call.
    delta_kt_mod = window['delta_kt_mod']

    # Step-to-step increments; the first element of diff() is always NaN,
    # so it is dropped.
    delta_ghi = ghi.diff().iloc[1:]
    delta_ghi_cs = ghi_cs.diff().iloc[1:]

    # Per-step ratio of the measured increment term to the clear-sky one.
    step_ratio = np.sqrt(delta_ghi**2 + 1) / np.sqrt(delta_ghi_cs**2 + 1)

    features = {
        'MEAN_KT': np.mean(kt),
        'STD_KT': np.std(kt),
        'MAX_KT': np.max(kt),
        'KURT_KT': kt.kurt(),
        'SKEW_KT': kt.skew(),
        'STD_VI': np.std(step_ratio),
        'RMSD': np.sqrt(np.mean((ghi - ghi_cs)**2)),
        # Mean of the centered 4-sample rolling standard deviation; the NaN
        # edges produced by the rolling window are ignored by mean().
        'AVG_VARIABILITY': delta_kt_mod.rolling(4, center=True).std().mean()
    }

    return pd.Series(features)

In [None]:
# Feature list for 33 minute window

features_list = []
window_ends = []  # end timestamp of every window that is actually kept

window_length = pd.DateOffset(minutes=33)

start = df.index[0]
while start + window_length <= df.index[-1]:
    end = start + window_length
    # Label-based slicing on a DatetimeIndex includes both endpoints, so
    # consecutive windows share their boundary sample.
    window = df.loc[start:end]

    features = calculate_features(window)

    # Skip windows where any feature could not be computed.
    if not features.isnull().values.any():
        features_list.append(features)
        # Remember the end time together with the features: assigning a
        # regular 33-minute range by position afterwards would shift the
        # timestamps of every row that follows a skipped window.
        window_ends.append(end)

    start = end

features_df = pd.DataFrame(features_list)

# Index each row by the end of its window, at minute resolution
# ('min' replaces the deprecated 'T' alias).
features_df.index = pd.DatetimeIndex(window_ends).round('min')

In [None]:
# Run the model

# NOTE(review): the file name is spelled 'classifcation' — confirm it matches
# the file on disk.
model_filename = 'cloud-classifcation.pkl'

# SECURITY: pickle.load can execute arbitrary code contained in the file;
# only load model files from a trusted source.
with open(model_filename, 'rb') as f:
    # Each pickle.load() call consumes one pickled object from the stream, so
    # the bundle is read once and both entries are taken from it. Loading a
    # second time from a single-object file raises EOFError.
    bundle = pickle.load(f)
    model = bundle['model']
    if 'label_encoder' in bundle:
        le = bundle['label_encoder']
    else:
        # Fallback for files written with two separate dump() calls.
        le = pickle.load(f)['label_encoder']
print(model)

# Make predictions

# Predict on the feature columns only, so that re-running this cell (after
# the result columns below have been added) does not feed them to the model.
derived_columns = ['Predictions', 'Cloud_class']
feature_columns = [c for c in features_df.columns if c not in derived_columns]

predic = model.predict(features_df[feature_columns])

features_df['Predictions'] = predic

# Human-readable names for the numeric predictions.
# NOTE(review): presumably this matches the encoding used at training time —
# confirm against the loaded label encoder `le`.
num_to_class = {
    0: 'Ac-Cc',
    1: 'Ci-Cs',
    2: 'Clear-sky',
    3: 'Cb-Ns',
    4: 'Cu',
    5: 'Sc-St-As'
}

features_df['Cloud_class'] = features_df['Predictions'].map(num_to_class)