In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
import warnings

# Suppress every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides library warnings that may point
# at real problems (e.g. optimizer convergence or chained-assignment warnings);
# consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Configuration
# Continuous weather inputs; these are the columns that get standardized.
_NUMERIC_PREDICTORS = [
    'temperature_2m',
    'relativehumidity_2m',
    'direct_radiation',
    'diffuse_radiation',
    'windspeed_10m',
    'cloudcover',
]

# Label-valued inputs.
_CATEGORICAL_PREDICTORS = ['season']

# Pipeline settings: data location, target column and predictor groups.
# Each list stored in the dict is its own object, so editing one entry never
# changes another.
config = {
    'file_path': "https://raw.githubusercontent.com/Vaishnav8395/ReGenCast/main/SunPower_Full.csv",
    'target_variable': 'Active_Power',
    'predictors': _NUMERIC_PREDICTORS + _CATEGORICAL_PREDICTORS,
    'categorical_variables': list(_CATEGORICAL_PREDICTORS),
    'standardize_predictor_list': list(_NUMERIC_PREDICTORS),
}

# Load Data
def load_data(file_path):
    """Read the tab-separated dataset and prepare its time and target columns.

    The 'timestamp' column is renamed to 'date' and parsed as datetimes, and
    negative values of the configured target column are raised to zero.

    Args:
        file_path: Path or URL accepted by ``pandas.read_csv``.

    Returns:
        The prepared DataFrame.
    """
    target = config['target_variable']
    frame = pd.read_csv(file_path, sep='\t').rename(columns={'timestamp': 'date'})
    frame['date'] = pd.to_datetime(frame['date'])
    # Clamp negative target readings to zero.
    frame[target] = frame[target].clip(lower=0)
    return frame

# Add Season
def add_season(df):
    """Add a 'season' label column derived from the month of 'date'.

    December-February map to 'winter', March-May to 'spring', June-August to
    'summer', and every other value to 'fall'.

    Args:
        df: DataFrame with a datetime 'date' column. It is modified in place.

    Returns:
        The same DataFrame object, now carrying the 'season' column.
    """
    month_to_season = {
        12: 'winter', 1: 'winter', 2: 'winter',
        3: 'spring', 4: 'spring', 5: 'spring',
        6: 'summer', 7: 'summer', 8: 'summer',
    }
    months = df['date'].dt.month
    # Anything not listed above falls through to 'fall'.
    df['season'] = months.apply(lambda month: month_to_season.get(month, 'fall'))
    return df

# Choose 7-18 Interval
def choose_interval(df):
    """Keep only the rows whose time of day lies between 07:00 and 18:00.

    Args:
        df: DataFrame with a datetime 'date' column.

    Returns:
        A new DataFrame sorted by date, indexed by 'date', and restricted to
        the 07:00-18:00 window.
    """
    return (
        df.sort_values('date')
        .set_index('date')
        .between_time('07:00', '18:00')
    )

# Split Data
def split_data(df, train_years=7):
    """Ordinal-encode 'season' and split the frame chronologically.

    Rows up to and including ``index.min() + train_years`` form the training
    set; every later row forms the test set. The two parts always partition
    the data, whatever the sampling interval.

    Args:
        df: DataFrame with a DatetimeIndex and a 'season' column. It is not
            modified.
        train_years: Length of the training window in calendar years,
            measured from the earliest timestamp in the index.

    Returns:
        A ``(train, test)`` tuple of independent DataFrames in which 'season'
        holds the encoder's numeric codes.
    """
    # Work on a copy: the argument may itself be a selection of a larger frame
    # (main() passes the result of between_time), and writing a column into it
    # would be a chained assignment that also alters the caller's data.
    df = df.copy()
    ord_enc = OrdinalEncoder()
    df['season'] = ord_enc.fit_transform(df[['season']])

    cutoff_date = df.index.min() + pd.DateOffset(years=train_years)
    # A single boolean mask guarantees no row is lost between the two parts.
    # The previous "cutoff + 1 hour" start dropped any rows falling inside
    # that hour when the data is not sampled exactly hourly.
    in_train = df.index <= cutoff_date
    train = df.loc[in_train].copy()
    test = df.loc[~in_train].copy()
    return train, test

# Standardize Data
def standardize_data(train, test):
    """Build the model's feature matrices and target vectors.

    Columns in ``config['standardize_predictor_list']`` are standardized with
    statistics fitted on the training set only. Any other column listed in
    ``config['predictors']`` (such as the encoded 'season') is appended
    unscaled after them. Previously those columns were dropped, so the model
    never saw them even though they are configured as predictors.

    Args:
        train: Training DataFrame containing every configured predictor and
            the target column.
        test: Test DataFrame with the same columns.

    Returns:
        ``(X_train, X_test, y_train, y_test)``. The X values are 2-D float
        arrays with the standardized columns first, followed by the
        pass-through predictors in ``config['predictors']`` order. The y
        values are the target Series.

    Raises:
        KeyError: If a configured predictor is missing from a frame.
        ValueError: If a pass-through predictor is not numeric, for example a
            categorical column that has not been encoded yet.
    """
    scaled_cols = config['standardize_predictor_list']
    passthrough_cols = [col for col in config['predictors'] if col not in scaled_cols]

    scaler = StandardScaler()
    # Fit on the training rows only so no test-set statistics leak in.
    X_train = scaler.fit_transform(train[scaled_cols])
    X_test = scaler.transform(test[scaled_cols])

    if passthrough_cols:
        X_train = np.hstack([X_train, train[passthrough_cols].to_numpy(dtype=float)])
        X_test = np.hstack([X_test, test[passthrough_cols].to_numpy(dtype=float)])

    y_train = train[config['target_variable']]
    y_test = test[config['target_variable']]
    return X_train, X_test, y_train, y_test

# Train MLP Regressor
def train_mlp_regressor(X_train, y_train, random_state=42):
    """Tune an MLPRegressor with a 3-fold grid search scored on MAE.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values.
        random_state: Seed passed to the network so that weight
            initialization, and therefore the selected model, is reproducible
            between runs.

    Returns:
        The fitted ``GridSearchCV`` object; use ``best_params_`` and
        ``predict`` on it.
    """
    # 'learning_rate' is deliberately not searched: scikit-learn only uses it
    # when solver='sgd', and this grid tries 'adam' and 'lbfgs' only.
    # Searching it would fit every configuration twice without changing any
    # model.
    param_grid = {
        'hidden_layer_sizes': [(50,), (100,), (100, 50)],
        'activation': ['relu', 'tanh'],
        'solver': ['adam', 'lbfgs'],
        'alpha': [0.0001, 0.001],
    }
    mlp = MLPRegressor(max_iter=500, random_state=random_state)
    grid_search = GridSearchCV(mlp, param_grid, scoring='neg_mean_absolute_error', cv=3)
    grid_search.fit(X_train, y_train)
    return grid_search

# Evaluate Model
def evaluate_model(model, X_test, y_test):
    """Predict on the test set and print MAE, RMSE and R².

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test feature matrix.
        y_test: True target values for the test rows.

    Returns:
        The predictions produced by ``model.predict(X_test)``.
    """
    predictions = model.predict(X_test)
    scores = {
        "MAE": metrics.mean_absolute_error(y_test, predictions),
        # RMSE is the square root of the mean squared error.
        "RMSE": np.sqrt(metrics.mean_squared_error(y_test, predictions)),
        "R²": metrics.r2_score(y_test, predictions),
    }
    print("Evaluation Metrics:")
    for label, value in scores.items():
        print(f"{label}: {value:.2f}")
    return predictions

# Main Function
def main():
    """Run the pipeline end to end: load, prepare, split, tune and evaluate.

    Prints the best hyper-parameters, the evaluation metrics, and the first
    rows of predicted versus actual target values.
    """
    df = load_data(config['file_path'])
    df = add_season(df)
    df = choose_interval(df)
    train, test = split_data(df)
    X_train, X_test, y_train, y_test = standardize_data(train, test)

    mlp_model = train_mlp_regressor(X_train, y_train)
    print("Best Parameters:", mlp_model.best_params_)
    y_pred = evaluate_model(mlp_model, X_test, y_test)

    # assign() returns a new frame, so the prediction column is never written
    # into a slice of the full dataset (a chained assignment that the global
    # warnings filter would otherwise hide).
    results = test.assign(Predicted_Power=y_pred)
    print(results[['Predicted_Power', config['target_variable']]].head())


if __name__ == "__main__":
    main()

Best Parameters: {'activation': 'tanh', 'alpha': 0.001, 'hidden_layer_sizes': (100, 50), 'learning_rate': 'adaptive', 'solver': 'lbfgs'}
Evaluation Metrics:
MAE: 0.35
RMSE: 0.57
R²: 0.87
                     Predicted_Power  Active_Power
date                                              
2020-07-22 08:00:00         0.625480      0.781067
2020-07-22 09:00:00         2.007349      2.222433
2020-07-22 10:00:00         3.178919      3.266800
2020-07-22 11:00:00         4.063908      3.996233
2020-07-22 12:00:00         4.349465      4.325633
