In [3]:
# Install required libraries
!pip install pandas numpy scikit-learn xgboost matplotlib seaborn shap joblib requests plotly

# Import libraries
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import product
from sklearn import metrics
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor
import shap
import joblib
import warnings
import pickle
from urllib.request import urlopen
import requests
import plotly.express as px
from datetime import date, timedelta

warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Define constants
# Numeric weather inputs; these are the columns that get standardized.
_NUMERIC_PREDICTORS = [
    'temperature_2m',
    'relativehumidity_2m',
    'direct_radiation',
    'diffuse_radiation',
    'windspeed_10m',
    'cloudcover',
]
# Categorical inputs appended after the numeric ones in the model feature list.
_CATEGORICAL_PREDICTORS = ['season']

config = {
    # Location of the historical dataset read by load_data().
    'file_path': "https://raw.githubusercontent.com/Vaishnav8395/ReGenCast/main/SunPower_Full.csv",
    # Column the models predict.
    'target_variable': 'Active_Power',
    # Full feature list: numeric predictors followed by the categorical ones.
    'predictors': _NUMERIC_PREDICTORS + _CATEGORICAL_PREDICTORS,
    'categorical_variables': list(_CATEGORICAL_PREDICTORS),
    # Labels assigned by detect_time_interval(), in chronological order.
    'time_intervals': [
        'first_interval',
        'second_interval',
        'third_interval',
        'fourth_interval',
        'fifth_interval',
        'sixth_interval',
    ],
    # Labels assigned by create_weather_type().
    'weather_types': ['TypeA', 'TypeB', 'TypeC'],
    'standardize_predictor_list': list(_NUMERIC_PREDICTORS),
}

# Load data
def load_data(file_path):
    """Read the tab-separated dataset and normalise its date and target columns.

    Args:
        file_path: Path or URL handed to ``pandas.read_csv``.

    Returns:
        DataFrame in which the ``timestamp`` column has been renamed to ``date``
        and parsed as datetimes, and the target column
        (``config['target_variable']``) has had negative values replaced by 0.
    """
    raw = pd.read_csv(file_path, sep='\t')
    df = raw.rename(columns={'timestamp': 'date'})
    df['date'] = pd.to_datetime(df['date'])
    target = config['target_variable']
    # Negative readings are floored at zero.
    df[target] = df[target].clip(lower=0)
    return df

# Add season
def add_season(df):
    """Add a ``season`` column derived from the month of the ``date`` column.

    Months 12, 1, 2 map to 'winter', 3-5 to 'spring', 6-8 to 'summer' and
    everything else to 'fall'.

    Args:
        df: DataFrame with a datetime ``date`` column. It is modified in place.

    Returns:
        The same DataFrame object, now carrying the ``season`` column.
    """
    month_to_season = {
        12: 'winter', 1: 'winter', 2: 'winter',
        3: 'spring', 4: 'spring', 5: 'spring',
        6: 'summer', 7: 'summer', 8: 'summer',
    }
    months = df['date'].dt.month
    # Any month not listed above falls through to 'fall'.
    df['season'] = months.apply(lambda month: month_to_season.get(month, 'fall'))
    return df

# Choose only 7-18 interval
def choose_interval(df):
    """Keep only the rows whose time of day lies between 07:00 and 18:00.

    Args:
        df: DataFrame with a datetime ``date`` column.

    Returns:
        A new DataFrame sorted by ``date``, indexed by ``date``, and restricted
        to the 07:00-18:00 window (selected with ``between_time``).
    """
    by_date = df.sort_values('date').set_index('date')
    return by_date.between_time('07:00', '18:00')

# Split data
def split_data(df):
    """Ordinal-encode ``season`` and split the frame chronologically.

    The split point is seven years after the earliest index value. Rows at or
    before it form the training set and every later row forms the test set, so
    the two sets always partition ``df`` (no row is dropped, whatever the
    sampling interval of the data).

    Args:
        df: DataFrame indexed by timestamp with a ``season`` column.
            NOTE: the ``season`` column of this frame is overwritten in place
            with its ordinal codes.

    Returns:
        Tuple ``(train, test)`` of DataFrames.
    """
    ord_enc = OrdinalEncoder()
    season = ord_enc.fit_transform(np.array(df['season']).reshape(-1, 1))
    df['season'] = season
    cutoff_date = df.index.min() + pd.DateOffset(years=7)
    # Complementary masks instead of `cutoff:` / `cutoff + 1 hour:` slices, which
    # would leave out any row stamped strictly between those two instants.
    in_train = df.index <= cutoff_date
    train = df.loc[in_train]
    test = df.loc[~in_train]
    return train, test

# Detect time interval
def detect_time_interval(df):
    """Label each row with the named time-of-day interval its hour falls into.

    Hours are binned on left-closed edges: [7, 9) is 'first_interval',
    [9, 11) 'second_interval', [11, 13) 'third_interval', [13, 15)
    'fourth_interval', [15, 17) 'fifth_interval' and [17, 24)
    'sixth_interval'. Hours before 7 get no label.

    Args:
        df: DataFrame with a DatetimeIndex. It is not modified.

    Returns:
        A copy of ``df`` with an added ``time_interval`` column.
    """
    bin_edges = [7, 9, 11, 13, 15, 17, 24]
    interval_labels = [
        'first_interval',
        'second_interval',
        'third_interval',
        'fourth_interval',
        'fifth_interval',
        'sixth_interval',
    ]
    labelled = df.copy()
    labelled['time_interval'] = pd.cut(
        labelled.index.hour,
        bins=bin_edges,
        labels=interval_labels,
        include_lowest=True,
        right=False,
    )
    return labelled

# Create weather type
def create_weather_type(train):
    """Label every training row with a weather type based on its target value.

    Rows are processed per time interval (``config['time_intervals']``). Within
    one interval, let ``avg`` and ``low`` be the mean and minimum of the target
    column (``config['target_variable']``) and ``mid = avg - (avg - low) / 2``:

    * ``TypeA``: target >= avg
    * ``TypeB``: mid < target < avg
    * ``TypeC``: 0 <= target <= mid

    Args:
        train: DataFrame with a ``time_interval`` column and the target column.
            It is not modified.

    Returns:
        A new DataFrame, sorted by index, holding the rows of every non-empty
        interval plus a ``weather_type`` column. Intervals with no rows are
        skipped; if no interval has rows an empty DataFrame is returned.

    Raises:
        ValueError: if a target value fits none of the three classes
            (for example a negative or missing value).
    """
    target = config['target_variable']
    labelled_parts = []
    for interval in config['time_intervals']:
        train_df = train[train['time_interval'] == interval].copy()
        if train_df.empty:
            # Nothing to label; the mean and minimum are undefined here.
            continue

        power = train_df[target]
        avg_kwh = np.mean(power)
        min_kwh = power.min()
        midpoint = avg_kwh - (avg_kwh - min_kwh) / 2

        is_type_a = power >= avg_kwh
        is_type_b = (power > midpoint) & (power < avg_kwh)
        is_type_c = (power >= 0) & (power <= midpoint)
        if not (is_type_a | is_type_b | is_type_c).all():
            raise ValueError(f"Something wrong happened in weather type classification for {interval}")

        # Every row is known to match a class, so "neither A nor B" means C.
        train_df['weather_type'] = np.where(is_type_a, "TypeA", np.where(is_type_b, "TypeB", "TypeC"))
        labelled_parts.append(train_df)

    if not labelled_parts:
        return pd.DataFrame()
    # Single concat at the end instead of growing a frame inside the loop.
    return pd.concat(labelled_parts).sort_index()

# Train random forest classifier
def train_rf_classifier(X_train, y_train):
    """Fit a random-forest classifier with an exhaustive 5-fold grid search.

    The grid covers 3 values each of ``n_estimators``, ``max_depth``,
    ``min_samples_split`` and ``min_samples_leaf`` (81 combinations), so this
    call can take a long time on large training sets.

    Args:
        X_train: Feature matrix.
        y_train: Class labels.

    Returns:
        The fitted ``GridSearchCV`` object; the selected model is available as
        its ``best_estimator_`` attribute.
    """
    # Imported locally because the notebook's import block does not provide
    # GridSearchCV (it is only imported in a separate, later cell), so calling
    # this function on a fresh kernel would otherwise raise NameError.
    from sklearn.model_selection import GridSearchCV

    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [5, 10, 20],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }
    rfc = RandomForestClassifier()
    grid_search = GridSearchCV(rfc, param_grid, cv=5)
    grid_search.fit(X_train, y_train)
    return grid_search

# Predict weather type
def predict_weather_type(grid_search, X_test):
    """Predict a weather type for every row and store it on the feature frame.

    Args:
        grid_search: Object exposing a fitted model as ``best_estimator_``.
        X_test: Feature DataFrame passed to the model as-is. It is modified in
            place: a ``weather_type`` column receives the predictions.

    Returns:
        The same ``X_test`` object, including the ``weather_type`` column.
    """
    best_model = grid_search.best_estimator_
    predicted_types = best_model.predict(X_test)
    X_test.loc[:, 'weather_type'] = predicted_types
    return X_test

# Classify weather type
def classify_weather_forecast_type(df, new_train):
    """Assign a predicted weather type to every forecast row, one interval at a time.

    For each name in ``config['time_intervals']`` the matching pre-trained
    classifier is downloaded and applied to the rows of that interval. If the
    download or deserialisation fails, a classifier is retrained on the rows of
    ``new_train`` for that interval and saved to the working directory as
    ``RF_Weather_<interval>_.pkl``.

    Args:
        df: Forecast DataFrame with a ``time_interval`` column and all columns
            listed in ``config['predictors']``.
        new_train: Labelled training DataFrame (``time_interval``,
            ``weather_type`` and predictor columns) used only for the
            retraining fallback.

    Returns:
        DataFrame sorted by index with the predictor columns, ``weather_type``
        and ``time_interval`` (stored as the plain interval name). Intervals
        with no forecast rows are skipped; an empty DataFrame is returned if
        nothing was classified.
    """
    classified_parts = []
    for interval in config['time_intervals']:
        interval_dataset = df[df['time_interval'] == interval].copy()
        if interval_dataset.empty:
            # No rows to classify for this interval, so no model is needed.
            continue

        model_url = f'https://raw.githubusercontent.com/Vaishnav8395/ReGenCast/master/ClassifiedWeatherTypes/RF_Weather_{interval}_.pkl'
        try:
            # SECURITY: joblib.load unpickles the payload, which can execute
            # arbitrary code. Only keep this if the remote repository is trusted.
            with urlopen(model_url) as response:
                grid = joblib.load(response)
        except Exception as e:
            print(f"Failed to load model for {interval}. Retraining...")
            # Report the cause so a version mismatch is distinguishable from a network error.
            print(f"Reason: {e!r}")
            # Retrain the model using new_train
            interval_train_dataset = new_train[new_train['time_interval'] == interval].copy()
            grid = train_rf_classifier(interval_train_dataset[config['predictors']], interval_train_dataset['weather_type'])
            joblib.dump(grid, f'RF_Weather_{interval}_.pkl')  # Save the retrained model

        classified_weather_type = predict_weather_type(grid, interval_dataset[config['predictors']].copy())
        classified_weather_type['time_interval'] = interval
        print(f"Weather type Predictions done for {interval}")
        classified_parts.append(classified_weather_type)

    if not classified_parts:
        return pd.DataFrame()
    # Single concat at the end instead of growing a frame inside the loop.
    return pd.concat(classified_parts).sort_index()

# Standardize data
def standardize_data_weather_forecast(df):
    """Scale the numeric predictors with the previously fitted scaler.

    The scaler object is downloaded on every call and applied to the columns in
    ``config['standardize_predictor_list']``.

    Args:
        df: DataFrame containing those columns plus ``season``,
            ``weather_type`` and ``time_interval``. It is not modified.

    Returns:
        New DataFrame with the scaled numeric columns (same index and column
        names) followed by the unscaled ``season``, ``weather_type`` and
        ``time_interval`` columns.
    """
    X_new_test = df[config['standardize_predictor_list']]
    # Load fitted predictor.
    # SECURITY: joblib.load unpickles the payload, which can execute arbitrary
    # code. Only keep this if the remote repository is trusted.
    scaler_url = 'https://raw.githubusercontent.com/Vaishnav8395/ReGenCast/master/Fitted_Standardizers/std_scaler.bin'
    with urlopen(scaler_url) as response:
        predictor_scaler_fit = joblib.load(response)
    scaled_values = predictor_scaler_fit.transform(X_new_test)

    new_stand_df = pd.DataFrame(scaled_values, index=X_new_test.index, columns=X_new_test.columns)
    new_stand_df = pd.concat([new_stand_df, df[['season', 'weather_type', 'time_interval']]], axis=1)
    return new_stand_df

# Predict forecast using MLP
def predict_forecast_MLP(new_stand_test):
    """Predict power for every forecast row with the per-interval, per-type models.

    For each (time interval, weather type) pair that has at least one row, the
    matching fitted model ``MLP_fitted_<interval>_<weather_type>.pkl`` is
    downloaded and applied to the ``config['predictors']`` columns.

    Args:
        new_stand_test: DataFrame with ``time_interval``, ``weather_type`` and
            all predictor columns. It is not modified.

    Returns:
        DataFrame sorted by index containing the predictor columns and a
        ``PredictedTotalPower`` column; empty if no pair had any rows.
    """
    forecast_parts = []
    for interval, weather_type in product(config['time_intervals'], config['weather_types']):
        in_group = (new_stand_test['time_interval'] == interval) & (new_stand_test['weather_type'] == weather_type)
        X_test = new_stand_test[in_group][config['predictors']]
        if len(X_test) == 0:
            continue

        # SECURITY: joblib.load unpickles the payload, which can execute
        # arbitrary code. Only keep this if the remote repository is trusted.
        model_url = f'https://raw.githubusercontent.com/Vaishnav8395/ReGenCast/master/Fitted_Models/MLP_fitted_{interval}_{weather_type}.pkl'
        with urlopen(model_url) as response:
            md = joblib.load(response)
        predictions = md.predict(X_test)
        print(f"Energy Predictions done for {interval}, {weather_type}")

        TestingData = X_test.copy()
        TestingData['PredictedTotalPower'] = predictions
        forecast_parts.append(TestingData)

    if not forecast_parts:
        return pd.DataFrame()
    # Single concat at the end instead of growing a frame inside the loop.
    return pd.concat(forecast_parts).sort_index()

# Get weather forecast data
def get_weather_forecast_data():
    """Download the hourly weather forecast for the fixed site from Open-Meteo.

    The request covers ``start_date`` = tomorrow to ``end_date`` = today + 4
    days, for the six numeric weather variables listed below.
    NOTE(review): if the API treats ``end_date`` as inclusive this spans four
    calendar days, while the plot title elsewhere in the notebook says 3 days -
    confirm which is intended.

    Returns:
        DataFrame with one column per requested variable followed by a
        datetime ``date`` column built from the API's hourly ``time`` values.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the API does not answer within the timeout.
    """
    lat = -23.760363
    long = 133.874719

    Predictors = ['temperature_2m', 'relativehumidity_2m', 'direct_radiation', 'diffuse_radiation', 'windspeed_10m', 'cloudcover']
    # Read the clock once so both dates are derived from the same day.
    today = date.today()
    start_date = str(today + timedelta(days=1))
    end_date = str(today + timedelta(days=4))

    response = requests.get(
        'https://api.open-meteo.com/v1/forecast',
        params={'latitude': lat, 'longitude': long, 'timezone': 'auto', 'start_date': start_date, 'end_date': end_date, 'hourly': Predictors},
        timeout=30,  # without a timeout a stalled connection would block the notebook indefinitely
    )
    # Fail with the HTTP error itself rather than a KeyError on the missing 'hourly' key.
    response.raise_for_status()
    hourly = response.json()['hourly']

    weather_df = pd.DataFrame({p: np.array(hourly[p]) for p in Predictors})
    weather_df['date'] = pd.to_datetime(np.array(hourly['time']))
    return weather_df

# Main workflow
# Step 1: Load and preprocess the data
df = load_data(config['file_path'])
df = add_season(df)
df = choose_interval(df)
train, test = split_data(df)
train = detect_time_interval(train)
new_train = create_weather_type(train)

# Step 2: Get weather forecast data
weather_forecast_df = get_weather_forecast_data()
weather_forecast_df = add_season(weather_forecast_df)
weather_forecast_df = choose_interval(weather_forecast_df)
weather_forecast_df = detect_time_interval(weather_forecast_df)

# Encode the 'season' column in the weather forecast data.
# The category order is fixed explicitly: an encoder fitted on the forecast alone
# sees only the season(s) of the next few days and would give the first one code 0
# whatever it is. The order below is the alphabetical order split_data's encoder
# derives from training data containing all four seasons (fall=0, spring=1,
# summer=2, winter=3), so forecast and training rows share the same codes.
# NOTE(review): presumably the downloaded pre-trained models used this same encoding - confirm.
ord_enc = OrdinalEncoder(categories=[['fall', 'spring', 'summer', 'winter']])
weather_forecast_df['season'] = ord_enc.fit_transform(np.array(weather_forecast_df['season']).reshape(-1, 1))

# Step 3: Classify weather types using new_train
weather_forecast_df = classify_weather_forecast_type(weather_forecast_df, new_train)

# Step 4: Standardize the data
weather_forecast_df_standardized = standardize_data_weather_forecast(weather_forecast_df)

# Step 5: Predict the forecast
predicted_forecast = predict_forecast_MLP(weather_forecast_df_standardized)
predicted_forecast = pd.concat([predicted_forecast, weather_forecast_df_standardized[['weather_type']]], axis=1)

# Step 6: Unstandardize the data
# SECURITY: joblib.load unpickles the payload, which can execute arbitrary code.
# Only keep this if the remote repository is trusted.
with urlopen('https://raw.githubusercontent.com/Vaishnav8395/ReGenCast/master/Fitted_Standardizers/std_scaler.bin') as scaler_response:
    predictor_scaler_fit = joblib.load(scaler_response)
unst_data = predictor_scaler_fit.inverse_transform(predicted_forecast[config['standardize_predictor_list']])
predicted_forecast_unst = predicted_forecast.copy()
predicted_forecast_unst[config['standardize_predictor_list']] = unst_data

# Step 7: Plot the forecast
fig = px.line(predicted_forecast_unst.reset_index(), x='date', y='PredictedTotalPower', title="Forecast - Energy Generation for the next 3 days")
fig.show()

Weather type Predictions done for first_interval
Weather type Predictions done for second_interval
Weather type Predictions done for third_interval
Weather type Predictions done for fourth_interval
Weather type Predictions done for fifth_interval
Weather type Predictions done for sixth_interval
Energy Predictions done for first_interval, TypeA
Energy Predictions done for first_interval, TypeC
Energy Predictions done for second_interval, TypeA
Energy Predictions done for second_interval, TypeB
Energy Predictions done for third_interval, TypeA
Energy Predictions done for fourth_interval, TypeA
Energy Predictions done for fourth_interval, TypeB
Energy Predictions done for fifth_interval, TypeA
Energy Predictions done for fifth_interval, TypeB
Energy Predictions done for sixth_interval, TypeA
Energy Predictions done for sixth_interval, TypeB
Energy Predictions done for sixth_interval, TypeC


In [1]:
!pip install scikit-learn==1.2.2



In [2]:
from sklearn.model_selection import GridSearchCV