In [1]:

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.metrics import classification_report
from prophet import Prophet
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import joblib
# Load data
import os
main_dir = os.getcwd().rsplit("\\", 2)[0]


  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [2]:
df = pd.read_csv(f"{main_dir}/data/processed/cleaned_data_final.csv")

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63204202 entries, 0 to 63204201
Data columns (total 19 columns):
 #   Column              Dtype  
---  ------              -----  
 0   date                object 
 1   energy_source       object 
 2   power_MW            float64
 3   maintenance_status  int64  
 4   respondent          object 
 5   latitude            float64
 6   longitude           float64
 7   capacity_MW         float64
 8   year                int64  
 9   plantcode           float64
 10  plantname           object 
 11  nearest_station     object 
 12  wind_speed          float64
 13  precipitation       float64
 14  temperature_avg     float64
 15  wind_volatility     float64
 16  sentiment_score     float64
 17  site_density        float64
 18  output_efficiency   float64
dtypes: float64(12), int64(2), object(5)
memory usage: 8.9+ GB


In [4]:
df['date'] = pd.to_datetime(df['date'], format='mixed')

In [None]:
# # Prophet for forecasting
# prophet_df = df[df['energy_source'] == 'wind'][['date', 'power_MW', 'wind_speed', 'precipitation', 'temperature_avg', 'wind_volatility']].rename(columns={'date': 'ds', 'power_MW': 'y'})
# prophet_model = Prophet()
# for col in ['wind_speed', 'precipitation', 'temperature_avg', 'wind_volatility']:
#     prophet_model.add_regressor(col)
# prophet_model.fit(prophet_df)
# joblib.dump(prophet_model, "models/prophet_wind.pkl")

In [5]:
# Prophet for forecasting (downsample to daily mean to avoid MemoryError)
wind_df = df[df['energy_source'] == 'wind'][['date', 'power_MW', 'wind_speed', 'precipitation', 'temperature_avg', 'wind_volatility']].copy()
wind_df['date'] = pd.to_datetime(wind_df['date'])

In [6]:
# Resample to daily mean
daily_df = wind_df.set_index('date').resample('D').mean().reset_index()
prophet_df = daily_df.rename(columns={'date': 'ds', 'power_MW': 'y'})
prophet_model = Prophet()
for col in ['wind_speed', 'precipitation', 'temperature_avg', 'wind_volatility']:
    prophet_model.add_regressor(col)
prophet_model.fit(prophet_df)

16:03:19 - cmdstanpy - INFO - Chain [1] start processing
16:03:20 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x12f647ae3e0>

In [7]:
joblib.dump(prophet_model, f"{main_dir}/models/prophet_wind.pkl")

['c:\\Users\\stuar\\Desktop\\Renewable Energy Maintenance/models/prophet_wind.pkl']

In [8]:
# Random Forest for maintenance
features = ['wind_speed', 'precipitation', 'temperature_avg', 'wind_volatility', 'power_MW', 'output_efficiency']
X = df[features].fillna(0)
y = df['maintenance_status']


In [9]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X, y)
print(classification_report(y, rf_model.predict(X)))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00  50579373
           1       1.00      1.00      1.00  12624829

    accuracy                           1.00  63204202
   macro avg       1.00      1.00      1.00  63204202
weighted avg       1.00      1.00      1.00  63204202



In [10]:
joblib.dump(rf_model, f"{main_dir}/models/random_forest_maintenance.pkl")

['c:\\Users\\stuar\\Desktop\\Renewable Energy Maintenance/models/random_forest_maintenance.pkl']

In [11]:
# Isolation Forest for anomaly detection
iso_model = IsolationForest(contamination=0.1, random_state=42)
df['anomaly'] = iso_model.fit_predict(X)
df[['date', 'energy_source', 'power_MW', 'anomaly']].to_csv(f"{main_dir}/data/processed/anomalies_final.csv", index=False)
# LSTM (simplified)
# Add LSTM logic if needed (requires reshaping data for sequences)