In [5]:
!pip install xgboost tensorflow pandas numpy scikit-learn matplotlib




In [4]:
import numpy as np
import pandas as pd
import xgboost as xgb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load dataset
df = pd.read_csv("/content/complete_solar_data2.csv", parse_dates=["DATE_TIME"])
df.sort_values("DATE_TIME", inplace=True)
df.drop_duplicates(inplace=True)

# Feature Engineering
df["Hour"] = df["DATE_TIME"].dt.hour
df["Day"] = df["DATE_TIME"].dt.day
df["Month"] = df["DATE_TIME"].dt.month
df["Year"] = df["DATE_TIME"].dt.year

# Drop original timestamp
df.drop(columns=["DATE_TIME"], inplace=True)

# Splitting features and target
target = "AC_POWER"
features = df.drop(columns=[target])
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42, shuffle=False
)

# Train XGBoost Model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1)
xgb_model.fit(X_train, y_train)

# Use XGBoost Predictions as Features
xgb_train_pred = xgb_model.predict(X_train).reshape(-1, 1)
xgb_test_pred = xgb_model.predict(X_test).reshape(-1, 1)

# Normalize Data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Append XGBoost predictions to scaled data
X_train_scaled = np.hstack((X_train_scaled, xgb_train_pred))
X_test_scaled = np.hstack((X_test_scaled, xgb_test_pred))

# Ensure the dataset is large enough for sequences
time_steps = 10
if len(X_train_scaled) <= time_steps or len(X_test_scaled) <= time_steps:
    raise ValueError("Dataset too small for LSTM sequence creation!")

# Function to create LSTM sequences
def create_sequences(X, y, time_steps=10):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i : i + time_steps])
        ys.append(y[i + time_steps])
    return np.array(Xs), np.array(ys)

# Create sequences
X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train.to_numpy(), time_steps)
X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test.to_numpy(), time_steps)

# Build LSTM Model
lstm_model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_train_seq.shape[1], X_train_seq.shape[2])),
    Dropout(0.2),
    LSTM(32, return_sequences=False),
    Dropout(0.2),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mse')

# Train LSTM Model
lstm_model.fit(X_train_seq, y_train_seq, epochs=20, batch_size=32, validation_data=(X_test_seq, y_test_seq))

# Predict with LSTM
y_pred_lstm = lstm_model.predict(X_test_seq)

# Ensure equal lengths before averaging
final_pred = (y_pred_lstm.flatten() + xgb_test_pred[time_steps:].flatten()) / 2

# Print first 10 predictions for verification
print("Final Hybrid Model Predictions:", final_pred[:10])


  super().__init__(**kwargs)


Epoch 1/20
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 6ms/step - loss: 236727.1406 - val_loss: 116740.6953
Epoch 2/20
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 209614.2656 - val_loss: 100673.4531
Epoch 3/20
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 187288.7812 - val_loss: 86757.0547
Epoch 4/20
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 168723.7031 - val_loss: 74735.3906
Epoch 5/20
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 150094.5156 - val_loss: 64328.5781
Epoch 6/20
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 135669.6094 - val_loss: 55599.9609
Epoch 7/20
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 122087.6094 - val_loss: 48422.3398
Epoch 8/20
[1m1430/1430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m

In [13]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load dataset
df = pd.read_csv('/content/complete_solar_data2.csv')

# Convert DATE_TIME to datetime format
df['DATE_TIME'] = pd.to_datetime(df['DATE_TIME'], errors='coerce')

# Extract useful time-based features
df['YEAR'] = df['DATE_TIME'].dt.year
df['MONTH'] = df['DATE_TIME'].dt.month
df['DAY'] = df['DATE_TIME'].dt.day
df['HOUR'] = df['DATE_TIME'].dt.hour

# Drop DATE_TIME column
df.drop(columns=['DATE_TIME'], inplace=True)

# Drop NaN values (if any)
df.dropna(inplace=True)

# Define target column
target_column = 'AC_POWER'

# Separate features and target
features = df.drop(columns=[target_column])
target = df[target_column]

# Feature Scaling
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Splitting Data
X_train, X_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, random_state=42)

# Reshape data for LSTM
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Build LSTM Model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(1, X_train.shape[2])),
    Dropout(0.2),
    LSTM(32, return_sequences=False),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(1, activation='relu')  # Ensure correct activation for regression
])

# Compile Model
model.compile(optimizer='adam', loss='mse')

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)

# Train Model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test),
                    callbacks=[early_stopping, reduce_lr])

# Predictions
y_pred = model.predict(X_test)

# Performance Metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Calculate Accuracy using MAPE
mape = np.mean(np.abs((y_test - y_pred.flatten()) / y_test)) * 100
accuracy = 100 - mape  # Accuracy formula

# Print Results
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R² Score: {r2}")
print(f"Model Accuracy (100 - MAPE): {accuracy:.2f}%")


Epoch 1/50


  super().__init__(**kwargs)


[1m1693/1693[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - loss: 148527.4219 - val_loss: 47584.8750 - learning_rate: 0.0010
Epoch 2/50
[1m1693/1693[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 6ms/step - loss: 38748.3555 - val_loss: 16965.0449 - learning_rate: 0.0010
Epoch 3/50
[1m1693/1693[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 19946.0781 - val_loss: 15352.1348 - learning_rate: 0.0010
Epoch 4/50
[1m1693/1693[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 18545.8086 - val_loss: 14821.6855 - learning_rate: 0.0010
Epoch 5/50
[1m1693/1693[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - loss: 18632.6621 - val_loss: 14433.4170 - learning_rate: 0.0010
Epoch 6/50
[1m1693/1693[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 17548.2969 - val_loss: 14263.7207 - learning_rate: 0.0010
Epoch 7/50
[1m1693/1693[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6

In [14]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load dataset
df = pd.read_csv('/content/complete_solar_data2.csv')

# Convert DATE_TIME to datetime format
df['DATE_TIME'] = pd.to_datetime(df['DATE_TIME'], errors='coerce')

# Extract time-based features
df['YEAR'] = df['DATE_TIME'].dt.year
df['MONTH'] = df['DATE_TIME'].dt.month
df['DAY'] = df['DATE_TIME'].dt.day
df['HOUR'] = df['DATE_TIME'].dt.hour

# Drop DATE_TIME column
df.drop(columns=['DATE_TIME'], inplace=True)

# Drop NaN values (if any)
df.dropna(inplace=True)

# Define target column
target_column = 'AC_POWER'

# Separate features and target
features = df.drop(columns=[target_column])
target = df[target_column]

# Feature Scaling
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Splitting Data
X_train, X_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, random_state=42)

# Reshape data for LSTM (Sliding Window Technique)
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Build Improved LSTM Model
model = Sequential([
    Bidirectional(LSTM(128, return_sequences=True, input_shape=(1, X_train.shape[2]))),
    BatchNormalization(),
    Dropout(0.3),

    Bidirectional(LSTM(64, return_sequences=True)),
    BatchNormalization(),
    Dropout(0.3),

    LSTM(32, return_sequences=False),
    BatchNormalization(),
    Dropout(0.3),

    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='relu')  # Output layer
])

# Compile Model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)

# Train Model
history = model.fit(X_train, y_train, epochs=100, batch_size=64, validation_data=(X_test, y_test),
                    callbacks=[early_stopping, reduce_lr])

# Predictions
y_pred = model.predict(X_test)

# Performance Metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Calculate Accuracy using MAPE
mape = np.mean(np.abs((y_test - y_pred.flatten()) / y_test)) * 100
accuracy = 100 - mape  # Accuracy formula

# Print Results
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R² Score: {r2}")
print(f"Model Accuracy (100 - MAPE): {accuracy:.2f}%")


Epoch 1/100


  super().__init__(**kwargs)


[1m847/847[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 13ms/step - loss: 92990.0234 - val_loss: 17010.2051 - learning_rate: 0.0010
Epoch 2/100
[1m847/847[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 22931.0254 - val_loss: 15978.2207 - learning_rate: 0.0010
Epoch 3/100
[1m847/847[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 21604.7930 - val_loss: 14770.6201 - learning_rate: 0.0010
Epoch 4/100
[1m847/847[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 13ms/step - loss: 20876.9844 - val_loss: 13670.2939 - learning_rate: 0.0010
Epoch 5/100
[1m847/847[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - loss: 20015.1016 - val_loss: 14858.7773 - learning_rate: 0.0010
Epoch 6/100
[1m847/847[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 13ms/step - loss: 19900.0840 - val_loss: 14583.3809 - learning_rate: 0.0010
Epoch 7/100
[1m846/847[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms

In [15]:
# Function to take user input and make a prediction
def predict_ac_power():
    print("\nEnter feature values for prediction:")

    # Accept user input for all features
    feature_values = []
    for col in features.columns:
        val = float(input(f"Enter {col}: "))
        feature_values.append(val)

    # Convert input to numpy array
    feature_array = np.array(feature_values).reshape(1, -1)

    # Scale input using the previously fitted scaler
    feature_array_scaled = scaler.transform(feature_array)

    # Reshape for LSTM input
    feature_array_reshaped = np.reshape(feature_array_scaled, (1, 1, feature_array_scaled.shape[1]))

    # Make prediction
    predicted_power = model.predict(feature_array_reshaped)

    # Print the prediction
    print(f"\nPredicted AC Power: {predicted_power[0][0]:.2f}")

# Call function for user input and prediction
predict_ac_power()


Enter feature values for prediction:
Enter AMBIENT_TEMPERATURE: 60
Enter MODULE_TEMPERATURE: 3
Enter IRRADIATION: 12
Enter DAILY_YIELD: 23
Enter YEAR: 2022
Enter MONTH: 3
Enter DAY: 6
Enter HOUR: 23
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

Predicted AC Power: 1043.18


