In [365]:
pip install tensorflow


Note: you may need to restart the kernel to use updated packages.


### IMPORTING LIBARIES

In [367]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# TensorFlow/Keras imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split



### IMPORTING DATASET

In [369]:
power=pd.read_csv("Plant_1_Generation_Data.csv")
sensor=pd.read_csv("Plant_1_Weather_Sensor_Data.csv")

### DATA PREPROCESSING

In [371]:
power=power.dropna()
sensor=sensor.dropna()
sensor.head()

Unnamed: 0,DATE_TIME,PLANT_ID,SOURCE_KEY,AMBIENT_TEMPERATURE,MODULE_TEMPERATURE,IRRADIATION
0,2020-05-15 00:00:00,4135001,HmiyD2TTLFNqkNe,25.184316,22.857507,0.0
1,2020-05-15 00:15:00,4135001,HmiyD2TTLFNqkNe,25.084589,22.761668,0.0
2,2020-05-15 00:30:00,4135001,HmiyD2TTLFNqkNe,24.935753,22.592306,0.0
3,2020-05-15 00:45:00,4135001,HmiyD2TTLFNqkNe,24.84613,22.360852,0.0
4,2020-05-15 01:00:00,4135001,HmiyD2TTLFNqkNe,24.621525,22.165423,0.0


In [372]:
power.isnull().sum()

DATE_TIME      0
PLANT_ID       0
SOURCE_KEY     0
DC_POWER       0
AC_POWER       0
DAILY_YIELD    0
TOTAL_YIELD    0
dtype: int64

In [373]:
power['DATE_TIME'] = pd.to_datetime(power['DATE_TIME'],format = '%d-%m-%Y %H:%M')


In [374]:
sensor['DATE_TIME'] = pd.to_datetime(sensor['DATE_TIME'],format = '%Y-%m-%d %H:%M:%S')


In [375]:
sensor.head()

Unnamed: 0,DATE_TIME,PLANT_ID,SOURCE_KEY,AMBIENT_TEMPERATURE,MODULE_TEMPERATURE,IRRADIATION
0,2020-05-15 00:00:00,4135001,HmiyD2TTLFNqkNe,25.184316,22.857507,0.0
1,2020-05-15 00:15:00,4135001,HmiyD2TTLFNqkNe,25.084589,22.761668,0.0
2,2020-05-15 00:30:00,4135001,HmiyD2TTLFNqkNe,24.935753,22.592306,0.0
3,2020-05-15 00:45:00,4135001,HmiyD2TTLFNqkNe,24.84613,22.360852,0.0
4,2020-05-15 01:00:00,4135001,HmiyD2TTLFNqkNe,24.621525,22.165423,0.0


In [376]:
power = pd.merge(power.drop(columns = ['PLANT_ID']), sensor.drop(columns = ['PLANT_ID', 'SOURCE_KEY']), on='DATE_TIME')

In [377]:
power['DATE_TIME'] = pd.to_datetime(power['DATE_TIME'])
power['YEAR'] = power['DATE_TIME'].dt.year
power['MONTH'] = power['DATE_TIME'].dt.month
power['DAY'] = power['DATE_TIME'].dt.day
power['HOUR'] = power['DATE_TIME'].dt.hour
power['MINUTES'] = power['DATE_TIME'].dt.minute
power['MINUTES_PASS'] = power['HOUR'] * 60 + power['MINUTES']

In [378]:

unique_elements = power["SOURCE_KEY"].unique()
print("Unique Elements:", unique_elements)
power["SOURCE_KEY"], unique_labels = pd.factorize(power["SOURCE_KEY"])
print("\nDataFrame with Numeric Labels:")
print(power)

Unique Elements: ['1BY6WEcLGh8j5v7' '1IF53ai7Xc0U56Y' '3PZuoBAID5Wc2HD' '7JYdWkrLSPkdwr4'
 'McdE0feGgRqW7Ca' 'VHMLBKoKgIrUVDU' 'WRmjgnKYAwPKWDb' 'ZnxXDlPa8U1GXgE'
 'ZoEaEvLYb1n2sOq' 'adLQvlD726eNBSB' 'bvBOhCH3iADSZry' 'iCRJl6heRkivqQ3'
 'ih0vzX44oOqAx2f' 'pkci93gMrogZuBj' 'rGa61gmuvPhdLxV' 'sjndEbLyjtCKgGv'
 'uHbuxQJl8lW7ozc' 'wCURE6d3bPkepu2' 'z9Y9gH1T5YWrNuG' 'zBIq5rxdHJRwDNY'
 'zVJPv84UY57bAof' 'YxYtjZvoooNbGkE']

DataFrame with Numeric Labels:
                DATE_TIME  SOURCE_KEY  DC_POWER  AC_POWER  DAILY_YIELD  \
0     2020-05-15 00:00:00           0       0.0       0.0          0.0   
1     2020-05-15 00:00:00           1       0.0       0.0          0.0   
2     2020-05-15 00:00:00           2       0.0       0.0          0.0   
3     2020-05-15 00:00:00           3       0.0       0.0          0.0   
4     2020-05-15 00:00:00           4       0.0       0.0          0.0   
...                   ...         ...       ...       ...          ...   
14844 2020-05-22 20:00:00     

In [418]:
power=power[['SOURCE_KEY','AMBIENT_TEMPERATURE','AC_POWER','MODULE_TEMPERATURE','IRRADIATION','YEAR','MONTH','DAY','HOUR','MINUTES','MINUTES_PASS']]
power.head()

Unnamed: 0,SOURCE_KEY,AMBIENT_TEMPERATURE,AC_POWER,MODULE_TEMPERATURE,IRRADIATION,YEAR,MONTH,DAY,HOUR,MINUTES,MINUTES_PASS
0,0,25.184316,0.0,22.857507,0.0,2020,5,15,0,0,0
1,1,25.184316,0.0,22.857507,0.0,2020,5,15,0,0,0
2,2,25.184316,0.0,22.857507,0.0,2020,5,15,0,0,0
3,3,25.184316,0.0,22.857507,0.0,2020,5,15,0,0,0
4,4,25.184316,0.0,22.857507,0.0,2020,5,15,0,0,0


In [420]:
X=power[['SOURCE_KEY','AMBIENT_TEMPERATURE','MODULE_TEMPERATURE','IRRADIATION','YEAR','MONTH','DAY','HOUR','MINUTES','MINUTES_PASS']]

In [422]:
X.head()

Unnamed: 0,SOURCE_KEY,AMBIENT_TEMPERATURE,MODULE_TEMPERATURE,IRRADIATION,YEAR,MONTH,DAY,HOUR,MINUTES,MINUTES_PASS
0,0,25.184316,22.857507,0.0,2020,5,15,0,0,0
1,1,25.184316,22.857507,0.0,2020,5,15,0,0,0
2,2,25.184316,22.857507,0.0,2020,5,15,0,0,0
3,3,25.184316,22.857507,0.0,2020,5,15,0,0,0
4,4,25.184316,22.857507,0.0,2020,5,15,0,0,0


In [424]:
Y=power[["AC_POWER"]]

### SPLITTING THE DATASET

In [427]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=1)
print(X_train.shape)

(11879, 10)


### XG BOOST

In [430]:
model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=1000, learning_rate=0.1)
# Train the model
model.fit(X_train, Y_train)

In [431]:

# Make predictions
Y_pred = model.predict(X_test)

# Evaluate the model using Mean Squared Error (MSE)
mse = mean_squared_error(Y_test, Y_pred)

print(f"Mean Squared Error: {mse}")

Mean Squared Error: 1611.4176767512192


### RANDOM FOREST

In [435]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
rfr = RandomForestRegressor()
rfr.fit(X_train,Y_train)
y_pred_rfr = rfr.predict(X_test)
R2_Score_rfr = round(r2_score(y_pred_rfr,Y_test) * 100, 2)
MAE_rfr = mean_absolute_error(y_pred_rfr, Y_test)

print("R2 Score : ",R2_Score_rfr,"%")
print("MAE : ",MAE_rfr,"")

  return fit_method(estimator, *args, **kwargs)


R2 Score :  98.97 %
MAE :  14.92303275288244 


In [436]:
prediction_rfr = rfr.predict(X_test)
print(prediction_rfr)

[ 462.40451192  499.92400002    4.62683929 ... 1021.34669615    0.
    0.        ]


In [437]:
print(Y_test)

          AC_POWER
5339    449.816667
736     495.428571
2316      3.885714
6954    562.812500
2416    200.042857
...            ...
2963   1071.625000
7384    750.342857
9094   1078.785714
8094      0.000000
11590     0.000000

[2970 rows x 1 columns]


### LSTM

In [438]:
print(power.columns)


Index(['SOURCE_KEY', 'AMBIENT_TEMPERATURE', 'AC_POWER', 'MODULE_TEMPERATURE',
       'IRRADIATION', 'YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTES',
       'MINUTES_PASS'],
      dtype='object')


In [443]:
power_numeric = power.select_dtypes(include=['number'])
# Apply MinMaxScaler
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(power_numeric)
# Convert back to DataFrame
normalized_df = pd.DataFrame(normalized_data, columns=power_numeric.columns)
# Get target column index
target_column = normalized_df.columns.get_loc("AC_POWER")
print(target_column)

2


In [445]:
def create_sequences(data, target_column, seq_length=10):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data.iloc[i+seq_length, target_column])
    return np.array(X), np.array(y)
seq_length = 10 
X, y = create_sequences(normalized_df, target_column=2, seq_length=seq_length)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

  super().__init__(**kwargs)


Epoch 1/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 32ms/step - loss: 0.0111 - val_loss: 0.0033
Epoch 2/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 26ms/step - loss: 0.0027 - val_loss: 0.0027
Epoch 3/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step - loss: 0.0020 - val_loss: 0.0028
Epoch 4/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 23ms/step - loss: 0.0019 - val_loss: 0.0024
Epoch 5/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - loss: 0.0018 - val_loss: 0.0022
Epoch 6/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step - loss: 0.0015 - val_loss: 0.0031
Epoch 7/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - loss: 0.0015 - val_loss: 0.0021
Epoch 8/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - loss: 0.0015 - val_loss: 0.0027
Epoch 9/30
[1m371/371[0m [

In [447]:
y_pred = model.predict(X_test)

[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step


In [448]:

y_test_reshaped = y_test.reshape(-1, 1)
combined = np.hstack((y_test_reshaped, y_pred))
comparison_df = pd.DataFrame(combined, columns=["Actual Values", "Predicted Values"])
print(comparison_df)


      Actual Values  Predicted Values
0          0.534191          0.509876
1          0.522813          0.515609
2          0.514421          0.517127
3          0.527654          0.517120
4          0.602049          0.558951
...             ...               ...
2963       0.000000          0.007844
2964       0.000000          0.007939
2965       0.000000          0.008018
2966       0.000000          0.008070
2967       0.000000          0.008092

[2968 rows x 2 columns]


In [482]:
R2_Score = round(r2_score(y_pred,y_test_reshaped) * 100, 2)
MAE= mean_absolute_error(y_pred, y_test_reshaped)
print("R2 Score : ",R2_Score,"%")
print("MAE : ",MAE,"")

R2 Score :  97.18 %
MAE :  0.03146557205356216 


### INPUT FROM USER


In [478]:
def predict_from_input():
    feature_names = ['SOURCE_KEY', 'AMBIENT_TEMPERATURE', 'AC_POWER', 'MODULE_TEMPERATURE', 
                     'IRRADIATION', 'YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTES', 'MINUTES_PASS']
    user_input = {}
    for feature in feature_names:
        if feature == 'AC_POWER':
            user_input[feature] = 0.0  
        else:
            user_input[feature] = float(input(f"Enter value for {feature}: "))
    user_input_df = pd.DataFrame([user_input])
    user_input_scaled = scaler.transform(user_input_df)
    user_input_sequence = np.tile(user_input_scaled, (seq_length, 1)).reshape(1, seq_length, -1)  
    # Predict
    predicted_scaled = model.predict(user_input_sequence)
    placeholder = np.zeros((1, len(scaler.feature_names_in_)))
    placeholder[:, 2] = predicted_scaled[:, 0]    
    predicted_actual = scaler.inverse_transform(placeholder)[:, 2]  
    print(f"\nPredicted AC Power Output: {predicted_actual[0]:.2f} kW")
predict_from_input()




Enter value for SOURCE_KEY:  1
Enter value for AMBIENT_TEMPERATURE:  29
Enter value for MODULE_TEMPERATURE:  52
Enter value for IRRADIATION:  0.84
Enter value for YEAR:  2024
Enter value for MONTH:  2
Enter value for DAY:  1
Enter value for HOUR:  12
Enter value for MINUTES:  30
Enter value for MINUTES_PASS:  880


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step

Predicted AC Power Output: 1360.86 kW
