# Capstone 3: Forecasting Solar Power Using LTSM   
### "Exploring ML Techniques for Solar Predictions"
Audrey Malloy

Date Updated: April 22nd, 2025

**Project Goal:** Be able to identify the most effective short-term solar power forecasting model using time series forecasting and deep learning techniques such as LTSM and XGBoost. 

### Objectives for Data Wrangling
#### 1. Handle Missing Data  
#### 2. Data Cleaning & Transformation
#### 3. Feature Engineering  

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [71]:
os.chdir('C:/Users/aamal/Desktop/Springboard/Springboard_DataScience/Capstone-3-Solar/Data')
plant_1_weather = pd.read_csv('Plant_1_Weather_Sensor_Data.csv')
plant_1_generation = pd.read_csv('Plant_1_Generation_Data.csv')
plant_2_weather = pd.read_csv('Plant_2_Weather_Sensor_Data.csv')
plant_2_generation = pd.read_csv('Plant_2_Generation_Data.csv')


In [73]:
print(plant_1_weather.head())
print("------------------------------------------------------------")
print(plant_1_generation.head())

             DATE_TIME  PLANT_ID       SOURCE_KEY  AMBIENT_TEMPERATURE  \
0  2020-05-15 00:00:00   4135001  HmiyD2TTLFNqkNe            25.184316   
1  2020-05-15 00:15:00   4135001  HmiyD2TTLFNqkNe            25.084589   
2  2020-05-15 00:30:00   4135001  HmiyD2TTLFNqkNe            24.935753   
3  2020-05-15 00:45:00   4135001  HmiyD2TTLFNqkNe            24.846130   
4  2020-05-15 01:00:00   4135001  HmiyD2TTLFNqkNe            24.621525   

   MODULE_TEMPERATURE  IRRADIATION  
0           22.857507          0.0  
1           22.761668          0.0  
2           22.592306          0.0  
3           22.360852          0.0  
4           22.165423          0.0  
------------------------------------------------------------
          DATE_TIME  PLANT_ID       SOURCE_KEY  DC_POWER  AC_POWER  \
0  15-05-2020 00:00   4135001  1BY6WEcLGh8j5v7       0.0       0.0   
1  15-05-2020 00:00   4135001  1IF53ai7Xc0U56Y       0.0       0.0   
2  15-05-2020 00:00   4135001  3PZuoBAID5Wc2HD       0.0      

In [21]:
print(plant_2_weather.head())
print("------------------------------------------------------------")
print(plant_2_generation.head())

             DATE_TIME  PLANT_ID       SOURCE_KEY  AMBIENT_TEMPERATURE  \
0  2020-05-15 00:00:00   4136001  iq8k7ZNt4Mwm3w0            27.004764   
1  2020-05-15 00:15:00   4136001  iq8k7ZNt4Mwm3w0            26.880811   
2  2020-05-15 00:30:00   4136001  iq8k7ZNt4Mwm3w0            26.682055   
3  2020-05-15 00:45:00   4136001  iq8k7ZNt4Mwm3w0            26.500589   
4  2020-05-15 01:00:00   4136001  iq8k7ZNt4Mwm3w0            26.596148   

   MODULE_TEMPERATURE  IRRADIATION  
0           25.060789          0.0  
1           24.421869          0.0  
2           24.427290          0.0  
3           24.420678          0.0  
4           25.088210          0.0  
------------------------------------------------------------
             DATE_TIME  PLANT_ID       SOURCE_KEY  DC_POWER  AC_POWER  \
0  2020-05-15 00:00:00   4136001  4UPUqMRk7TRMgml       0.0       0.0   
1  2020-05-15 00:00:00   4136001  81aHJ1q11NBPMrL       0.0       0.0   
2  2020-05-15 00:00:00   4136001  9kRcWv60rDACzjR    

In [None]:
#Checking for missing data

In [75]:
datasets = {
    "Plant 1 Weather": plant_1_weather,
    "Plant 2 Weather": plant_2_weather,
    "Plant 1 Generation": plant_1_generation,
    "Plant 2 Generation": plant_2_generation
}

for name, df in datasets.items():
    print(f"\nMissing values in {name}:")
    print(df.isnull().sum())



Missing values in Plant 1 Weather:
DATE_TIME              0
PLANT_ID               0
SOURCE_KEY             0
AMBIENT_TEMPERATURE    0
MODULE_TEMPERATURE     0
IRRADIATION            0
dtype: int64

Missing values in Plant 2 Weather:
DATE_TIME              0
PLANT_ID               0
SOURCE_KEY             0
AMBIENT_TEMPERATURE    0
MODULE_TEMPERATURE     0
IRRADIATION            0
dtype: int64

Missing values in Plant 1 Generation:
DATE_TIME      0
PLANT_ID       0
SOURCE_KEY     0
DC_POWER       0
AC_POWER       0
DAILY_YIELD    0
TOTAL_YIELD    0
dtype: int64

Missing values in Plant 2 Generation:
DATE_TIME      0
PLANT_ID       0
SOURCE_KEY     0
DC_POWER       0
AC_POWER       0
DAILY_YIELD    0
TOTAL_YIELD    0
dtype: int64


In [77]:
print(plant_1_weather.dtypes)

DATE_TIME               object
PLANT_ID                 int64
SOURCE_KEY              object
AMBIENT_TEMPERATURE    float64
MODULE_TEMPERATURE     float64
IRRADIATION            float64
dtype: object


In [83]:
plant_1_weather.head()

Unnamed: 0,DATE_TIME,PLANT_ID,SOURCE_KEY,AMBIENT_TEMPERATURE,MODULE_TEMPERATURE,IRRADIATION
0,2020-05-15 00:00:00,4135001,HmiyD2TTLFNqkNe,25.184316,22.857507,0.0
1,2020-05-15 00:15:00,4135001,HmiyD2TTLFNqkNe,25.084589,22.761668,0.0
2,2020-05-15 00:30:00,4135001,HmiyD2TTLFNqkNe,24.935753,22.592306,0.0
3,2020-05-15 00:45:00,4135001,HmiyD2TTLFNqkNe,24.84613,22.360852,0.0
4,2020-05-15 01:00:00,4135001,HmiyD2TTLFNqkNe,24.621525,22.165423,0.0


In [None]:
#Converting DATE_TIME to datetime 

In [87]:
for df in [plant_1_weather, plant_2_weather, plant_1_generation, plant_2_generation]:
    df['DATE_TIME'] = pd.to_datetime(df['DATE_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce')

In [89]:
df.duplicated().sum()

0

In [91]:
df.dtypes

DATE_TIME      datetime64[ns]
PLANT_ID                int64
SOURCE_KEY             object
DC_POWER              float64
AC_POWER              float64
DAILY_YIELD           float64
TOTAL_YIELD           float64
dtype: object

In [None]:
#Feature Engineering 

In [93]:
for df in [plant_1_generation, plant_2_generation, plant_1_weather, plant_2_weather]:
    df['Hour'] = df['DATE_TIME'].dt.hour
    df['Day'] = df['DATE_TIME'].dt.day
    df['Month'] = df['DATE_TIME'].dt.month
    df['Year'] = df['DATE_TIME'].dt.year


In [95]:
df.info

<bound method DataFrame.info of                DATE_TIME  PLANT_ID       SOURCE_KEY  AMBIENT_TEMPERATURE  \
0    2020-05-15 00:00:00   4136001  iq8k7ZNt4Mwm3w0            27.004764   
1    2020-05-15 00:15:00   4136001  iq8k7ZNt4Mwm3w0            26.880811   
2    2020-05-15 00:30:00   4136001  iq8k7ZNt4Mwm3w0            26.682055   
3    2020-05-15 00:45:00   4136001  iq8k7ZNt4Mwm3w0            26.500589   
4    2020-05-15 01:00:00   4136001  iq8k7ZNt4Mwm3w0            26.596148   
...                  ...       ...              ...                  ...   
3254 2020-06-17 22:45:00   4136001  iq8k7ZNt4Mwm3w0            23.511703   
3255 2020-06-17 23:00:00   4136001  iq8k7ZNt4Mwm3w0            23.482282   
3256 2020-06-17 23:15:00   4136001  iq8k7ZNt4Mwm3w0            23.354743   
3257 2020-06-17 23:30:00   4136001  iq8k7ZNt4Mwm3w0            23.291048   
3258 2020-06-17 23:45:00   4136001  iq8k7ZNt4Mwm3w0            23.202871   

      MODULE_TEMPERATURE  IRRADIATION  Hour  Day  Month

In [97]:
datapath = 'C:/Users/aamal/Desktop/Springboard/Springboard_DataScience/Capstone-3-Solar/Data'

plant_1_generation_file = 'plant_1_generation_cleaned.csv'
plant_2_generation_file = 'plant_2_generation_cleaned.csv'
plant_1_weather_file = 'plant_1_weather_cleaned.csv'
plant_2_weather_file = 'plant_2_weather_cleaned.csv'

# Save Plant 1 Generation data
plant_1_generation_path = os.path.join(datapath, plant_1_generation_file)
plant_1_generation.to_csv(plant_1_generation_path, index=False)
print(f"Plant 1 Generation data saved successfully to '{plant_1_generation_path}'")

# Save Plant 2 Generation data
plant_2_generation_path = os.path.join(datapath, plant_2_generation_file)
plant_2_generation.to_csv(plant_2_generation_path, index=False)
print(f"Plant 2 Generation data saved successfully to '{plant_2_generation_path}'")

# Save Plant 1 Weather data
plant_1_weather_path = os.path.join(datapath, plant_1_weather_file)
plant_1_weather.to_csv(plant_1_weather_path, index=False)
print(f"Plant 1 Weather data saved successfully to '{plant_1_weather_path}'")

# Save Plant 2 Weather data
plant_2_weather_path = os.path.join(datapath, plant_2_weather_file)
plant_2_weather.to_csv(plant_2_weather_path, index=False)
print(f"Plant 2 Weather data saved successfully to '{plant_2_weather_path}'")

Plant 1 Generation data saved successfully to 'C:/Users/aamal/Desktop/Springboard/Springboard_DataScience/Capstone-3-Solar/Data\plant_1_generation_cleaned.csv'
Plant 2 Generation data saved successfully to 'C:/Users/aamal/Desktop/Springboard/Springboard_DataScience/Capstone-3-Solar/Data\plant_2_generation_cleaned.csv'
Plant 1 Weather data saved successfully to 'C:/Users/aamal/Desktop/Springboard/Springboard_DataScience/Capstone-3-Solar/Data\plant_1_weather_cleaned.csv'
Plant 2 Weather data saved successfully to 'C:/Users/aamal/Desktop/Springboard/Springboard_DataScience/Capstone-3-Solar/Data\plant_2_weather_cleaned.csv'
