In [1]:
import pandas as pd
import logging
import matplotlib.pyplot as plt
import numpy as np

# Root logger, used by later cells to report problems.
logger = logging.getLogger()

# Change current directory to parent directory; later cells use paths
# relative to it (e.g. 'data/processed/merged.csv').
import os

# os.chdir("..") is relative to wherever the kernel currently is, so running
# this cell twice would climb two levels. The sentinel makes the move happen
# only once per kernel session; a kernel restart clears it again.
if "PROJECT_ROOT" not in globals():
    os.chdir("..")
    PROJECT_ROOT = os.getcwd()

In [4]:
# Define global variables
# Path of the merged dataset, relative to the current working directory.
RAW_PATH_MERGED_DATA = 'data/processed/merged.csv'

# Fail fast when the file is missing. Merely printing a message would let the
# following cells run anyway and either raise a NameError or silently reuse a
# stale `merged_data` left in the kernel by an earlier run.
if not os.path.exists(RAW_PATH_MERGED_DATA):
    raise FileNotFoundError(
        f"File not found: {RAW_PATH_MERGED_DATA} "
        f"(current working directory: {os.getcwd()})"
    )

# Load the file and preview the first rows
merged_data = pd.read_csv(RAW_PATH_MERGED_DATA)
print(merged_data.head())

              DateTime    Time_fault Fault  Time_scada  Error  \
0  2014-05-14 14:39:44  1.400096e+09    GF         NaN    NaN   
1  2014-05-14 14:50:24  1.400097e+09    GF         NaN    NaN   
2  2014-05-14 14:58:56  1.400098e+09    GF         NaN    NaN   
3  2014-05-14 15:09:36  1.400098e+09    GF         NaN    NaN   
4  2014-05-14 15:20:16  1.400099e+09    GF         NaN    NaN   

   WEC: ava. windspeed  WEC: max. windspeed  WEC: min. windspeed  \
0                  NaN                  NaN                  NaN   
1                  NaN                  NaN                  NaN   
2                  NaN                  NaN                  NaN   
3                  NaN                  NaN                  NaN   
4                  NaN                  NaN                  NaN   

   WEC: ava. Rotation  WEC: max. Rotation  ...  Inverter averages  \
0                 NaN                 NaN  ...                NaN   
1                 NaN                 NaN  ...                

  merged_data = pd.read_csv(RAW_PATH_MERGED_DATA)


## Adding Seasonality (High or Low) as Feature for the Model

### Based on the Market Data Season Analysis, using the Average and Max Settlement Point Price for Each Month in 2014

### We determined

#### Months of High Seasons as: **['Feb', 'Mar', 'Apr', 'May', 'Jun','Aug']**

#### Months of Low Seasons as: **['Jan', 'Jul', 'Sep', 'Oct', 'Nov','Dec']**

In [5]:
# Season label ('High' or 'Low') for every month abbreviation, in calendar
# order. Months listed in the inner set are 'High'; all others are 'Low'.
SEASON_MAPPING = {
    month: 'High' if month in {'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Aug'} else 'Low'
    for month in (
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
    )
}

# Both new columns are derived from 'DateTime', so stop with an explicit
# message when it is absent. Logging alone would let the next statement fail
# with a bare KeyError that hides the real cause.
if 'DateTime' not in merged_data.columns:
    logger.error("DateTime column is missing from the dataset.")
    raise KeyError("DateTime column is missing from the dataset.")

# Convert the 'DateTime' column to datetime if not already
merged_data['DateTime'] = pd.to_datetime(merged_data['DateTime'])

# English month abbreviations by calendar month number (1 = Jan ... 12 = Dec).
# strftime('%b') follows the process locale, so under a non-English LC_TIME it
# would yield names that are not keys of SEASON_MAPPING and every Season would
# become NaN. A fixed lookup keeps the result independent of the locale.
MONTH_ABBREVIATIONS = (
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
)
month_lookup = dict(enumerate(MONTH_ABBREVIATIONS, start=1))

# Extract the month from the 'DateTime' column and map it to the season.
# Rows whose DateTime is missing get NaN in both columns.
merged_data['Month'] = merged_data['DateTime'].dt.month.map(month_lookup)  # Month abbreviation
merged_data['Season'] = merged_data['Month'].map(SEASON_MAPPING)  # Map month to season

# Display the updated dataset with the new 'Season' column
print(merged_data[['DateTime', 'Month', 'Season']].head())

             DateTime Month Season
0 2014-05-14 14:39:44   May   High
1 2014-05-14 14:50:24   May   High
2 2014-05-14 14:58:56   May   High
3 2014-05-14 15:09:36   May   High
4 2014-05-14 15:20:16   May   High


### Analyze season-wise statistics

In [6]:
# Write the dataset, now including the 'Month' and 'Season' columns, to a new
# CSV file (row index omitted).
UPDATED_PATH = 'data/processed/merged_with_season.csv'
merged_data.to_csv(UPDATED_PATH, index=False)
print(f"Updated dataset with 'Season' column saved to {UPDATED_PATH}")

# Per-season summary: which statistics to compute for which column.
season_aggregations = {
    'WEC: ava. Power': ['mean', 'max'],
    'WEC: Production minutes': 'sum',
}

# One row per season; reset_index turns 'Season' back into a regular column.
season_stats = (
    merged_data
    .groupby('Season')
    .agg(season_aggregations)
    .reset_index()
)

# Show the season-wise summary table
print(season_stats)

Updated dataset with 'Season' column saved to data/processed/merged_with_season.csv
  Season WEC: ava. Power         WEC: Production minutes
                    mean     max                     sum
0   High      840.817974  3070.0                621965.0
1    Low     1030.455417  3071.0                791445.0


In [None]:
# Save the updated dataset if needed
# UPDATED_PATH = 'data/processed/merged_with_season.csv'
# merged_data.to_csv(UPDATED_PATH, index=False)
# print(f"Updated dataset with 'Season' column saved to {UPDATED_PATH}")