# NSW Electricity Demand Analysis
## Case Study 1: Daily, Weekly, Seasonal Demand Patterns

This notebook performs exploratory data analysis (EDA) on NSW operational demand data 
to understand daily cycles, weekly behaviour, and seasonal trends.

Dataset Source: AEMO Operational Demand (via Python library or uploaded dataset)
Region: NSW1
Timeframe: 1 January 2024 to 31 December 2024 (set by `start_date` / `end_date` in Step 1; `end_date` is exclusive)


In [1]:
# Step 1: Import necessary library files and set up cache
import pandas as pd
from nemosis import dynamic_data_compiler
from datetime import datetime, timedelta
import os
from pymongo import MongoClient

# --- Cache Setup ---
# Scratch directory handed to nemosis as its raw-data cache for downloaded AEMO CSV files
raw_data_cache_dir = "/tmp/nemosis_cache"
os.makedirs(name=raw_data_cache_dir, exist_ok=True)
print(f"Cache directory ensured: {raw_data_cache_dir}")

# --- AEMO Table Name ---
# DISPATCHREGIONSUM is the table queried for regional demand data
table_name = "DISPATCHREGIONSUM"

# --- Date range setup ---
# The ingestion loop treats end_date as an exclusive bound (`while current < end_date`)
start_date, end_date = datetime(2024, 1, 1), datetime(2025, 1, 1)
print(f"Data range defined from {start_date.date()} to {end_date.date()}")


Cache directory ensured: /tmp/nemosis_cache
Data range defined from 2024-01-01 to 2025-01-01


In [2]:
# Step 2: Connect to MongoDB
# MongoClient connects lazily: the constructor succeeds even when no server is
# running, and the first real command then blocks for the default 30 s server
# selection timeout. Use a short timeout and an explicit ping so that an
# unreachable server is reported quickly instead of hanging the cell.
try:
    client = MongoClient("mongodb://localhost:27017/", serverSelectionTimeoutMS=5000)
    client.admin.command("ping")  # forces a round trip; raises if the server is unreachable

    # Access the database and collection
    db = client["AEMO_Data_Archive"]
    collection = db["NSW_Operational_Demand_2024"]

    # Optional: Check connection by listing existing collections
    print(f"Connected to MongoDB. Collections in 'AEMO_Data_Archive': {db.list_collection_names()}")
    print(f"Target collection is '{collection.name}'.")

except Exception as e:
    print(f"Failed to connect to MongoDB: {e}")
    print("Please ensure your MongoDB service (mongod) is running on localhost:27017.")
    # Re-raise so "Run All" stops here: every later cell needs `collection`, and
    # swallowing the error would only surface later as a confusing NameError.
    raise


KeyboardInterrupt: 

In [None]:
# Step 3: Loop through data month-by-month and ingest into MongoDB
# NOTE(review): insert_many only appends, so running this cell twice stores every
# document twice. Empty the collection (or switch to upserts) before a re-run.
current = start_date
failed_months = []  # labels of months whose download or insert raised

print("\n--- Starting Data Ingestion Loop ---")

while current < end_date:
    # Day 28 exists in every month and 28 + 4 days always lands in the following
    # month, so snapping back to day 1 gives the start of the next month.
    next_month = (current.replace(day=28) + timedelta(days=4)).replace(day=1)
    # nemosis keeps rows with start < SETTLEMENTDATE <= end, so the window must end
    # exactly on the next month's start. Ending one second earlier silently drops
    # the interval stamped 00:00:00 on the first of each month. Clamp to end_date
    # so a range that stops mid-month is respected.
    window_end = min(next_month, end_date)

    start_str = current.strftime("%Y/%m/%d %H:%M:%S")
    end_str = window_end.strftime("%Y/%m/%d %H:%M:%S")
    month_label = current.strftime('%B %Y')

    print(f"\nProcessing {start_str} → {end_str} for {table_name}...")

    try:
        df = dynamic_data_compiler(
            start_str,
            end_str,
            table_name,
            raw_data_cache_dir
        )

        # Filter for the NSW region using the 'REGIONID' column
        if 'REGIONID' in df.columns:
            df_nsw = df[df["REGIONID"] == "NSW1"].copy()

            if not df_nsw.empty:
                # Insert into MongoDB, one document per row
                records = df_nsw.to_dict("records")
                result = collection.insert_many(records)
                print(f"Inserted {len(result.inserted_ids)} documents for NSW1 in {month_label}.")
            else:
                print("No NSW1 specific data found for this period.")
        else:
            print(f"Error: 'REGIONID' column not found in the {table_name} data frame.")

        del df  # Release memory before the next month is downloaded

    except Exception as e:
        # Best effort: report the failure, remember the month, and carry on.
        print(f"An error occurred while processing {month_label}: {e}")
        failed_months.append(month_label)

    current = next_month

print("\n--- Data ingestion complete! ---")
if failed_months:
    # Make partial ingestion visible instead of hiding it in the scrollback.
    print(f"Months that failed and were skipped: {', '.join(failed_months)}")

In [None]:
# Read every document in the collection back into a single DataFrame.
cursor = collection.find({})
df = pd.DataFrame(list(cursor))

# An empty collection would otherwise surface later as a KeyError when the
# SETTLEMENTDATE / TOTALDEMAND columns are selected; fail here with a clear message.
if df.empty:
    raise ValueError(
        f"No documents found in collection '{collection.name}'; run the ingestion step first."
    )

# The former mid-cell df.head() was dropped: only a cell's last expression is
# displayed, so its result was silently discarded.
df.info()
df.columns


In [None]:
# Preview the first rows of the frame loaded from MongoDB.
df.head()

In [None]:
# Keep only the timestamp and demand columns. The explicit .copy() makes
# df_analysis an independent frame, so adding columns to it later does not
# write into a slice of `df` (which triggers SettingWithCopyWarning).
df_analysis = df[['SETTLEMENTDATE', 'TOTALDEMAND']].copy()


In [None]:
# Preview the two-column analysis frame.
df_analysis.head()

In [None]:
# Parse the timestamp once and derive every calendar feature from that single
# parsed series. This fixes the undefined names `dfdfdf` / `dfdf` (NameError) and
# stops depending on `df['SETTLEMENTDATE']` already having a datetime dtype.
settlement_ts = pd.to_datetime(df_analysis['SETTLEMENTDATE'])

# .assign returns a new frame, so the cell can be re-run safely and never writes
# into a slice of `df`.
df_analysis = df_analysis.assign(
    SETTLEMENTDATE=settlement_ts,
    hour=settlement_ts.dt.hour,
    day=settlement_ts.dt.day,
    weekday=settlement_ts.dt.day_name(),
    month=settlement_ts.dt.month,
    month_name=settlement_ts.dt.month_name(),
    date=settlement_ts.dt.date,
)


In [None]:
# Preview the analysis frame after the calendar feature columns are added.
df_analysis.head()

In [None]:
# Season code -> label. The codes follow Southern Hemisphere seasons:
# 1 = Dec-Feb, 2 = Mar-May, 3 = Jun-Aug, 4 = Sep-Nov.
season_labels = {1: 'Summer', 2: 'Autumn', 3: 'Winter', 4: 'Spring'}

# `% 12` maps December to 0 so it groups with January and February;
# integer division by 3 then buckets the months into quarters 0..3.
month_number = df_analysis['SETTLEMENTDATE'].dt.month
df_analysis['season'] = (month_number % 12) // 3 + 1
df_analysis['season_name'] = df_analysis['season'].map(season_labels)


In [None]:
# Preview the analysis frame after the season columns are added.
df_analysis.head()

In [None]:
# Print the analysis frame's column dtypes and non-null counts.
df_analysis.info()