# Homework 2

## Data Science Questions
1) How does power usage evolve throughout the day, by household?
2) Within each household, which appliances dominate consumption?
3) How do the 3 households compare in terms of lifestyle patterns? 


Potential chart types:
* Heatmap of hour by day of week.
  * One panel per household, so daily rhythms can be compared side by side.
* Pie chart of power sources with slider that will evolve the proportions of power sources over time
  * Maybe also have a dropdown menu for household number. 
  * I could also aggregate by type (necessary, recreational, etc.)
* Chart like the one from the Altair lab: a bar chart of average power by source, plus a line chart that evolves over time.
  * You would be able to click on a bar to highlight the line

In [2]:
# Import packages
import pandas as pd
import altair as alt
import os 
import glob

## Chart 1

In [3]:
## Load data
def load_household_power(household_num):
    """Load every daily smart-meter CSV for one household into one DataFrame.

    Files are looked up with the glob pattern
    ``./eco/eco/<household_num>_sm_csv/<household_num>/*.csv`` (relative to the
    current working directory). Each file name, minus its ``.csv`` extension,
    is used as the start timestamp of that file's rows, and rows are assumed
    to be one second apart.

    Args:
        household_num: Household identifier exactly as it appears in the
            directory names, e.g. ``"04"``.

    Returns:
        A DataFrame with the columns ``power`` (float, ``NaN`` where the file
        held the missing-value marker ``-1``), ``timestamp``, ``hour``,
        ``day_of_week`` and ``household``, with one row per CSV row and files
        concatenated in sorted file-name order.

    Raises:
        ValueError: If no CSV file matches the household's glob pattern.
    """
    pattern = f"./eco/eco/{household_num}_sm_csv/{household_num}/*.csv"
    files = sorted(glob.glob(pattern))
    if not files:
        # pd.concat([]) would fail with an opaque "No objects to concatenate";
        # name the pattern so a wrong path or household id is obvious.
        raise ValueError(f"No CSV files found for household {household_num!r} (pattern: {pattern})")

    dfs = []
    for filepath in files:
        # The file name without its extension is the date of the recording
        date_str = os.path.splitext(os.path.basename(filepath))[0]

        # Read only the first column
        df = pd.read_csv(filepath, header=None, usecols=[0], names=["power"])

        # Replace missing values (-1) with NaN. mask() keeps the column
        # numeric; replacing with pd.NA would turn it into object dtype and
        # break or slow down later numeric aggregation.
        df["power"] = df["power"].mask(df["power"] == -1)

        # Build a proper timestamp column (one row per second)
        df["timestamp"] = pd.date_range(start=date_str, periods=len(df), freq="s")
        df["hour"] = df["timestamp"].dt.hour
        df["day_of_week"] = df["timestamp"].dt.day_name()

        # Household number
        df["household"] = household_num

        dfs.append(df)

    return pd.concat(dfs, ignore_index=True)

# Load the three households of interest with the shared loader
df_04, df_05, df_06 = map(load_household_power, ("04", "05", "06"))

# Stack them into a single long frame with a fresh 0..n-1 index
df1 = pd.concat((df_04, df_05, df_06), ignore_index=True)

In [9]:
# Summary: print the combined frame's (rows, columns) and preview its first rows
print(df1.shape)
df1.head()

(51840000, 5)


Unnamed: 0,power,timestamp,hour,day_of_week,household
0,475.051,2012-06-27 00:00:00,0,Wednesday,4
1,473.888,2012-06-27 00:00:01,0,Wednesday,4
2,479.664,2012-06-27 00:00:02,0,Wednesday,4
3,473.089,2012-06-27 00:00:03,0,Wednesday,4
4,476.251,2012-06-27 00:00:04,0,Wednesday,4


In [None]:
# Altair refuses to embed more than 5,000 rows by default and raises
# MaxRowsError. The per-date line data below has one row per
# household x date x hour, which exceeds that limit for this dataset, so the
# limit is lifted before any chart is built.
alt.data_transformers.disable_max_rows()

# Aggregate to hour x day_of_week x household
heatmap_data = (df1
    .groupby(["household", "hour", "day_of_week"])["power"]
    .mean()
    .reset_index()
    .rename(columns={"power": "avg_power"})
)

# Order days correctly (alphabetical order would be the default)
day_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Selection on click - binds to hour, so clicking a cell selects that hour in
# every household panel and in the linked line chart
click = alt.selection_point(fields=["hour"])

# Heatmap: one panel per household, unselected hours are dimmed
heatmap = (
    alt.Chart(heatmap_data)
    .mark_rect()
    .encode(
        x=alt.X("hour:O", title="Hour of Day"),
        y=alt.Y("day_of_week:O", sort=day_order, title=None),
        color=alt.Color(
            "avg_power:Q",
            scale=alt.Scale(scheme="orangered"),
            title="Avg Power (W)"
        ),
        opacity=alt.condition(click, alt.value(1.0), alt.value(0.5)),
        tooltip=[
            alt.Tooltip("household:N", title="Household"),
            alt.Tooltip("hour:O", title="Hour"),
            alt.Tooltip("day_of_week:O", title="Day"),
            alt.Tooltip("avg_power:Q", title="Avg Power (W)", format=".1f")
        ]
    )
    .add_params(click)
    .facet(column=alt.Column("household:N", title="Household"))
    .properties(title="Average Power Consumption by Hour and Day")
)

# Linked line chart data: mean power per household x hour x calendar date.
# dt.normalize() truncates to midnight while staying datetime64, which avoids
# materialising one Python date object per row and a later pd.to_datetime().
line_data = (df1
    .groupby(["household", "hour",
              df1["timestamp"].dt.normalize().rename("date")])["power"]
    .mean()
    .reset_index()
)

# Linked line chart - filters to the selected hour(s) across all days.
# The y channel aggregates with mean() so there is exactly one point per
# household and date: with an hour selected this is that hour's average, and
# with nothing selected (the filter passes every hour) it is the average of
# the hourly means instead of 24 overlapping points per date.
line_chart = (
    alt.Chart(line_data)
    .mark_line(point=True)
    .encode(
        x=alt.X("date:T", title="Date"),
        y=alt.Y("mean(power):Q", title="Avg Power (W)"),
        color=alt.Color("household:N", title="Household"),
        tooltip=[
            alt.Tooltip("household:N"),
            alt.Tooltip("date:T"),
            alt.Tooltip("mean(power):Q", title="Avg Power (W)", format=".1f")
        ]
    )
    .transform_filter(click)
    .properties(
        title="Daily Average Power for Selected Hour",
        width=700,
        height=200
    )
)

# Combine vertically; the heatmap's quantitative color scale and the line
# chart's household color scale must not be merged
final_chart = (heatmap & line_chart).resolve_scale(color="independent")
final_chart