# Lakeland Electrical Capstone Project


## Potential Data Sources - publicly available
- NOAA or National Weather Association - Free weather data
- U.S Census Bureau or American Community Survey - Census Data
    - Try to find a Lakeland Regional one
- Energy Information Administration or OpenEI - Energy Industry Standards
    - national/state level only


## Weather Start
- NWS API - https://api.weather.gov/
    - Info: https://www.weather.gov/documentation/services-web-api
    - Lakeland grid - https://forecast.weather.gov/MapClick.php?x=178&y=130&site=tbw&zmx=&zmy=&map_x=178&map_y=130 = Lat 28.05 N, Lon 81.96 W (Rough Center)

- Lakeland Electrical Coverage - https://www.arcgis.com/apps/Viewer/index.html?appid=ffbd05dc2e94406ebcad89e30636877c


## FORECASTS - NEXT 12 HOURS AND NEXT WEEK

In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# API for Lakeland - chosen rough center of Lakeland Electrical coverage map
lat, lon = 28.05, -81.96
points_url = f"https://api.weather.gov/points/{lat},{lon}"

# Define headers for the API request
headers = {
    "User-Agent": "myweatherapp (contact@myweatherapp.com)"
}

# Fetch the grid information
response = requests.get(points_url, headers=headers)

if response.status_code == 200:
    data = response.json()
    forecast_url = data["properties"]["forecast"]
    forecast_hourly_url = data["properties"]["forecastHourly"]
    grid_url = data["properties"]["forecastGridData"]

    print("12-hour Forecast URL:", forecast_url)

    # 12-hour forecast
    forecast_response = requests.get(forecast_url, headers=headers)
    if forecast_response.status_code == 200:
        forecast_data = forecast_response.json()
        forecast_periods = forecast_data["properties"]["periods"]

        # Convert to DataFrame
        forecast_df = pd.DataFrame(forecast_periods)

        # Precipitation parsing - only output the number
        forecast_df["probabilityOfPrecipitation"] = forecast_df["probabilityOfPrecipitation"].apply(
            lambda x: x["value"] if x and x["value"] is not None else 0
        )

        # Generate a new date column that increments every two rows
        start_date = datetime.now().date()
        forecast_dates = [
            (start_date + timedelta(days=i // 2)).strftime("%Y-%m-%d")
            for i in range(len(forecast_df))
        ]
        forecast_df.insert(0, "forecastDate", forecast_dates)

        # Columns to keep
        forecast_df = forecast_df[[
            "forecastDate", "name", "temperature", "shortForecast",
            "probabilityOfPrecipitation", "windSpeed", "windDirection",
            "isDaytime", "detailedForecast"
        ]]

        print("\n12-hour Forecast with Additional Features:")
        display(forecast_df)
    else:
        print("Error fetching 12-hour forecast data")
else:
    print("Error fetching information")



12-hour Forecast URL: https://api.weather.gov/gridpoints/TBW/90,104/forecast

12-hour Forecast with Additional Features:


Unnamed: 0,forecastDate,name,temperature,shortForecast,probabilityOfPrecipitation,windSpeed,windDirection,isDaytime,detailedForecast
0,2024-11-16,This Afternoon,79,Sunny,0,8 mph,ENE,True,"Sunny, with a high near 79. East northeast win..."
1,2024-11-16,Tonight,60,Mostly Clear,0,7 mph,NE,False,"Mostly clear, with a low around 60. Northeast ..."
2,2024-11-17,Sunday,80,Sunny,0,5 to 8 mph,ENE,True,"Sunny, with a high near 80. East northeast win..."
3,2024-11-17,Sunday Night,60,Mostly Clear,0,2 to 6 mph,E,False,"Mostly clear, with a low around 60. East wind ..."
4,2024-11-18,Monday,80,Sunny,0,2 to 6 mph,ESE,True,"Sunny, with a high near 80. East southeast win..."
5,2024-11-18,Monday Night,62,Mostly Clear,0,5 mph,ESE,False,"Mostly clear, with a low around 62. East south..."
6,2024-11-19,Tuesday,84,Mostly Sunny,0,7 mph,SSE,True,"Mostly sunny, with a high near 84. South south..."
7,2024-11-19,Tuesday Night,70,Mostly Cloudy then Chance Showers And Thunders...,30,5 to 9 mph,S,False,A chance of showers and thunderstorms after 1a...
8,2024-11-20,Wednesday,81,Showers And Thunderstorms Likely,60,12 mph,SW,True,Showers and thunderstorms likely. Partly sunny...
9,2024-11-20,Wednesday Night,56,Chance Rain Showers then Partly Cloudy,40,12 mph,WNW,False,A chance of rain showers before 7pm. Partly cl...


In [40]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# API for Lakeland - chosen rough center of Lakeland Electrical coverage map
lat, lon = 28.05, -81.96
points_url = f"https://api.weather.gov/points/{lat},{lon}"

# Define headers for the API request
headers = {
    "User-Agent": "myweatherapp (contact@myweatherapp.com)"
}

# Fetch the grid information
response = requests.get(points_url, headers=headers)

if response.status_code == 200:
    data = response.json()
    forecast_hourly_url = data["properties"]["forecastHourly"]
    grid_url = data["properties"]["forecastGridData"]
    
    print("Hourly Forecast URL:", forecast_hourly_url)
    print("Grid Forecast URL:", grid_url)

    # Grid data for relative humidity and dew point
    grid_response = requests.get(grid_url, headers=headers)
    if grid_response.status_code == 200:
        grid_data = grid_response.json()

        # Relative Humidity
        if "relativeHumidity" in grid_data["properties"]:
            rh_data = pd.DataFrame(grid_data["properties"]["relativeHumidity"]["values"])
            rh_data["startDate"] = rh_data["validTime"].str.split("T").str[0]
            rh_data["startTime_hour"] = rh_data["validTime"].str.extract(r"T(\d{2})")[0]
            rh_data = rh_data[["startDate", "startTime_hour", "value"]]
            rh_data.rename(columns={"value": "relativeHumidity"}, inplace=True)
        else:
            rh_data = pd.DataFrame(columns=["startDate", "startTime_hour", "relativeHumidity"])

        # Dew Point (if available)
        if "dewpoint" in grid_data["properties"]:
            dewpoint_data = pd.DataFrame(grid_data["properties"]["dewpoint"]["values"])
            dewpoint_data["startDate"] = dewpoint_data["validTime"].str.split("T").str[0]
            dewpoint_data["startTime_hour"] = dewpoint_data["validTime"].str.extract(r"T(\d{2})")[0]
            dewpoint_data["dewpoint"] = dewpoint_data["value"].apply(
                lambda x: round((x * 9 / 5) + 32, 1) if pd.notna(x) else None
            )
            dewpoint_data = dewpoint_data[["startDate", "startTime_hour", "dewpoint"]]
        else:
            dewpoint_data = pd.DataFrame(columns=["startDate", "startTime_hour", "dewpoint"])

        # Hourly forecast
        hourly_response = requests.get(forecast_hourly_url, headers=headers)
        if hourly_response.status_code == 200:
            hourly_data = hourly_response.json()
            hourly_periods = hourly_data["properties"]["periods"]

            # Convert to DataFrame
            hourly_df = pd.DataFrame(hourly_periods)

            # Precipitation parsing - only output the number
            hourly_df["probabilityOfPrecipitation"] = hourly_df["probabilityOfPrecipitation"].apply(
                lambda x: x["value"] if x and x["value"] is not None else None
            )

            # Split startTime into startDate and startTime
            hourly_df["startDate"] = hourly_df["startTime"].str.split("T").str[0]
            hourly_df["startTime_hour"] = hourly_df["startTime"].str.split("T").str[1].str[:2]

            # Filter data for 1-week forecast
            one_week_ahead = (datetime.now() + timedelta(days=7)).strftime("%Y-%m-%d")
            hourly_df = hourly_df[hourly_df["startDate"] <= one_week_ahead]

            # Columns to keep
            hourly_df = hourly_df[[
                "startDate", "startTime_hour", "temperature", "shortForecast",
                "probabilityOfPrecipitation", "windSpeed", "windDirection"
            ]]

            # Merge Relative Humidity and Dew Point with Hourly Forecast
            hourly_with_grid = pd.merge(hourly_df, rh_data, on=["startDate", "startTime_hour"], how="left")
            hourly_with_grid = pd.merge(hourly_with_grid, dewpoint_data, on=["startDate", "startTime_hour"], how="left")

            print("\nHourly Forecast with Relative Humidity and Dew Point:")
            display(hourly_with_grid)
        else:
            print("Error fetching hourly forecast data")
    else:
        print("Error fetching grid forecast data:", grid_response.status_code, grid_response.text)
else:
    print("Error fetching grid information")

Hourly Forecast URL: https://api.weather.gov/gridpoints/TBW/90,104/forecast/hourly
Grid Forecast URL: https://api.weather.gov/gridpoints/TBW/90,104

Hourly Forecast with Relative Humidity and Dew Point:


Unnamed: 0,startDate,startTime_hour,temperature,shortForecast,probabilityOfPrecipitation,windSpeed,windDirection,relativeHumidity,dewpoint
0,2024-11-16,15,79,Sunny,0,8 mph,ENE,72.0,60.0
1,2024-11-16,16,78,Sunny,0,8 mph,ENE,65.0,
2,2024-11-16,17,76,Sunny,0,8 mph,ENE,58.0,
3,2024-11-16,18,74,Mostly Clear,0,7 mph,ENE,55.0,
4,2024-11-16,19,70,Mostly Clear,0,7 mph,ENE,53.0,
...,...,...,...,...,...,...,...,...,...
151,2024-11-22,22,55,Mostly Clear,1,5 mph,NW,63.0,
152,2024-11-22,23,54,Clear,1,5 mph,NW,69.0,52.0
153,2024-11-23,00,54,Clear,1,5 mph,NNW,76.0,
154,2024-11-23,01,53,Clear,1,5 mph,NNW,80.0,51.0


### Dewpoint values are inconsistent - not every hour has that forecast

In [41]:
# Filter the DataFrame for rows where dewpoint is not NaN
filtered_data = hourly_with_grid[hourly_with_grid["dewpoint"].notna()]

# Check if any data exists
if not filtered_data.empty:
    print("Rows with non-NaN Dewpoint values:")
    display(filtered_data)
else:
    print("No rows with non-NaN Dewpoint values.")


Rows with non-NaN Dewpoint values:


Unnamed: 0,startDate,startTime_hour,temperature,shortForecast,probabilityOfPrecipitation,windSpeed,windDirection,relativeHumidity,dewpoint
0,2024-11-16,15,79,Sunny,0,8 mph,ENE,72.0,60.0
13,2024-11-17,04,61,Mostly Clear,0,5 mph,NE,84.0,59.0
14,2024-11-17,05,61,Mostly Clear,0,5 mph,NNE,90.0,60.0
17,2024-11-17,08,64,Sunny,0,6 mph,NE,94.0,59.0
18,2024-11-17,09,68,Sunny,0,6 mph,ENE,91.0,58.0
...,...,...,...,...,...,...,...,...,...
144,2024-11-22,15,67,Sunny,2,10 mph,WNW,65.0,49.0
145,2024-11-22,16,66,Sunny,2,10 mph,WNW,62.0,50.0
146,2024-11-22,17,64,Sunny,2,9 mph,WNW,59.0,51.0
152,2024-11-22,23,54,Clear,1,5 mph,NW,69.0,52.0


## HISTORICAL DATA

- NOAA.gov - https://www.ncdc.noaa.gov/cdo-web/webservices/v2
- API: 
    - Email:	jjackson3465@floridapoly.edu
    - Token:	idqwrJvPPATpYzveCAWajOVtSQGvFKpR

Station info: https://www.ncdc.noaa.gov/cdo-web/datasets/GHCND/locations/FIPS:12105/detail - Polk County

In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta

# NOAA CDO API base URL and token
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
token = "idqwrJvPPATpYzveCAWajOVtSQGvFKpR"  # NOAA API - DON'T MAKE PUBLIC

# Calculate dates
current_date = datetime.now().strftime("%Y-%m-%d")
week_timeframe = (datetime.now() - timedelta(weeks=1)).strftime("%Y-%m-%d")     # Change weeks value to however many you want

# Define headers with the token
headers = {"token": token}

# Function to fetch data for a specific datatype
def fetch_data(datatype):
    params = {
        "datasetid": "GHCND",        # Daily summaries dataset
        "locationid": "FIPS:12105",  # Polk County FIPS Code
        "startdate": week_timeframe,
        "enddate": current_date,
        "datatypeid": datatype,      # Specific datatype (e.g., TMIN, TMAX, PRCP)
        "limit": 1000,
        "units": "standard"
    }
    response = requests.get(base_url, headers=headers, params=params)
    if response.status_code == 200:
        data = response.json()
        return pd.DataFrame(data.get("results", []))
    else:
        print(f"Error fetching {datatype} data:", response.status_code, response.text)
        return pd.DataFrame()

# Fetch data for each datatype
tmin_data = fetch_data("TMIN")
tmax_data = fetch_data("TMAX")
prcp_data = fetch_data("PRCP") 

# Combine all data into a single DataFrame
historical_df = pd.concat([tmin_data, tmax_data, prcp_data], ignore_index=True)

# Clean and aggregate the data
# Ensure the value column is numeric
historical_df["value"] = pd.to_numeric(historical_df["value"], errors="coerce")

# Pivot table to create separate columns for TMIN, TMAX, and PRCP averages
aggregated = historical_df.pivot_table(
    index="date",                # Group by date
    columns="datatype",          # Use datatype (TMIN, TMAX, PRCP) as columns, preciptation measured in inches, temps measured in F
    values="value",              # Aggregate the "value" column
    aggfunc="mean"               # Calculate the mean for each date and datatype
).reset_index()

# flatten the column index - for some reason messes up column names if don't do this
aggregated.columns.name = None
aggregated.rename(columns={"TMIN": "tempMin", "TMAX": "tempMax", "PRCP": "precipitation"}, inplace=True)

# Make lists of the values averaged to help see what's going on

# group by the different datatypes
grouped_values = historical_df.groupby(["date", "datatype"])["value"].apply(list).reset_index()

# pivot so they match
lists_pivot = grouped_values.pivot(index="date", columns="datatype", values="value").reset_index()

# rename columns 
lists_pivot.columns.name = None
lists_pivot.rename(columns={"TMIN": "tempMin_values", "TMAX": "tempMax_values", "PRCP": "precipitation_values"}, inplace=True)

# Merge dfs
final_df = pd.merge(aggregated, lists_pivot, on="date", how="left")

# Clean up the `date` column to keep only the YYYY-MM-DD part
final_df["date"] = final_df["date"].str[:10]

# Round the `precipitation` column to 2 decimal places
final_df["precipitation"] = final_df["precipitation"].round(2)
final_df["tempMax"] = final_df["tempMax"].round(1)
final_df["tempMin"] = final_df["tempMin"].round(1)


# output
print("Aggregated Daily Weather:")
display(final_df)


Aggregated Daily Weather:


Unnamed: 0,date,precipitation,tempMax,tempMin,precipitation_values,tempMax_values,tempMin_values
0,2024-11-09,0.0,86.0,70.5,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0,...","[88.0, 84.0]","[71.0, 70.0]"
1,2024-11-10,0.01,88.5,71.5,"[0.0, 0.06, 0.0, 0.0, 0.0, 0.03, 0.0, 0.0, 0.0...","[90.0, 87.0]","[72.0, 71.0]"
2,2024-11-11,0.0,90.0,71.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[90.0],[71.0]
3,2024-11-12,0.0,88.5,70.5,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[90.0, 87.0]","[72.0, 69.0]"
4,2024-11-13,0.0,87.0,71.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[87.0],[71.0]
5,2024-11-14,0.0,,,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",,
