# Project Summary

I grew up in Weld County, Colorado, and I remember that my mother never let us use our pool unless it was over 85 degrees Fahrenheit. As I have gotten older, I have stayed curious about her reasoning; my assumption is that she picked this threshold so that we would use the pool less often. I want to investigate this hypothesis by looking at the temperatures for the years we had a pool at our home. This will give me an initial look at how likely it was, throughout the year, for the temperature to be above 85 °F.

# Data at a Glance

I lived in Greeley, CO, which is in Weld County, and I was able to pull data from the NCEI website to get the monthly maximum and monthly average temperatures from 2000 to 2007.

In [8]:
import numpy as np
import pandas as pd
from bokeh.io import output_notebook, output_file, show
from bokeh.layouts import column
from bokeh.models import Div, ColumnDataSource, HoverTool
from bokeh.plotting import figure

def process_temperature_data(url, header_rows=4):
    """Load a two-column temperature CSV and split its date code into year and month.

    The file is read without a header row: the first column is named
    ``Year`` and the second ``Temperature``. The leading ``header_rows``
    rows are then discarded, and the remaining ``Year`` values are treated
    as strings whose first four characters are the year and whose remaining
    characters are a two-digit month code.

    Args:
        url: Anything ``pandas.read_csv`` accepts (URL, path, or file-like
            object).
        header_rows: Number of leading rows to drop before parsing.
            Defaults to 4, the value previously hard-coded here.

    Returns:
        A DataFrame with the columns ``Year`` (four-character string),
        ``Temperature`` (numeric; values that cannot be parsed become NaN)
        and ``Month`` (full English month name; NaN when the month code is
        not ``'01'``-``'12'``).
    """
    raw = pd.read_csv(url, names=['Year', 'Temperature'])

    # Copy the slice so the column assignments below operate on an
    # independent frame rather than on a view of the raw parse.
    df = raw.iloc[header_rows:, :].copy()

    # The date code looks like 'YYYYMM'; the month must be extracted before
    # 'Year' is truncated.
    df['Month'] = df['Year'].str[4:]
    df['Year'] = df['Year'].str[:4]

    # Non-numeric readings are coerced to NaN instead of raising.
    df['Temperature'] = pd.to_numeric(df['Temperature'], errors='coerce')

    # Map two-digit month codes to month names.
    month_names = {
        '01': 'January', '02': 'February', '03': 'March', '04': 'April',
        '05': 'May', '06': 'June', '07': 'July', '08': 'August',
        '09': 'September', '10': 'October', '11': 'November', '12': 'December',
    }
    df['Month'] = df['Month'].map(month_names)

    return df

def create_temperature_plot(dataframes, titles):
    """Build a Bokeh chart of monthly temperatures with an 85 °F reference line.

    Each DataFrame is averaged per (Year, Month), ordered chronologically and
    drawn as a line with a marker at every month.

    Args:
        dataframes: Non-empty iterable of DataFrames with string ``Year``
            values (e.g. ``'2000'``), full month names in ``Month`` and
            numeric ``Temperature`` values.
        titles: Legend labels, indexed in step with ``dataframes``.

    Returns:
        The configured Bokeh figure.

    Raises:
        ValueError: If ``dataframes`` is empty, since the axis ticks and the
            reference line need at least one series to take dates from.
    """
    dataframes = list(dataframes)
    if not dataframes:
        raise ValueError("dataframes must contain at least one DataFrame")

    p = figure(
        width=700, height=600, toolbar_location=None,
        title="Temperature over Time: Weld County, CO from 2000 - 2007")

    p.title.text_font_size = '16pt'
    p.xaxis.axis_label_text_font_size = "15pt"
    p.yaxis.axis_label_text_font_size = "15pt"

    # Colors are reused cyclically when there are more series than entries.
    line_colors = ['green', 'blue', 'red', 'orange', 'purple']

    plotted_dates = []
    for i, df in enumerate(dataframes):
        monthly = df.groupby(['Year', 'Month'])['Temperature'].mean().reset_index()
        # 'Year' + 'Month' yields strings such as '2000January'.
        monthly['Date'] = pd.to_datetime(monthly['Year'] + monthly['Month'], format='%Y%B')
        monthly = monthly.sort_values(by='Date')
        plotted_dates.append(monthly['Date'])

        color = line_colors[i % len(line_colors)]
        p.line(x='Date', y='Temperature', source=monthly, line_width=2,
               legend_label=titles[i], line_color=color)
        # Marker at every data point, outlined in the series color.
        p.scatter(x='Date', y='Temperature', source=monthly, size=6,
                  line_color=color)

    # Customize plot appearance
    p.y_range.start = 0
    p.xaxis.axis_label = "Time (Years)"
    p.yaxis.axis_label = "Temperature (°F)"

    # Use the date range across every series, not just the last one plotted.
    all_dates = pd.concat(plotted_dates)
    first_date = all_dates.min()
    last_date = all_dates.max()

    # One tick per year start, labelled with the four-digit year. Tick
    # positions are expressed in milliseconds since the epoch.
    year_starts = pd.date_range(start=first_date, end=last_date, freq='YS')
    tick_positions = [tick.timestamp() * 1000 for tick in year_starts]
    p.xaxis.ticker = tick_positions
    p.xaxis.major_label_overrides = dict(zip(tick_positions, year_starts.strftime("%Y")))

    # Dashed horizontal reference at the 85 °F threshold, anchored at the
    # earliest plotted month.
    p.ray(x=[first_date], y=[85], length=0, angle=0, line_color='red',
          line_dash='dashed', line_width=2, legend_label='85 (°F)')

    return p


def add_hover_tool(p, dataframes, titles):
    """Attach a vertical-line hover tool to ``p`` and build per-frame data sources.

    Args:
        p: Bokeh figure that receives the hover tool.
        dataframes: DataFrames providing ``Year``, ``Month`` and
            ``Temperature`` columns.
        titles: Series names; each one contributes a tooltip row.

    Returns:
        A list with one ``ColumnDataSource`` per DataFrame, in input order.
        This function does not attach the sources to ``p``.
    """
    wanted_columns = ('Year', 'Month', 'Temperature')
    sources = [
        ColumnDataSource(data={name: frame[name] for name in wanted_columns})
        for frame in dataframes
    ]

    # NOTE(review): every per-title row below reads the same ``@Temperature``
    # field, so a hovered point presumably shows its own value under each
    # title rather than one value per series - confirm this is intended.
    tooltip_rows = [("Year", "@Year"), ("Month", "@Month")]
    tooltip_rows += [
        (f"{title} Temperature", "@{Temperature}{0.0}°F")  # one decimal place
        for title in titles
    ]

    hover_tool = HoverTool(mode="vline")
    hover_tool.tooltips = tooltip_rows
    p.add_tools(hover_tool)

    return sources

def main():
    """Download the Weld County temperature series, plot them, and show the chart.

    Fetches the monthly maximum and average temperature CSVs for county
    CO-123 (2000-2007) from the NCEI Climate at a Glance site, builds the
    combined chart with a hover tool, and displays it inline in the notebook.
    """
    # Enable Bokeh to display plots in the notebook
    output_notebook()

    # URLs for the temperature data; each is paired by position with a title.
    urls = [
        'https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/'
        'county/time-series/CO-123/tmax/all/12/2000-2007.csv',
        'https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/'
        'county/time-series/CO-123/tavg/all/12/2000-2007.csv',
    ]

    titles = [
        'Max Temperature',
        'Average Temperature',
    ]

    # Download and clean one DataFrame per URL
    dataframes = [process_temperature_data(url) for url in urls]

    # Create temperature plot
    p = create_temperature_plot(dataframes, titles)

    # Add the hover tool; the data sources it returns are not needed here.
    add_hover_tool(p, dataframes, titles)

    # Show the plot
    show(p)

# Run the analysis only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

# NOTE(review): these two calls run after main() has already called show(),
# so they cannot affect the plot displayed above. Presumably they configure
# Bokeh's output targets (an HTML file plus the notebook) for later show()
# calls in the same session - confirm whether the HTML export is intended,
# and if so whether output_file() should be called before main().
output_file('temperature-over-time-output.html')
output_notebook()


# Results

Because this dataset only gives us a monthly view, what we can see is that in most summer months there was at least one day over 85 degrees. The average temperature for each month tells a different story: on average, no month was even over 80 degrees. I believe my hypothesis is correct: my mother chose a temperature that would be reached every so often, but not very frequently.