In [1]:

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Import Packages
import pandas as pd
import holoviews as hv
import hvplot.pandas
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
# Build the NCEI Access Data Service request for the daily-summaries
# dataset: TOBS readings from station USC00042319, 2000 through 2024.
ncei_endpoint = "https://www.ncei.noaa.gov/access/services/data/v1"
ncei_query = {
    "dataset": "daily-summaries",
    "stations": "USC00042319",
    "dataTypes": "TOBS",
    "startDate": "2000-01-01",
    "endDate": "2024-12-31",
    "units": "metric",  # metric units, so temperatures come back in Celsius
}

# Join the parameters as key=value pairs separated by '&', in the order above.
dv_url = ncei_endpoint + "?" + "&".join(
    f"{key}={value}" for key, value in ncei_query.items()
)

# Show the final URL so it can be checked or opened by hand.
dv_url



'https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&stations=USC00042319&dataTypes=TOBS&startDate=2000-01-01&endDate=2024-12-31&units=metric'

In [3]:


# Fetch the CSV from the request URL straight into a DataFrame.
csv_options = dict(
    index_col='DATE',     # use the DATE column as the row index
    parse_dates=True,     # parse the index values as dates
    na_values=['NaN'],    # read the literal string 'NaN' as missing
)
dv_df = pd.read_csv(dv_url, **csv_options)

# Preview the first rows to confirm the download worked.
dv_df.head()

Unnamed: 0_level_0,STATION,TOBS
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-01-01,USC00042319,3.3
2000-01-02,USC00042319,12.8
2000-01-03,USC00042319,1.7
2000-01-04,USC00042319,3.3
2000-01-05,USC00042319,1.1


In [4]:

# Keep only TOBS (temperature at time of observation) and rebind the
# result to dv_df; the list selector keeps it a one-column DataFrame.
dv_df = dv_df.loc[:, ['TOBS']]
dv_df

Unnamed: 0_level_0,TOBS
DATE,Unnamed: 1_level_1
2000-01-01,3.3
2000-01-02,12.8
2000-01-03,1.7
2000-01-04,3.3
2000-01-05,1.1
...,...
2024-12-27,
2024-12-28,
2024-12-29,
2024-12-30,


In [5]:
# Group the daily readings into calendar years ('YS' labels each group
# by its year-start date), then average each group to get the mean
# annual temperature.
yearly_groups = dv_df.resample('YS')
ann_dv_df = yearly_groups.mean()

# Preview the first few annual means.
ann_dv_df.head()

Unnamed: 0_level_0,TOBS
DATE,Unnamed: 1_level_1
2000-01-01,21.18388
2001-01-01,21.863611
2002-01-01,20.953297
2003-01-01,21.145961
2004-01-01,21.689011


In [6]:
# Interactive plot of the annual mean temperature series.
plot_options = dict(
    y='TOBS',
    title=' Mean Annual Temperature (C) of \nDeath Valley National Park, USA',
    xlabel='Year',
    ylabel='Mean Annual Temperature (Celsius)',
)
ann_dv_df_plot = ann_dv_df.hvplot(**plot_options)

# Leave the plot object as the cell's last expression so it is rendered.
ann_dv_df_plot

In [7]:

# Drop years whose annual mean is NaN so the regression only sees
# valid observations.
ann_dv_df_new = ann_dv_df.dropna()

# Predictor: the integer calendar year, reshaped to the 2D
# (n_samples, 1) layout scikit-learn expects.
# The index itself holds year-start timestamps (datetime64), not years;
# regressing on those raw values would not give a slope per year.
X = ann_dv_df_new.index.year.values.reshape(-1, 1)

# Response: mean annual TOBS (Celsius, since the request used units=metric).
Y = ann_dv_df_new['TOBS'].values

# Create and fit the linear regression model
model = LinearRegression()
model.fit(X, Y)

# With the year as the single predictor, the coefficient is the average
# temperature change in degrees Celsius per year.
slope = model.coef_[0]

