## Imports

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import seaborn as sns
# Set seaborn's global plotting theme to the "darkgrid" style.
sns.set_theme(style="darkgrid")

## Get Data

In [2]:
# Request daily summaries (PRCP, TMAX, TMIN) for two stations from the NOAA
# NCEI data service, load the JSON payload into `df`, and cache it as CSV.
url = "https://www.ncei.noaa.gov/access/services/data/v1"
params = {
    "dataset": "daily-summaries",
    "stations": "USC00457180,USC00390043",
    "startDate": "2010-01-01",
    "endDate": "2020-12-31",
    "dataTypes": "PRCP,TMAX,TMIN",
    "format": "json",
    "units": "metric",
    "includeStationName": "true",
    "includeStationLocation": "true"
}

# requests has no default timeout; without one a stalled server would hang
# the cell (and any Run All) indefinitely.
response = requests.get(url, params=params, timeout=60)

if response.status_code == 200:
    data = response.json()
    df = pd.DataFrame(data)

    # to_csv does not create missing parent directories, so make sure the
    # output folder exists before writing.
    csv_path = Path('data/rain_temp.csv')
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(csv_path, index=False)

    print(df.head())
else:
    # Stop here instead of just printing: otherwise `df` is left undefined
    # (or stale from an earlier run) and every later cell fails confusingly.
    raise RuntimeError(f"Error {response.status_code}: {response.text}")

         DATE      STATION   LONGITUDE TMAX ELEVATION  TMIN  PRCP  LATITUDE  \
0  2010-01-01  USC00457180  -117.36364  5.6     737.6   1.7  11.4  47.23446   
1  2010-01-02  USC00457180  -117.36364  7.2     737.6   1.7   0.0  47.23446   
2  2010-01-03  USC00457180  -117.36364  4.4     737.6  -1.1   0.0  47.23446   
3  2010-01-04  USC00457180  -117.36364  5.6     737.6   1.7   1.3  47.23446   
4  2010-01-05  USC00457180  -117.36364  3.9     737.6   0.0   8.4  47.23446   

             NAME  
0  ROSALIA, WA US  
1  ROSALIA, WA US  
2  ROSALIA, WA US  
3  ROSALIA, WA US  
4  ROSALIA, WA US  


## Explore and Pre-process

In [3]:
# Print the column labels of the loaded frame.
print(df.columns)

Index(['DATE', 'STATION', 'LONGITUDE', 'TMAX', 'ELEVATION', 'TMIN', 'PRCP',
       'LATITUDE', 'NAME'],
      dtype='object')


In [4]:
# Summarize dtypes and per-column non-null counts before any rows are dropped.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7977 entries, 0 to 7976
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   DATE       7977 non-null   object
 1   STATION    7977 non-null   object
 2   LONGITUDE  7977 non-null   object
 3   TMAX       7958 non-null   object
 4   ELEVATION  7977 non-null   object
 5   TMIN       7960 non-null   object
 6   PRCP       7955 non-null   object
 7   LATITUDE   7977 non-null   object
 8   NAME       7977 non-null   object
dtypes: object(9)
memory usage: 561.0+ KB


In [5]:
# Count missing entries per column (summing the boolean mask down the rows).
missing_vals = df.isna().sum(axis=0)

In [6]:
# Display the per-column missing-value counts computed in the previous cell.
missing_vals

DATE          0
STATION       0
LONGITUDE     0
TMAX         19
ELEVATION     0
TMIN         17
PRCP         22
LATITUDE      0
NAME          0
dtype: int64

In [7]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')

In [8]:
# Re-check the frame after dropping incomplete rows: every column should now
# report the same non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7938 entries, 0 to 7976
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   DATE       7938 non-null   object
 1   STATION    7938 non-null   object
 2   LONGITUDE  7938 non-null   object
 3   TMAX       7938 non-null   object
 4   ELEVATION  7938 non-null   object
 5   TMIN       7938 non-null   object
 6   PRCP       7938 non-null   object
 7   LATITUDE   7938 non-null   object
 8   NAME       7938 non-null   object
dtypes: object(9)
memory usage: 620.2+ KB


In [9]:
# The API delivers every field as a string; convert DATE to datetime and the
# coordinate / measurement columns to numbers.
numeric_cols = ['LONGITUDE', 'LATITUDE', 'TMAX', 'TMIN', 'PRCP', 'ELEVATION']

# assign() builds a new frame (columns keep their positions), so the cell
# does not mutate the frame produced by the earlier dropna in place.
df = df.assign(
    DATE=pd.to_datetime(df['DATE']),
    **{col: pd.to_numeric(df[col], errors='coerce') for col in numeric_cols},
)

# errors='coerce' turns any unparseable value into NaN. The earlier dropna
# ran before this conversion, so remove rows where coercion introduced a NaN;
# otherwise they would silently reach the plots below.
df = df.dropna(subset=numeric_cols)

In [12]:
# Human-readable labels, keyed by column name, shared by the trace labels
# and the layout titles.
axis_labels = {
    'DATE': 'Date',
    'PRCP': 'Precipitation (mm)',
    'NAME': 'Station Name'
}

# One line per station: daily precipitation over time.
fig = px.line(
    df,
    x='DATE',
    y='PRCP',
    color='NAME',
    labels=axis_labels,
    title='Daily Precipitation by Station'
)

layout_options = dict(
    xaxis_title=axis_labels['DATE'],
    yaxis_title=axis_labels['PRCP'],
    legend_title=axis_labels['NAME'],
    template='plotly_white'
)
fig.update_layout(**layout_options)

fig.show()