In [1]:
# required imports for `acquisition.py`

import pandas as pd
import hmac
import hashlib
import requests
from io import StringIO

In [2]:
# NCEI dataset integrity check

## opens the verification checksums text file, `checksums.txt`, and reads it
with open('../checksums.txt', 'rb') as f:
    data = f.read()

    ### locates the NCEI checksum and assigns it to variable `checksum_verify`
    checksum_verify = str(data.split()[1]).split("'")[1]

## fetches the NCEI dataset URL and response
url = 'https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/statewide/time-series/13/pcp/1/9/1895-2024.csv?base_prd=true&begbaseyear=1901&endbaseyear=2000'
response = requests.get(url)

## calculates the checksum for the local version of the response
checksum = hashlib.sha256(response.text.encode('utf-8')).hexdigest()

## checks if the official checksum, `checksum_verify`, is equal to the local checksum
if checksum == checksum_verify:

    ### if integrity check passes, export raw dataset as `../data/raw/ncei.csv`
    print('NCEI checksum verification passed.\n')
    df_ncei = pd.read_csv(StringIO(response.text), skiprows=4)
    df_ncei.to_csv('../data/raw/ncei.csv')

## if integrity check fails, print failure message
else:
   print('NCEI checksum verification failed.')

NCEI checksum verification passed.



In [3]:
# Iowa dataset integrity check

## opens the verification checksums text file, `checksums.txt`, and reads it
with open('../checksums.txt', 'rb') as f:
    data = f.read()

    ### locates the Iowa checksum and assigns it to variable `checksum_verify`
    checksum_verify = str(data.split()[3]).split("'")[1]

## queries the data.iowa.gov database, using ".csv?$limit=583174" to ensure csv format and to export full dataset length and retrieves the response
endpoint = 'https://data.iowa.gov/resource/tw78-ziwj.csv?$limit=583174'
response = requests.get(endpoint)

## calculates the checksum for the local version of the response
checksum = hashlib.sha256(response.text.encode('utf-8')).hexdigest()

## checks if the official checksum, `checksum_verify`, is equal to the local checksum
if checksum == checksum_verify:

    ### if integrity check passes, export raw dataset as `../data/raw/iowa.csv`
    print('data.iowa.gov verification passed.\n')
    df_iowa = pd.read_csv(StringIO(response.text))
    df_iowa.to_csv('../data/raw/iowa.csv')

## if integrity check fails, print failure message
else:
   print('data.iowa.gov checksum verification failed.')

data.iowa.gov verification passed.



  df_iowa = pd.read_csv(StringIO(response.text))


In [9]:
len(df_iowa)/3

194391.33333333334

In [10]:
194391+194391

388782