### GET WEATHER DATA USING API

In [1]:
import pandas as pd
import numpy as np
import datetime
import requests
import json
import csv

In [None]:
# Two-digit, zero-padded codes 01..60 used as FIPS location ids in the API
# queries below, minus the codes listed in `skipped_codes`
# (presumably FIPS numbers that are not assigned to any state -- TODO confirm).
skipped_codes = ('03', '07', '14', '43', '52', '57', '58', '59')
fips_list = [
    code
    for code in (f'{number:02d}' for number in range(1, 61))
    if code not in skipped_codes
]
fips_list

In [None]:
# CSV file that receives the station metadata downloaded from the API below.
stations_path = '../data/1_raw/meteo/stations.csv'

In [None]:
# Field names (and column order) of the stations CSV.
# An earlier draft derived them from a response instead: list(data[0].keys()).
column_headers = [
    'elevation', 'mindate', 'maxdate',
    'latitude', 'name', 'datacoverage',
    'id', 'elevationUnit', 'longitude',
]

In [None]:
# (Re)create the stations CSV: mode 'w' truncates any existing file, after
# which only the header row is written. Data rows are appended later.
with open(stations_path, 'w', newline='') as stations_file:
    csv.DictWriter(stations_file, fieldnames=column_headers).writeheader()

In [None]:
import time  # local import: only needed for the retry back-off in this cell

# Download the temperature-station metadata for every FIPS code, page by page,
# and append the rows to the stations CSV (its header row is written by the
# previous cell, hence append mode here).

# NOTE(security): the API token is committed in plain text. Consider rotating
# it and loading it from an environment variable or an untracked config file.
headers = {'token': 'geenLBYxrnMaVJmsIXldbsCqTQNHfSpH'}

page_size = 1000      # rows requested per call (the `limit` query parameter)
max_retries = 5       # consecutive failed requests tolerated before giving up
request_timeout = 60  # seconds; without a timeout a stalled request hangs forever

# Open the output file once instead of re-opening it for every page.
with open(stations_path, 'a', newline='') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=column_headers)

    for fips in fips_list:
        # NOTE(review): the CDO documentation appears to describe result
        # offsets as starting at 1 -- confirm whether stepping 0, 1000, 2000...
        # makes consecutive pages overlap by one record.
        offset = 0
        failures = 0

        while True:
            url = f'https://www.ncei.noaa.gov/cdo-web/api/v2/stations?locationid=FIPS:{fips}&datacategoryid=TEMP&startdate=2010-01-01&enddate=2022-12-31&limit={page_size}&offset={offset}'
            response = requests.get(url=url, headers=headers, timeout=request_timeout)

            if response.status_code != 200:
                # Previously any non-200 status re-sent the same request in a
                # tight, endless loop. Retry with exponential back-off a
                # bounded number of times, then fail loudly.
                failures += 1
                if failures > max_retries:
                    raise RuntimeError(
                        f'Stations request for FIPS {fips} (offset {offset}) '
                        f'failed with HTTP {response.status_code}')
                time.sleep(2 ** failures)
                continue

            failures = 0
            json_response = response.json()

            # An empty payload, or one without any results, ends the paging.
            results = json_response.get('results') if json_response else None
            if not results:
                break

            csv_writer.writerows(results)
            offset += page_size

        print(f'Data exported for FIPS {fips}')

In [None]:
# Column layout and destination of the GSOM observations CSV
# (the file itself is created by the next cell).
column_headers = ['date', 'datatype', 'station', 'attributes', 'value']

data_gsom_path = '../data/1_raw/meteo/data_gsom.csv'

In [None]:
# (Re)create the GSOM CSV: mode 'w' truncates any existing file, after which
# only the header row is written. Data rows are appended later.
with open(data_gsom_path, 'w', newline='') as gsom_file:
    csv.DictWriter(gsom_file, fieldnames=column_headers).writeheader()

In [None]:
import time  # local import: only needed for the retry back-off in this cell

# Download the GSOM TMAX/TMIN/TAVG records for every year 2010-2022 and every
# FIPS code, page by page, and append the rows to the GSOM CSV (its header row
# is written by the previous cell, hence append mode here).

# NOTE(security): the API token is committed in plain text. Consider rotating
# it and loading it from an environment variable or an untracked config file.
headers = {'token': 'geenLBYxrnMaVJmsIXldbsCqTQNHfSpH'}

page_size = 1000      # rows requested per call (the `limit` query parameter)
max_retries = 5       # consecutive failed requests tolerated before giving up
request_timeout = 60  # seconds; without a timeout a stalled request hangs forever

# Open the output file once instead of re-opening it for every page.
with open(data_gsom_path, 'a', newline='') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=column_headers)

    for year in range(2010, 2023):
        for fips in fips_list:
            # NOTE(review): the CDO documentation appears to describe result
            # offsets as starting at 1 -- confirm whether stepping 0, 1000,
            # 2000... makes consecutive pages overlap by one record.
            offset = 0
            failures = 0

            while True:
                url = f'https://www.ncei.noaa.gov/cdo-web/api/v2/data?datasetid=GSOM&locationid=FIPS:{fips}&datatypeid=TMAX,TMIN,TAVG&startdate={year}-01-01&enddate={year}-12-31&limit={page_size}&offset={offset}'
                response = requests.get(url=url, headers=headers, timeout=request_timeout)

                if response.status_code != 200:
                    # Previously any non-200 status re-sent the same request
                    # in a tight, endless loop. Retry with exponential
                    # back-off a bounded number of times, then fail loudly.
                    failures += 1
                    if failures > max_retries:
                        raise RuntimeError(
                            f'GSOM request for year {year}, FIPS {fips} '
                            f'(offset {offset}) failed with HTTP '
                            f'{response.status_code}')
                    time.sleep(2 ** failures)
                    continue

                failures = 0
                json_response = response.json()

                # An empty payload, or one without any results, ends the paging.
                results = json_response.get('results') if json_response else None
                if not results:
                    break

                csv_writer.writerows(results)
                offset += page_size

            print(f'Data exported for year {year}, FIPS {fips}')

### FILTER STATIONS WITH FULL DATA COVERAGE ONLY

In [None]:
# Load the stations CSV into a DataFrame and preview its first rows.
df_stations = pd.read_csv(stations_path)
df_stations.head()

In [None]:
# Column order for the stations table: identifiers, then position, then the
# date range and coverage fields.
# An earlier draft took the order from the file: df_stations.columns.tolist().
stations_cols = [
    'id', 'name',
    'latitude', 'longitude', 'elevation', 'elevationUnit',
    'mindate', 'maxdate', 'datacoverage',
]

In [None]:
# Keep only the columns named in stations_cols, in that order.
df_stations = df_stations[stations_cols]

In [None]:
# Give the station attributes a "station_" prefix; the remaining columns keep
# their names. The frame is modified in place.
station_column_names = {
    'id': 'station_id',
    'name': 'station_name',
    'latitude': 'station_lat',
    'longitude': 'station_lon',
    'elevation': 'station_el',
}
df_stations.rename(columns=station_column_names, inplace=True)

In [None]:
# Stations whose record starts on or before 2009-12-01 and extends to
# 2023-01-01 or later. The bounds are compared against the column values as
# loaded -- assumes ISO yyyy-mm-dd strings so that string order equals date
# order (TODO confirm).
starts_early_enough = df_stations['mindate'] <= '2009-12-01'
runs_late_enough = df_stations['maxdate'] >= '2023-01-01'
df_stations_full = df_stations.loc[starts_early_enough & runs_late_enough]

In [None]:
# Save the filtered stations; the DataFrame index is written as the first
# column under the name 'station_idx'.
df_stations_full.to_csv(
    '../data/1_raw/meteo/stations_full_coverage.csv',
    index_label='station_idx',
    index=True,
)

### PROCESS WEATHER DATA

In [None]:
# Column names for a weather table.
# NOTE(review): this list is not referenced anywhere else in the visible
# source -- confirm whether it is still needed.
cols_weather = [
    'field_1', 'LOCAL_DATE', 'LOCATION_L', 'LOCATION_1',
    'station_id', 'station_la', 'station_lo', 'station_el',
    'zones',
]

In [None]:
# Load the GSOM CSV (its first line is the header row) and show the
# (rows, columns) shape of the result.
df_data_gsom = pd.read_csv(data_gsom_path, header=0)
df_data_gsom.shape  

In [None]:
# Derive a 'YYYY-mm' key (e.g. '2010-01') from each record's date.
observation_dates = pd.to_datetime(df_data_gsom['date'])
df_data_gsom['YYYY-mm'] = observation_dates.dt.strftime('%Y-%m')

In [None]:
# Rename 'station' to 'station_id' (in place) so it matches the key column of
# the stations table.
gsom_column_names = {'station': 'station_id'}
df_data_gsom.rename(columns=gsom_column_names, inplace=True)

In [None]:
# Reshape from long to wide: one row per (month, station) and one column per
# datatype. pivot_table's default aggregation (mean) combines repeated
# entries. The index levels are then turned back into ordinary columns.
df_weather = df_data_gsom.pivot_table(
    values='value',
    index=['YYYY-mm', 'station_id'],
    columns='datatype',
).reset_index()

In [None]:
# add station data to weather data
# Use exactly one coordinate row per station: if a station id occurred more
# than once in the stations table, the left join would otherwise repeat every
# matching weather row once per duplicate.
# NOTE(review): duplicates could arise e.g. from overlapping download pages or
# a re-run of the append cell -- confirm against the stations CSV.
station_coords = (
    df_stations[['station_lat', 'station_lon', 'station_id']]
    .drop_duplicates(subset='station_id')
)
df_weather = pd.merge(df_weather, station_coords, how='left', on='station_id')

In [None]:
# Write the merged weather table to disk, leaving out the DataFrame index.
df_weather.to_csv(
    '../data/1_raw/meteo/weather.csv',
    index=False,
)