# Weather-summary-test

Test project for the `weather_summary.py` module.

# Environment

## Library Imports

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import re
import json
import time

import us

## Local Imports

In [None]:
import xutilities
import weather_summary

## File Paths

In [None]:
# See: https://medium.com/@rrfd/cookiecutter-data-science-organize-your-projects-atom-and-jupyter-2be7862f487e

# Everything below is resolved relative to the current working directory
base_path = Path.cwd()

# Data directories
data_path = base_path.joinpath('data')
raw_data_path = data_path.joinpath('raw')
interim_data_path = data_path.joinpath('interim')
processed_data_path = data_path.joinpath('processed')
external_data_path = data_path.joinpath('external')

# Report directories
reports_path = base_path.joinpath('reports')
figures_path = reports_path.joinpath('figures')

# Input directory holding the test files
tests_data_path = base_path.joinpath('tests')

# Outputs paths
# summary_weather_report_path = reports_path

# OpenWeather credentials file, looked up in the user's home directory
credentials_openweather_path = Path.home().joinpath('credentials-openweather.yml')

## Constants and Globals

In [None]:
# Constants and Globals
# Lake Cunningham reference point: scalar latitude/longitude plus the combined
# (latitude, longitude) tuple.
LAKE_CUNNINGHAM_LATITUDE = 37.335471
LAKE_CUNNINGHAM_LONGITUDE = -121.806204
LAKE_CUNNINGHAM_LOCATION = (LAKE_CUNNINGHAM_LATITUDE, LAKE_CUNNINGHAM_LONGITUDE)

# Code

In [None]:
def generate_sample_weather_summaries(city_list: pd.DataFrame):
    """Write one weather-summary text report per row of ``city_list``.

    For every row the function prints a progress line, obtains a summary from
    ``weather_summary.create_weather_summary`` and saves it under
    ``reports_path`` as ``WeatherSummary-<state>-<city>-<timestamp>.txt``.
    It then sleeps one second before moving on to the next row.

    The module-level names ``us_name_to_abbr``, ``openweather_api_key`` and
    ``reports_path`` must be defined before this function is called.

    Args:
        city_list: DataFrame with columns ``City``, ``State`` and
            ``Coordinates``; ``Coordinates`` is a comma-separated
            ``'<lat>,<lon>'`` string.
    """
    # The report directory may not exist yet; create it once up front so the
    # first write does not fail.
    reports_path.mkdir(parents=True, exist_ok=True)

    for idx, row in city_list.iterrows():

        reporting_location = tuple(float(cc) for cc in row.Coordinates.split(','))

        # Fall back to the raw State value when it has no entry in the mapping
        state_abbr = us_name_to_abbr.get(row.State, row.State)
        city_state = f'{row.City},{state_abbr}'
        print(f'{idx:04d} {city_state:<40}{row.Coordinates:>20}')

        summary = weather_summary.create_weather_summary(reporting_location, openweather_api_key)
        summary_ts = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

        # City/state text goes straight into the file name. A '/' would be read
        # as a sub-directory and characters such as ':' or '?' are rejected by
        # some file systems, so replace them with '_'.
        report_name = re.sub(
            r'[\\/:*?"<>|]', '_',
            f'WeatherSummary-{state_abbr}-{row.City}-{summary_ts}.txt',
        )
        summary_weather_report_path = reports_path / report_name

        # Explicit encoding so the output does not depend on the platform locale
        with open(summary_weather_report_path, 'w', encoding='utf-8') as fp:
            fp.write(summary)

        # Rate limit
        time.sleep(1)

# Main

In [None]:
# Notebook entry cell: loads the OpenWeather API key, builds the state-name
# lookup, and prints one summary for the Lake Cunningham location.
# openweather_api_key and us_name_to_abbr are read as globals by
# generate_sample_weather_summaries, so this cell has to run before that
# function is called.
if __name__ == '__main__':
    
    # Initializations
    # The loaded credentials object is indexed by 'credentials', then 'api_key'
    credentials = xutilities.load_credentials(credentials_openweather_path)['credentials']
    openweather_api_key = credentials['api_key']

    # Mapping requested from the `us` package with 'name' as key and 'abbr' as value
    us_name_to_abbr = us.states.mapping('name', 'abbr')
    
    # Single summary for the Lake Cunningham coordinates
    summary = weather_summary.create_weather_summary(LAKE_CUNNINGHAM_LOCATION, openweather_api_key)
    print(summary)
    
# Disabled: full_city_list is only created in the "Test Data" cell further down.
#     generate_sample_weather_summaries(full_city_list.sample(n = 10, replace = False, axis = 0))

# Test

For the free tier of service, these limits apply:

- 60 calls/minute
- Historical weather for the previous 5 days (1,000 API calls per day when using the One Call API)

Given these limits, we rate-limit to 1 call/sec and look at only 25 cities.

## Test Data

In [None]:
# --- 1000 largest US cities (semicolon-separated file) ---
# https://public.opendatasoft.com/explore/dataset/1000-largest-us-cities-by-population-with-geographic-coordinates/table/?sort=-rank
test_data_path = tests_data_path.joinpath(
    '1000-largest-us-cities-by-population-with-geographic-coordinates.csv'
)
test_data = pd.read_csv(test_data_path, sep=';', index_col=False)

# --- State abbreviations (semicolon-separated file) ---
us_states_data_path = tests_data_path.joinpath('us_states.csv')
us_states = pd.read_csv(us_states_data_path, sep=';', index_col=False)

# --- State capitals (comma-separated file) ---
# https://github.com/jasperdebie/VisInfo/blob/master/us-state-capitals.csv
us_state_capitals_data_path = tests_data_path.joinpath('us-state-capitals.csv')
us_state_capitals = pd.read_csv(us_state_capitals_data_path, sep=',', index_col=False)
us_state_capitals.columns = ['State', 'City', 'latitude', 'longitude']

# Atlanta, GA and Hartford, CT both have '<br>' on the end - why?
# Remove the marker from every city name.
us_state_capitals['City'] = [
    city_name.replace('<br>', '') for city_name in us_state_capitals['City']
]

# Combine latitude and longitude into one 'lat,lon' string so the capitals
# carry the same 'Coordinates' column that is selected from test_data below.
us_state_capitals['Coordinates'] = [
    f'{lat},{lon}'
    for lat, lon in zip(us_state_capitals['latitude'], us_state_capitals['longitude'])
]

# Full city list (capitals first, then the 1000 largest), re-indexed from 0
cols = ['City', 'State', 'Coordinates']
full_city_list = pd.concat(
    [us_state_capitals[cols], test_data[cols]],
    ignore_index=True,
)
# display(full_city_list.head())
# display(full_city_list.tail())

## Test Code

In [None]:
# Preview 10 rows of the combined city list, sampled without replacement
full_city_list.sample(n = 10, replace = False, axis = 0)

In [None]:
# Write summary reports for 5 rows sampled without replacement from the city list
generate_sample_weather_summaries(full_city_list.sample(n = 5, replace = False, axis = 0))

# OpenWeather City List

In [None]:
# Read city.list.json from the base directory into a DataFrame and show the first rows
open_weather_city_list = pd.read_json(base_path / 'city.list.json')
open_weather_city_list.head()

In [None]:
# Sorted copy of the city list, ordered by country, then state, then name
s_open_weather_city_list = open_weather_city_list.sort_values(['country', 'state', 'name'])

In [None]:
# Show only the rows whose country column equals 'US'
s_open_weather_city_list[s_open_weather_city_list.country=='US']

# Marysville, WA

In [None]:
# City	Rank	State	Growth From 2000 to 2013	Population	Coordinates
# Marysville	552	Washington	115.7	63269	48.0517637,-122.1770818

In [None]:
# Coordinates for Marysville, WA as a (latitude, longitude) tuple
MARYSVILLE_WA_LOCATION = (48.0517637, -122.1770818)

# Load the two Marysville JSON files from the tests directory. Each file is
# parsed while it is open; the flattened DataFrame is built after it is closed.
with open(tests_data_path / 'Marysville-historical-results.json', 'r') as fp:
    historical_json = json.load(fp)
mw_historical = pd.json_normalize(historical_json)

with open(tests_data_path / 'Marysville-results.json', 'r') as fp:
    results_json = json.load(fp)
mwa_results = pd.json_normalize(results_json)

In [None]:
# Display the flattened historical JSON (the same call that produced mw_historical)
pd.json_normalize(historical_json)

In [None]:
# Midnight of the current day (naive datetime) and the matching POSIX timestamp.
# Neither value is used again in this cell.
today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
utoday = today.timestamp()
reporting_location = MARYSVILLE_WA_LOCATION

# Run both saved result sets through the same transformation
results, current, daily, hourly = weather_summary.transform_weather_results(
    results_json
)
hist_results, actual_current, actual_daily, actual_hourly = weather_summary.transform_weather_results(
    historical_json
)

# NOTE(review): daily is passed before current here, the reverse of the order
# in which they are unpacked above - confirm against summary_weather_report's
# signature.
summary = weather_summary.summary_weather_report(
    results, daily, current, reporting_location, actual_hourly
)
print(summary)