## Assignment 1: Open Weather Map Weather Forecast

### Objectives:
1. Use the Open Weather Map API to query future-day forecasts containing expected daily weather conditions for 17 different cities.
2. Parse the Python variables returned by the API to extract the the minimum and maximum forecast temperature temperatureMini and temperatureMaxi in Celsius for each of the upcoming five (5) days.
3. Compute the average minimum and maximum forecast temperatures Minavg and Maxavg for the upcoming five (5) days.
4. Write the location description loc we provide below, the five minimum and maximum forecast temperatures, and the overall minimum and maximum forecast temperatures to a CSV file named temp.csv, one line per city.

*The API provides weather data every 3 hours for a 5 day forecast at any city.*


In [1]:
# Import necessary packages

import os # for creating environment variables
import requests # for handling JSON data
import pandas as pd # for creating dataframes
import numpy as np # for vectorized operations
from datetime import datetime, timedelta # for dealing with datetime objects
from pandas.io.json import json_normalize # for handling nested JSON data

Create an account with Open Weather Map API to get your API key. Store the API key as an environment variable on your local system.

In [2]:
# Assign API key and url segment without the query string to variables

api_key = os.environ.get('API_KEY')
url_partial = 'https://api.openweathermap.org/data/2.5/forecast?'

After looking at the API documentation, the API call must be in the following format:

"https://api.openweathermap.org/data/2.5/forecast?q={city_name}&appid={your_api_key}"

In [3]:
"""
Step 1: Get 5-day weather forecast data via API
Step 2: Create dictionary mapping country code to country name
Step 3: Find the index that marks the first next day when forecast begins for EACH city
Step 4: Parse JSON data to city name and temperature values (one row per city)
Step 5: Combine rows into output dataframe
Step 6: Export to csv
"""

class CityForecast:

    def __init__(self, api_key, url_partial):
        self.api_key = api_key
        self.url_partial = url_partial
        pass
    
    # Step 1: Get 5-day weather forecast data via API
    def get_weather_data(self, cities_list):
        # Create empty list to hold JSON data for each city
        data_list = []
        # Convert each city in list to proper case
        cities_proper = [city.title() for city in cities_list]
        for cp in cities_proper:
            # Get full url with query string
            url_full = self.url_partial + 'q={}&appid={}&units=metric'.format(cp, self.api_key)
            # Perform API query and retrieve response
            response = requests.get(url_full)
            # Confirm successful query
            if response.status_code == 200:
                # Convert json data to python dict
                data = response.json()
                data_list.append(data)
        return data_list
    
    # Step 2: Create dictionary mapping country code to country name
    def get_country_dict(self):
        country_dict = {
            'Anchorage': 'USA',
            'Buenos Aires': 'Argentina',
            'São José dos Campos': 'Brazil',
            'San Jose': 'Costa Rica',
            'Nanaimo': 'Canada',
            'Ningbo': 'China',
            'Giza': 'Egypt',
            'Mannheim': 'Germany',
            'Hyderabad': 'India',
            'Tehran': 'Iran',
            'Bishkek': 'Kyrgyzstan',
            'Rīga': 'Latvia',
            'Quetta': 'Pakistan',
            'Warsaw': 'Poland',
            'Dhahran': 'Saudia Arabia',
            'Madrid': 'Spain',
            'Oldham': 'England'
        }
        return country_dict

    # Step 3: Find the index that marks the first next day when forecast begins for EACH city
    def get_next_day(self, block_list):
        # Get today's date and time
        dt = datetime.now()
        # Extract today's day
        day = dt.day
        # Loop over all the YYYY-mm-dd HH:MM:SS in the block list
        for i in range( 0, len( block_list ) ):
            # Split the current block into a list [ YY-mm-dd, HH:MM:SS ]
            dt_tm = block_list[ i ].split( ' ' )
            # Split the YYYY-mm-dd string into a list [ YYYY, mm, dd ]
            yr_mth_day = dt_tm[ 0 ].split( '-' )
            # Extract the block's day from the [ YYYY, mm, dd ] list as an integer
            cur_day = int( yr_mth_day[ 2 ] )    
            # If the block's day isn't the same as today's day, break, we've found the next day
            if day != cur_day:    
                break
        # Return the index (location) of the first occurrence of the next day in the block list
        return i
    
    # Step 4: Parse JSON data to city name and temperature values (one row per city)
    @staticmethod
    def get_row(data):
        # Create list of column names to be assigned to df
        headers = ['City', 'Min 1', 'Max 1', 'Min 2', 'Max 2', 'Min 3', 'Max 3', 'Min 4', 'Max 4', 'Min 5', 'Max 5', 'Min Avg', 'Max Avg']
        # Initialize empty df for storing data
        df = pd.DataFrame()
        # Create series for city column with city name, country name
        city_series = pd.Series(data['city']['name'] + ', ' + country_dict[data['city']['name']])
        # Create a temporary df that holds the flattened JSON data per city
        temp_df = pd.json_normalize(data['list'])
        # Get index of first forecast day
        start = obj.get_next_day(block_list)
        # Initialize i to represent number of forecasted days
        i = 1
        # Get lists of min and max temps for 3-hr blocks of the entire day
        while (start+8) < 39:
            while i < 6:
                min_s = pd.Series(temp_df['main.temp_min'][start:(start+8)].min())
                max_s = pd.Series(temp_df['main.temp_max'][start:(start+8)].max())
                df = pd.concat([df, min_s, max_s], axis=1)
                i += 1
                start += 8
        # Extract min and max temps for each day and add to df
        min_avg = pd.Series(min_s.mean())
        max_avg = pd.Series(max_s.mean())
        df = pd.concat([city_series, df, min_avg, max_avg], axis=1)
        df.columns = headers
        return df 

    # Step 5: Combine rows into output dataframe  
    def get_output_df(self):
        df_list = []
        result = pd.DataFrame()

        for data in json_data:
            df_list.append(CityForecast.get_row(data))

        result = result.append(df_list, ignore_index=True)
        return result
    
    # Step 6: Export to csv
    def export_to_csv(self, result):
        return result.to_csv('temp.csv', float_format='%.2f', index = False, encoding='utf-8')

In [4]:
# Create class object to perform forecast 
obj = CityForecast(api_key, url_partial)

In [5]:
# Create list of cities to be forecasted
cities_list = [
    'Anchorage',
    'Buenos Aires',
    'São José dos Campos',
    'San José',
    'Nanaimo',
    'Ningbo',
    'Giza',
    'Mannheim',
    'Hyderabad',
    'Tehran',
    'Bishkek',
    'Riga',
    'Quetta',
    'Warsaw',
    'Dhahran',
    'Madrid',
    'Oldham'
]

# Use class method to extract JSON data as Python data
json_data = obj.get_weather_data(cities_list)

In [6]:
# Create dictionary mapping city name to country name for list of 17 possible cities
country_dict = obj.get_country_dict()

In [7]:
# Create list of 3-hr blocks spanning across the time you queried and the next five days
for data in json_data:
    block_list = [data['list'][i]['dt_txt'] for i in range(40)]
    
# Use class method to get the index of the first forecast day and assign it to the start variable
start = obj.get_next_day(block_list)

In [8]:
result = obj.get_output_df()
result

Unnamed: 0,City,Min 1,Max 1,Min 2,Max 2,Min 3,Max 3,Min 4,Max 4,Min 5,Max 5,Min Avg,Max Avg
0,"Anchorage, USA",12.45,18.48,13.69,18.36,13.36,19.21,13.22,17.38,11.98,12.94,11.98,12.94
1,"Buenos Aires, Argentina",7.34,13.78,8.84,14.03,10.54,15.9,12.46,17.38,14.99,16.62,14.99,16.62
2,"São José dos Campos, Brazil",13.13,15.49,12.19,13.67,10.95,17.52,10.44,21.99,10.25,12.49,10.25,12.49
3,"San Jose, Costa Rica",18.07,33.2,18.26,35.1,18.21,34.93,18.98,35.82,18.92,33.08,18.92,33.08
4,"Nanaimo, Canada",16.3,17.35,12.61,18.81,14.43,18.96,15.24,19.68,13.74,17.35,13.74,17.35
5,"Ningbo, China",25.18,33.93,25.24,32.97,26.24,35.24,27.61,37.02,28.91,36.32,28.91,36.32
6,"Giza, Egypt",23.83,38.19,23.43,38.54,24.39,39.05,24.75,39.14,24.76,33.55,24.76,33.55
7,"Mannheim, Germany",24.27,33.43,22.01,27.71,19.88,24.42,17.89,24.8,18.0,23.12,18.0,23.12
8,"Hyderabad, India",22.56,25.85,23.08,29.47,23.12,29.38,23.42,29.9,23.33,29.63,23.33,29.63
9,"Tehran, Iran",29.02,37.85,30.33,38.0,28.28,36.29,26.45,33.57,24.93,29.92,24.93,29.92


In [9]:
# Use class method to export dataframe to csv
obj.export_to_csv(result)