# Weather Database Generation

This notebook generates a database of potential vacation locations, along with their weather data.

In [1]:
# Namespace
# Modules
import numpy as np
import pandas as pd
import requests
import string
import sys
import time
import os
from datetime import datetime
from citipy import citipy
from pprint import pprint

# https://www.geeksforgeeks.org/python-import-module-from-different-directory/
sys.path.insert(0,'../')
from config import weather_api_key

### Develop set of random lat/long coordinates

In [3]:
# How many random coordinate pairs to draw
num_cities = 2000

# Draw latitudes first, then longitudes, each uniformly over its valid range
lats = np.random.uniform(-90.0, 90.0, num_cities)
longs = np.random.uniform(-180.0, 180.0, num_cities)

# Pair the two arrays element-wise into (lat, long) tuples
coords_zip = zip(lats, longs)
coords = [*coords_zip]

### Identify closest cities to coordinates

This cell does the following:

* Processes city data in sets of 50 coordinates (at most 50 API calls per set) to satisfy API restrictions, pausing for a minute between sets
* Collects the retrieved API data as a list of dictionaries (converted into a pandas dataframe in the next section)
* Eliminates duplicate city/country combinations and any failed API calls where the city is not found
* Tracks progress and generates robust logging output to an external file

In [5]:
# Base URL of the OpenWeatherMap current-weather endpoint.  HTTPS keeps the
# API key (sent as a query parameter) from travelling in clear text.
base_url = "https://api.openweathermap.org/data/2.5/weather"

# Throttling and network settings
RECORDS_PER_SET = 50          # coordinates processed between pauses
SET_PAUSE_SECONDS = 60        # length of the pause between sets
REQUEST_TIMEOUT_SECONDS = 10  # give up on a single request after this long

# List of results: one dictionary per successfully retrieved city.
weather_data = list()

# Full placenames (city plus country code) already seen, used to eliminate dupes
full_placenames = list()

# Statistics for logging
dupe_count = 0
city_not_found_count = 0

# Initialize logging.  The log directory is created on demand so the cell
# also works on a fresh checkout where "logs" does not exist yet.
start_time = datetime.now()
log_file_name = os.path.join("logs", f"{start_time.timestamp()}.log")
os.makedirs(os.path.dirname(log_file_name), exist_ok=True)

# mode="x" refuses to overwrite an existing log file
with open(log_file_name, mode="x") as log_file:

    log_start_msg = f"Commencing processing.  Start time: {start_time}\n"
    print(log_start_msg)
    log_file.write(log_start_msg)

    record_count = 0  # counter within the current set; not used in any message
    set_count = 0

    for i, (lat, lng) in enumerate(coords):
        # A new record set starts every RECORDS_PER_SET coordinates
        if i % RECORDS_PER_SET == 0:
            if i >= RECORDS_PER_SET:
                # Close out the previous set, then pause before continuing
                set_end_msg = f"Processing completed for record set {set_count} at {datetime.now()}\n"

                print(set_end_msg)
                log_file.write(set_end_msg)

                time.sleep(SET_PAUSE_SECONDS)

            set_count += 1
            record_count = 1

            set_start_msg = f"Processing commenced for record set {set_count} at {datetime.now()}\n"

            print(set_start_msg)
            log_file.write(set_start_msg)

        # Use citipy to get the city nearest to this coordinate
        city_obj = citipy.nearest_city(latitude=lat, longitude=lng)

        # Lower case strings for formal place names bother me.  So we're fixing that.
        city_name = string.capwords(city_obj.city_name)
        country_code = city_obj.country_code.upper()

        # Using full name including country code to determine uniqueness
        full_placename = f"{city_name},{country_code}"

        if full_placename not in full_placenames:
            full_placenames.append(full_placename)

            params = {
                'appid': weather_api_key,
                'q': full_placename,
                'units': 'imperial',
            }

            try:
                # A timeout prevents one stalled connection from hanging the whole run
                weather_response = requests.get(
                    url=base_url,
                    params=params,
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
            except requests.exceptions.RequestException as exc:
                # Log only the exception class: the exception text may include
                # the request URL, and with it the API key.
                log_file.write(
                    f"{full_placename} -- API call failed to retrieve data.  "
                    f"Request error: {type(exc).__name__}.\n"
                )
                city_not_found_count += 1
            else:
                try:
                    weather_json = weather_response.json()

                    weather_data.append({
                        "City": city_name,
                        "Country": country_code,
                        "Lat": weather_json['coord']['lat'],
                        "Long": weather_json['coord']['lon'],
                        "Max Temp": weather_json['main']['temp_max'],
                        "Pct Humidity": weather_json['main']['humidity'],
                        "Pct Cloudiness": weather_json['clouds']['all'],
                        "Wind Speed": weather_json['wind']['speed'],
                        "Description": weather_json['weather'][0]['description'],
                    })

                    log_file.write(f"{full_placename} -- Successfully processed.\n")

                except (KeyError, IndexError, TypeError, ValueError):
                    # KeyError/IndexError/TypeError: payload lacks the expected
                    # fields (e.g. an error response).  ValueError: body is not JSON.
                    log_file.write(f"{full_placename} -- API call failed to retrieve data.  Error code: {weather_response.status_code}.\n")
                    city_not_found_count += 1

        else:
            log_file.write(f"{full_placename} -- Already exists in data set.  Skipping.\n")
            dupe_count += 1

        record_count += 1

    # The loop only reports a set as completed when the next one starts, so
    # the final set is closed out here (no pause needed after it).
    if set_count:
        set_end_msg = f"Processing completed for record set {set_count} at {datetime.now()}\n"

        print(set_end_msg)
        log_file.write(set_end_msg)

    summary_text = (
        "\n------------------------------------------\n"
        f"Processing completed at {datetime.now()}.\n"
        f"Successfully processed {len(weather_data)} total records.\n"
        "------------------------------------------\n"
        f"Number of coordinates generated: {num_cities}\n"
        f"Number of duplicate city records identified: {dupe_count}\n"
        f"Total unique cities identified: {len(full_placenames)}\n"
        f"Number of cities with unidentifiable weather reports: {city_not_found_count}\n"
    )

    print(summary_text)
    log_file.write(summary_text)

    print(f"Full logging details are available at {log_file_name}.")

Commencing processing.  Start time: 2021-12-31 13:29:57.615899

Processing commenced for record set 1 at 2021-12-31 13:29:57.617773

Processing completed for record set 1 at 2021-12-31 13:30:14.654240

Processing commenced for record set 2 at 2021-12-31 13:31:14.669960

Processing completed for record set 2 at 2021-12-31 13:31:28.250147

Processing commenced for record set 3 at 2021-12-31 13:32:28.257970

Processing completed for record set 3 at 2021-12-31 13:32:36.448441

Processing commenced for record set 4 at 2021-12-31 13:33:36.460568

Processing completed for record set 4 at 2021-12-31 13:33:50.204601

Processing commenced for record set 5 at 2021-12-31 13:34:50.215340

Processing completed for record set 5 at 2021-12-31 13:35:01.303763

Processing commenced for record set 6 at 2021-12-31 13:36:01.310581

Processing completed for record set 6 at 2021-12-31 13:36:12.331872

Processing commenced for record set 7 at 2021-12-31 13:37:12.339064

Processing completed for record set 7 a

### Save weather dataframe to csv and display results

In [6]:
# Build the results table: one row per dictionary collected in weather_data
weather_data_df = pd.DataFrame(data=weather_data)

# Persist the table (including its index) to a CSV file in the working directory
weather_data_df.to_csv(path_or_buf="WeatherPy_Database.csv")

# Leave the dataframe as the cell's final expression so it is displayed
weather_data_df

Unnamed: 0,City,Country,Lat,Long,Max Temp,Pct Humidity,Pct Cloudiness,Wind Speed,Description
0,Castro,CL,-42.4721,-73.7732,57.38,90,90,11.50,light intensity shower rain
1,Ilulissat,GL,69.2167,-51.1000,15.82,35,100,12.66,overcast clouds
2,Dongsheng,CN,39.8161,109.9776,16.03,53,5,14.03,clear sky
3,Lufilufi,WS,-13.8745,-171.5986,78.58,76,47,19.82,light rain
4,Longyearbyen,SJ,78.2186,15.6401,12.04,57,0,12.66,clear sky
...,...,...,...,...,...,...,...,...,...
661,Pedasi,PA,7.5333,-80.0333,84.07,77,98,13.27,overcast clouds
662,Orlik,RU,52.5178,99.8250,-15.59,92,100,2.59,overcast clouds
663,Sinnar,SD,13.5691,33.5672,71.83,18,99,10.31,overcast clouds
664,Parrita,CR,9.5201,-84.3225,91.17,71,49,4.05,scattered clouds
