In [1]:
import os
import sys
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

In [2]:
import utils.downloading
import utils.files
from utils.progress.log_progress import log_progress
from utils.files.file_helper import ensure_directory, get_all_files_from_subfolders
import utils.configuration
import numpy as np
import pandas as pd



In [3]:
config = utils.configuration.Configuration()

In [4]:
HISTORICAL_DATA_FOLDER = config.config['DEFAULT']['FLIGHT_DATA']
WEATHER_DATA_FOLDER = config.config['DEFAULT']['WEATHER_DATA']
WEATHER_API_KEY = config.config['DEFAULT']['NOAA_WEATHER_API_KEY']
WEATHER_CITY_CODES = config.config['DEFAULT']['WEATHER_CITY_CODES']
YEARS_TO_DOWNLOAD = config.config['DEFAULT']['WORKING_YEARS']

In [5]:
def download_weather_data(downloader, city_list, country, year_list, save_folder):
    logging.info('Starting to download weather data')
    for year in year_list:
        download_yearly_weather_data(downloader, city_list, country, year, save_folder)

In [6]:
def download_yearly_weather_data(downloader, city_list, country, year, save_folder):
    start_date = '{}-01-01'.format(year)
    end_date = '{}-01-01'.format(year + 1)
    
    logging.info('Requesting data for {}'.format(year))
    
    error_city_list = []
    
    for city in city_list:
        try:
            logging.info('Requesting data for city: {}'.format(city))
            city_weather = weather_downloader.get_weather_for_city_by_name(city, country, start_date, end_date)
            logging.info('Saving data for city: {}'.format(city))
            save_folder_name = get_folder_for_year(save_folder, year)
            ensure_directory(save_folder_name)

            city_weather.to_csv('{}/{}.csv'.format(save_folder_name, city))
        except Exception as e:
            logging.info('No weather found for city {}'.format(city))
            error_city_list.append(city)

    logging.info('Error cities: {}'.format(error_city_list))

In [7]:
def get_folder_for_year(folder_to_save, year):
    return '{}/{}'.format(folder_to_save, year)

In [8]:
def get_existing_airports_names(historical_data_folder):
    historical_files_list = get_all_files_from_subfolders(HISTORICAL_DATA_FOLDER)
    
    name_list = set()
    
    for file in historical_files_list:
        file_data = pd.read_csv(file)
        unique_city_names = np.unique(file_data['ORIGIN_CITY_NAME'])        
        name_list.update(unique_city_names)
        
    return list(name_list)

In [9]:
weather_downloader = utils.downloading.NOAAWeatherDownloader(WEATHER_API_KEY, WEATHER_CITY_CODES)

In [10]:
city_names = get_existing_airports_names(HISTORICAL_DATA_FOLDER)

  if self.run_code(code, result):


In [None]:
download_weather_data(weather_downloader, city_names, 'US', YEARS_TO_DOWNLOAD, WEATHER_DATA_FOLDER)

INFO:root:Starting to download weather data
INFO:root:Requesting data for 2017
INFO:root:Requesting data for city: Grand Rapids, MI
INFO:root:Saving data for city: Grand Rapids, MI
INFO:root:Requesting data for city: Fort Smith, AR
INFO:root:Saving data for city: Fort Smith, AR
INFO:root:Requesting data for city: Lanai, HI
ERROR:root:Could not find city with the specified name: Lanai, HI
INFO:root:No weather found for city Lanai, HI
INFO:root:Requesting data for city: Aspen, CO
ERROR:root:Could not find city with the specified name: Aspen, CO
INFO:root:No weather found for city Aspen, CO
INFO:root:Requesting data for city: Columbia, MO
INFO:root:Saving data for city: Columbia, MO
INFO:root:Requesting data for city: South Bend, IN
INFO:root:Saving data for city: South Bend, IN
INFO:root:Requesting data for city: Fayetteville, NC
INFO:root:Saving data for city: Fayetteville, NC
INFO:root:Requesting data for city: North Bend/Coos Bay, OR
ERROR:root:Could not find city with the specified n

INFO:root:Requesting data for city: Los Angeles, CA
