In [2]:
import datetime
import os
import time

import pandas as pd
import requests

In [3]:
def retrieve_data(dataType, city):
    """
    Download the daily CSV exports for one city and data type and save them to disk.

    One file per UTC day is written to
    ``../raw_data/<city>/<dataType>/<abbr>_<dataType>_<YYYYMMDD>.csv``, covering
    2018-03-31 up to and including today. Days whose file already exists are
    skipped, except today's file, which is always downloaded again and
    overwritten. Days for which the server answers with an HTTP error status or
    with the literal text 'None' are reported and no file is written.

    params: dataType: string, options: ['meteorology', 'airquality', 'grid']
    params: city: string, options: ['beijing', 'london']
    return: None
    """
    # Any city other than 'beijing' is treated as London
    cityAbbr = 'bj' if city == 'beijing' else 'ld'

    # Create the per-city and per-data-type directories in one step
    directory = '../raw_data/{0}/{1}'.format(city, dataType)
    os.makedirs(directory, exist_ok=True)

    # Today's date in UTC
    utcToday = datetime.datetime.now(datetime.timezone.utc).date()
    oneDay = datetime.timedelta(days=1)

    # Retrieve data day by day, from 2018/03/31 until now
    currentDate = datetime.date(year=2018, month=3, day=31)
    while currentDate <= utcToday:
        filename = '{0}_{1}_{2:%Y%m%d}.csv'.format(cityAbbr, dataType, currentDate)
        path = '../raw_data/{0}/{1}/{2}'.format(city, dataType, filename)

        # Today's file is always refreshed (presumably because the current day
        # is still incomplete on the server -- TODO confirm)
        if not os.path.isfile(path) or currentDate == utcToday:
            # NOTE(review): the trailing '2k0d1d8' URL segment looks like an
            # access token; consider loading it from configuration instead.
            if dataType == 'grid':
                # Grid data is served under the meteorology endpoint
                dataset = '{}_{}'.format(cityAbbr, dataType)
                url = 'https://biendata.com/competition/meteorology/{0}/{1}-0/{1}-23/2k0d1d8'.format(dataset, currentDate)
            else:
                url = 'https://biendata.com/competition/{0}/{1}/{2}-0/{2}-23/2k0d1d8'.format(dataType, cityAbbr, currentDate)

            # A timeout keeps one stalled connection from hanging the whole run
            response = requests.get(url, timeout=60)
            if not response.ok:
                # Never store an HTTP error page as if it were CSV data
                print("{0}: HTTP {1}, skipped".format(filename, response.status_code))
            elif response.text.strip() == 'None':
                print("No data in {}".format(filename))
            else:
                with open(path, 'w', encoding='utf-8') as f:
                    f.write(response.text)
                print("{}: Retrieved".format(path))

        # Set currentDate to the next day
        currentDate += oneDay

In [6]:
# Download each London dataset in turn and report when it has finished
for data_type, done_message in (
    ('airquality', 'Air Quality in London: Done'),
    ('grid', 'Grid Meteorology in London: Done'),
):
    retrieve_data(data_type, 'london')
    print(done_message)

../raw_data/london/airquality/ld_airquality_20180331.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180401.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180402.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180403.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180404.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180405.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180406.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180407.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180408.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180409.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180410.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180411.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180412.csv: Retrieved
../raw_data/london/airquality/ld_airquality_20180413.csv: Retrieved
../raw_data/london/airquality/ld_airquality_2018

No data in ld_airquality_20180825.csv
No data in ld_airquality_20180826.csv
No data in ld_airquality_20180827.csv
No data in ld_airquality_20180828.csv
No data in ld_airquality_20180829.csv
No data in ld_airquality_20180830.csv
No data in ld_airquality_20180831.csv
No data in ld_airquality_20180901.csv
No data in ld_airquality_20180902.csv
No data in ld_airquality_20180903.csv
No data in ld_airquality_20180904.csv
No data in ld_airquality_20180905.csv
No data in ld_airquality_20180906.csv
No data in ld_airquality_20180907.csv
No data in ld_airquality_20180908.csv
No data in ld_airquality_20180909.csv
No data in ld_airquality_20180910.csv
No data in ld_airquality_20180911.csv
No data in ld_airquality_20180912.csv
No data in ld_airquality_20180913.csv
No data in ld_airquality_20180914.csv
No data in ld_airquality_20180915.csv
No data in ld_airquality_20180916.csv
No data in ld_airquality_20180917.csv
No data in ld_airquality_20180918.csv
No data in ld_airquality_20180919.csv
No data in l

No data in ld_airquality_20190329.csv
No data in ld_airquality_20190330.csv
No data in ld_airquality_20190331.csv
No data in ld_airquality_20190401.csv
No data in ld_airquality_20190402.csv
No data in ld_airquality_20190403.csv
No data in ld_airquality_20190404.csv
No data in ld_airquality_20190405.csv
No data in ld_airquality_20190406.csv
No data in ld_airquality_20190407.csv
No data in ld_airquality_20190408.csv
No data in ld_airquality_20190409.csv
No data in ld_airquality_20190410.csv
No data in ld_airquality_20190411.csv
No data in ld_airquality_20190412.csv
No data in ld_airquality_20190413.csv
No data in ld_airquality_20190414.csv
No data in ld_airquality_20190415.csv
No data in ld_airquality_20190416.csv
No data in ld_airquality_20190417.csv
No data in ld_airquality_20190418.csv
No data in ld_airquality_20190419.csv
No data in ld_airquality_20190420.csv
No data in ld_airquality_20190421.csv
No data in ld_airquality_20190422.csv
No data in ld_airquality_20190423.csv
No data in l

No data in ld_grid_20180831.csv
No data in ld_grid_20180901.csv
No data in ld_grid_20180902.csv
No data in ld_grid_20180903.csv
No data in ld_grid_20180904.csv
No data in ld_grid_20180905.csv
No data in ld_grid_20180906.csv
No data in ld_grid_20180907.csv
No data in ld_grid_20180908.csv
No data in ld_grid_20180909.csv
No data in ld_grid_20180910.csv
No data in ld_grid_20180911.csv
No data in ld_grid_20180912.csv
No data in ld_grid_20180913.csv
No data in ld_grid_20180914.csv
No data in ld_grid_20180915.csv
No data in ld_grid_20180916.csv
No data in ld_grid_20180917.csv
No data in ld_grid_20180918.csv
No data in ld_grid_20180919.csv
No data in ld_grid_20180920.csv
No data in ld_grid_20180921.csv
No data in ld_grid_20180922.csv
No data in ld_grid_20180923.csv
No data in ld_grid_20180924.csv
No data in ld_grid_20180925.csv
No data in ld_grid_20180926.csv
No data in ld_grid_20180927.csv
No data in ld_grid_20180928.csv
No data in ld_grid_20180929.csv
No data in ld_grid_20180930.csv
No data 

NameError: name 'start' is not defined

In [None]:
from glob import glob

def read_multiple_csv(path, col = None, parse_dates = None):
    """
    Read every file directly inside a directory as CSV and concatenate them.

    Files are read in sorted filename order and each file's rows keep their
    own index values.

    params: path: string, directory that contains the CSV files
    params: col: column label or list of labels to keep from each file;
            None keeps all columns
    params: parse_dates: None leaves dates unparsed; a list or tuple of column
            names is passed on to pandas.read_csv; any other value parses the
            'date' column (the previous behavior for every non-None value)
    return: the concatenated pandas object
    raise: ValueError if the directory contains no files
    """
    # glob(path + '/*') lists every entry in the directory
    files = sorted(glob(path + '/*'))
    if not files:
        raise ValueError("No files found in '{}'".format(path))

    read_options = {}
    if parse_dates is not None:
        if isinstance(parse_dates, (list, tuple)):
            # Honor the columns the caller asked for
            read_options['parse_dates'] = list(parse_dates)
        else:
            # Flag-style values keep parsing the 'date' column as before
            read_options['parse_dates'] = ['date']

    frames = []
    for f in files:
        frame = pd.read_csv(f, **read_options)
        if col is not None:
            frame = frame[col]
        frames.append(frame)
    return pd.concat(frames)

In [7]:
# Station codes (presumably London air-quality station ids -- TODO confirm)
stations = [
    'BL0', 'CD9', 'CD1', 'GN0', 'GR4', 'GN3', 'GR9',
    'HV1', 'KF1', 'LW2', 'ST5', 'TH4', 'MY7',
]

# Make sure the London output directory exists
path = '../input/london'
os.makedirs(path, exist_ok=True)

In [8]:
# Build the merged London air-quality history once. The guard checks the same
# path the result is written to, so the work is skipped on later runs.
hist_merged_path = '../input/london/london_aq_hist_data_merged.csv.gz'
if not os.path.isfile(hist_merged_path):

    # Read official historical data from file (first CSV column becomes the index)
    hist_data = pd.read_csv('../raw_data/London_historical_aqi_forecast_stations_20180331.csv', index_col=0)
    hist_data.columns = ['utc_time', 'station_id', 'PM2.5', 'PM10', 'NO2']
    # NOTE(review): missing readings become 0, which cannot be told apart from
    # a genuine zero measurement -- confirm this is intended.
    hist_data = hist_data.fillna(0).drop_duplicates()

    # Merge latitude and longitude data; the stations file is indexed by its
    # first column (presumably the station id -- verify against the file)
    aq_stations = pd.read_csv('../raw_data/London_AirQuality_Stations.csv', index_col=0)
    hist_data = hist_data.join(aq_stations[['Latitude', 'Longitude']], on='station_id')
    hist_data = hist_data.rename(columns={'Latitude': 'latitude', 'Longitude': 'longitude'})
    hist_data = hist_data.reindex(columns=['station_id', 'utc_time', 'longitude', 'latitude', 'PM2.5', 'PM10', 'NO2'])

    # Save the file, creating the output directory if it is missing
    os.makedirs(os.path.dirname(hist_merged_path), exist_ok=True)
    hist_data.to_csv(hist_merged_path, index=False, compression='gzip')

FileNotFoundError: File b'../raw_data/London_historical_aqi_forecast_stations_20180331.csv' does not exist