In [162]:
import pandas as pd
import requests
import io
from os.path import exists
import os

# Data Keys
keys = ['AIRPORT_DATA']
KEYS = dict(zip(keys, keys))

# DataLoader Class
A generic class to load data from a URL source, and optionally drop and/or rename columns.
Only handles loading and basic processing, everything else should be done through the DF itself.

In [163]:
class DataLoader():
    KWARGS = [
        'column_remapping',
        'columns_to_keep'
    ]
    ALLOWED_FILETYPES = ['csv', 'json']
    
    def __init__(self, data_name, source_type, source_url = '', **kwargs):
        if source_url == '':
            raise Exception('Must provide source_url or source_filepath for data')
        
        if source_type not in DataLoader.ALLOWED_FILETYPES:
            raise Exception(f"{source_type} not in allowed types")
        
        if data_name != data_name.upper():
            raise Exception('data_name must be uppercase')
        
        self.data_name = data_name
        self.source_type = source_type
        self.source_url = source_url
        self.df = None
        self.filepath = f"data/{self.data_name}.csv"
        
        self.column_remapping = kwargs.get('column_remapping', {})
        self.columns_to_keep = kwargs.get('columns_to_keep', None)
        
        try:
            os.mkdir("data")
        except OSError as error:
            pass
    
    def fetch_from_source(self):
        r = requests.get(self.source_url)
        df = self.df
        if r.ok:
            data = r.content.decode('utf8')
            if self.source_type == 'csv':
                df = pd.read_csv(io.StringIO(data))
            elif self.source_type == 'json':
                df = pd.read_json(io.StringIO(data))
            else:
                raise Exception("Unsupported data type")
            self.df = df
        else:
            print(r.reason)
            print(r.text)
            raise Exception(f"Error loading {self.data_name}")
    
    def load_from_file(self):
        self.df = pd.read_csv(self.filepath)
    
    def _check_file_existance(self):
        return exists(self.filepath)
    
    def save_to_file(self):
        if self.df is not None:
            self.df.to_csv(self.filepath)
        else:
            raise Exception("No data available to save")
    
    def _transform_source(self):
        # rename columns based on user-provided remapping
        self.df.rename(columns = self.column_remapping, inplace=True)
        self.df.reset_index(drop=True)
        
        # drop all columns not explicitly included in user-provide column_remapping + columns_to_keep
        # tip: pass [] to columns_to_keep to drop everything not remapped
        if self.columns_to_keep is not None:
            columns_to_keep = set(self.columns_to_keep + list(self.column_remapping.values()))
            cols_to_drop = [col for col in self.df.columns if col not in columns_to_keep]
            self.df.drop(cols_to_drop, axis=1, inplace=True)
    
    def retrieve(self):
        # return the most-live data. In-mem df -> local file -> remote source
        if self.df is not None:
            return self.df
        elif self._check_file_existance():
            self.load_from_file()
            return self.df
        else:
            self.fetch_from_source()
            self._transform_source()
            self.save_to_file()
            return self.df

In [166]:
keys_to_remap = [
    'ARPT_NAME',
    'ARPT_ID',
    'ICAO_ID',
    'DIST_CITY_TO_AIRPORT',
    'FACILITY_USE_CODE',
    'STATE_CODE',
    'CITY'
]
airport_loader = DataLoader(
    KEYS['AIRPORT_DATA'],
    'csv',
    'https://opendata.arcgis.com/api/v3/datasets/e2e88905639b415abe621a6a861b4eca_0/downloads/data?format=csv&spatialRefId=3857&where=1%3D1',
    column_remapping = {
        **dict(zip(keys_to_remap, map(lambda x: x.lower(), keys_to_remap))),
        **{
            'LAT_DECIMAL': 'lat',
            'LNG_DECIMAL': 'lng',
        }
    },
    columns_to_keep = []
)

In [167]:
df = airport_loader.retrieve()
[print(col) for col in df.columns]
df


Unnamed: 0
state_code
arpt_id
city
arpt_name
facility_use_code
lat
dist_city_to_airport
icao_id


Unnamed: 0.1,Unnamed: 0,state_code,arpt_id,city,arpt_name,facility_use_code,lat,dist_city_to_airport,icao_id
0,0,AL,0J0,ABBEVILLE,ABBEVILLE MUNI,PU,31.600222,3.0,
1,1,AL,2A8,ADDISON,ADDISON MUNI,PU,34.217142,1.0,
2,2,AL,AL03,AKRON,STRICKLAND/SMALLEY FLD,PR,32.847500,3.0,
3,3,AL,EET,ALABASTER,SHELBY COUNTY,PU,33.177778,4.0,KEET
4,4,AL,AL39,ALABASTER,SHELBY MEDICAL CENTER,PR,33.252222,0.0,
...,...,...,...,...,...,...,...,...,...
19933,19933,,MBAC,BIG AMBERGRIS CAY,AMBERGRIS CAY INTL,PR,21.300633,0.0,MBAC
19934,19934,,MBGT,COCKBURN TOWN,JAGS MCCARTNEY INTL,PU,21.444544,2.0,MBGT
19935,19935,,MBNC,BOTTLE CREEK AND WHITBY,NORTH CAICOS INTL,PU,21.917486,0.0,MBNC
19936,19936,,MBPV,PROVIDENCIALES,PROVIDENCIALES INTL,PU,21.773600,1.0,MBPV
