# In [1]:
# path =     (NOTE: the assigned value was lost in the notebook export)

# In [5]:
import os
from os.path import exists
from os.path import join
import gdown
import shutil
import json
import torch
import math
import numpy as np
import pandas as pd
from zipfile import ZipFile

import torchvision

import matplotlib.pyplot as plt
import csv

class EpaDataset:
    """EPA vehicles csv loaded into a pandas DataFrame.

    On construction the csv is fetched to a local work path (see
    fetch_csv) and then read into self.df.
    """

    # Default local work path
    work_def = '/data/epa/epa_dataset.csv'   

    # Default download url path
    url_def = 'https://www.fueleconomy.gov/feg/epadata/vehicles.csv'

    def __init__(self,
                work: str = work_def,
                persist: str = None,
                url: str = url_def):
        """Fetch the csv to work (if needed) and load it.

        Args:
            work (str): specifies full path to the local csv
            persist (str): optionally specifies full path to persistent copy
            of csv
            url (str): specifies url from which to download csv in lieu
            of work and persist

        Raises:
            Whatever pandas.read_csv raises if the csv could not be made
            available at work (fetch_csv itself only reports failures).
        """

        # Retain arguments
        self.work    = work
        self.persist = persist
        self.url     = url

        # Fetch data to work file; a failure surfaces in read_csv below
        EpaDataset.fetch_csv(work, persist, url)

        # Store work directory
        self.work_dir = os.path.split(work)[0]

        # Load the csv
        self.df = pd.read_csv(work, skipinitialspace=True, low_memory=False)

    def __len__(self) -> int:
        """Return the number of rows in the data."""

        return len(self.df)

    def get_make_model(self) -> list:
        """Return the distinct (make, model) pairs as a list of tuples, in
        order of first appearance in the data."""

        pairs = self.df[['make', 'model']].drop_duplicates()

        # Plain tuples straight from the frame; avoids building a Series
        # per row the way iterrows does
        return list(pairs.itertuples(index=False, name=None))

    def get_make(self) -> list:
        """Return the distinct makes as a list, in order of first appearance
        in the data."""

        return self.df['make'].drop_duplicates().tolist()

    @staticmethod
    def _make_parent_dir(path: str) -> None:
        """Coerce the directory part of path into existence."""

        parent = os.path.split(path)[0]

        # A bare file name has an empty directory part, which os.makedirs
        # rejects; the current directory needs no creating
        if parent:
            os.makedirs(parent, exist_ok=True)

    @classmethod
    def fetch_csv(cls, work: str, persist: str, url: str) -> bool:
        """If the csv file specified by work does not exist, attempt to
        copy it from persist. If that doesn't work attempt to download it from
        url to work and optionally copy it to persist. Return True if every
        step taken succeeded and the file exists at work on exit.

        Args:
            work (str): specifies full path to csv
            persist (str): optionally specifies full path to persistent copy of
            csv
            url (str): specifies url from which to download csv in lieu
            of work and persist 

        Returns:
            Boolean: True if work has csv on exit, False if the download
            produced no file or any step raised an exception
        """

        try:

            # Coerce work directory into existence
            cls._make_parent_dir(work)

            # If work file already exists...
            if os.path.exists(work):

                # Log progress
                print('Work file {0} is already available.'.format(work))

                # Nothing to do
                return True

            # If persist file exists...
            if persist is not None and os.path.exists(persist):

                # Copy from persist to work
                shutil.copyfile(persist, work)

                # Log progress
                print('Copied work file from {0} to {1}.'.format(
                    persist, work))

                # Nothing more to do
                return True

            # Download from url to work
            gdown.download(url, work, quiet=False)

            # Do not report success unless the download produced the file
            if not os.path.exists(work):

                # Log progress
                print('Failed to fetch {0}.'.format(work))

                # No joy
                return False

            # Log progress
            print('Downloaded work file from {0} to {1}.'.format(url, work))

            # If persist specified...
            if persist is not None:

                # Coerce persist directory into existence
                cls._make_parent_dir(persist)

                # Copy from work to persist
                shutil.copyfile(work, persist)

                # Log progress
                print('Copied work file from {0} to {1}.'.format(
                    work, persist))

            # Work file is in place whether or not persist was requested
            return True

        except Exception as err:

            # Log progress, keeping the reason so the failure is diagnosable.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            print('Failed to fetch {0}.'.format(work))
            print('Reason: {0!r}'.format(err))

            # No joy
            return False

if __name__ == '__main__':

    # Load the EPA data using the default work path and download url
    dataset = EpaDataset()

    # Distinct (make, model) pairs in ascending order
    pairs = sorted(dataset.get_make_model())

    # Dump the pairs as a two-column csv. Commas inside a value are replaced
    # with 'x' so that each written line keeps exactly two fields.
    with open('epa_make_model.csv','w') as out:
        out.write('epa-make,epa-model\n')
        for make, model in pairs:
            out.write('{0},{1}\n'.format(
                make.replace(',','x'), model.replace(',','x')))

# Output: Work file /data/epa/epa_dataset.csv is already available.
