In [11]:
from google.colab import drive
# Attach Google Drive to the Colab file system at /content/gdrive
drive.mount(mountpoint='/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [12]:
%matplotlib inline 

import os
import numpy as np
import pandas as pd
import urllib.request
from pathlib import Path
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from sklearn.linear_model import LinearRegression

In [13]:
# Directory on the mounted drive that holds the Auto MPG files
working_dir = Path(
    '/content/gdrive/My Drive/Colab Notebooks/Applied_Machine_Learning/Data/Auto_MPG'
)
working_dir

PosixPath('/content/gdrive/My Drive/Colab Notebooks/Applied_Machine_Learning/Data/Auto_MPG')

The helper function below downloads a file from a given URL only if it does not already exist at the given path.

In [14]:
def file_download(path, url):
    """ 
    Purpose: 
            This function downloads a given file 
            if it does not already exist.
    Parameters:
            path - destination of the file (PosixPath or
                   plain string)
            url - url for downloading the file if it does
                  not exist already        
    Notes:
            Missing parent directories of `path` are created.
            The data is first written to '<name>.part' next to
            the destination and only moved into place once the
            download has finished, so an interrupted download
            is not mistaken for a complete file on a later run.
    """
    # Accept plain strings as well as Path objects
    path = Path(path)
    if path.exists():
        return

    # Make sure the destination directory exists
    path.parent.mkdir(parents=True, exist_ok=True)

    # Extract the filename from the path
    file_name = path.name
    partial_path = path.with_name(file_name + ".part")

    # Download the file 
    print(f"Downloading {file_name}...")
    try:
        urllib.request.urlretrieve(url, partial_path)
        # Publish the file only after it has been fully written
        os.replace(partial_path, path)
    finally:
        # Clean up the partial file left behind by a failed download
        if partial_path.exists():
            partial_path.unlink()

In [15]:
# Download locations of the data file and of its accompanying description file
url_data = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
url_names = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.names"

# Create the working directory if it does not exist, including any
# missing parent directories (os.mkdir would fail on those)
working_dir.mkdir(parents=True, exist_ok=True)

# Local destinations of the two downloaded files
path_data = working_dir/"auto-mpg.data"
path_names = working_dir/"auto-mpg.names"

In [16]:
# Fetch the data file and the description file to their local paths
for destination, source_url in ((path_data, url_data), (path_names, url_names)):
    file_download(destination, source_url)

In [17]:
# Labels assigned to the columns when the data file is loaded
col_names = [
    'MPG',
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Weight',
    'Acceleration',
    'Year',
    'Origin',
    'Name',
]

In [18]:
# Fields are separated by runs of whitespace and '?' marks a missing value.
# sep=r'\s+' is the replacement for delim_whitespace=True, which is
# deprecated in recent pandas releases.
df = pd.read_table(working_dir/"auto-mpg.data", names=col_names, sep=r'\s+', na_values='?')

In [19]:
# Preview the first five rows of the loaded table
df.head(n=5)

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Year,Origin,Name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino


In [20]:
# Write the whole table as CSV, leaving out the index column
df.to_csv(working_dir/'auto-mpg.csv', index=False)

# Store selected columns as separate NumPy array files in the working directory
for array_name, column in (
    ('displacement', 'Displacement'),
    ('horsepower', 'Horsepower'),
    ('mpg', 'MPG'),
    ('weight', 'Weight'),
):
    np.save(working_dir/array_name, df[column].values)