In [1]:
#Importing necessary libraries
import requests
import numpy as np
import pandas as pd
from ipynb.fs.full.FinalProjectBKudaimiPart1 import *
import warnings
warnings.filterwarnings('ignore')

In [2]:
#Function that takes a car make and model as input, pulls the car dimension data in metric units for the 2019 model
#year from the NHTSA database, and returns the car information as a dictionary (or 'No data' when no match is found)

def car_information_puller(car_make, car_model, year = 2019, timeout = 30):
    """Look up one car model in the NHTSA Canadian vehicle specifications API.

    Parameters
    ----------
    car_make : any
        Make of the car. It is converted to an upper-case string before the request.
    car_model : any
        Model of the car. It is converted to an upper-case string before the request.
    year : int, optional
        Model year to query. Defaults to 2019.
    timeout : float, optional
        Number of seconds to wait for the server before giving up. Defaults to 30.

    Returns
    -------
    dict or str
        A dictionary with human-readable labels holding the make, model, length, width, height, wheelbase,
        curb weight, front/rear track width and weight distribution of the first entry returned by the API.
        The string 'No data' is returned when the API reports no match, and also when the request or the
        parsing of its answer fails (a message is printed in that case).
    """
    #Capitalizing both make and model names before they are sent to the API
    car_make = str(car_make).upper()
    car_model = str(car_model).upper()

    #The query values are handed to requests as parameters so that spaces, ampersands and other special
    #characters in a make or model name are URL-encoded instead of corrupting the query string
    URL = 'https://vpic.nhtsa.dot.gov/api/vehicles/GetCanadianVehicleSpecifications/'
    query = {'year' : year, 'make' : car_make, 'model' : car_model, 'units' : 'metric', 'format' : 'json'}

    try:
        #A timeout keeps a single unresponsive request from stalling the whole run
        response = requests.get(URL, params = query, timeout = timeout)
        response.raise_for_status()
        organized_data = response.json()

        #The function only proceeds if the returned JSON displays a success message and a non-zero count of cars.
        #If no car of a make or model could be found, then a message is returned in its place
        if organized_data['Count'] == 0 or organized_data['Message'] != 'Results returned successfully':
            return 'No data'

        #The JSON stores the numbers in a section called 'Results', which lists the trims of a model. The first
        #entry is used (the dimensions are assumed to be the same for every trim of a model)
        dimensions = organized_data['Results'][0]['Specs']

        #The specs are read by position. Positions 8-14 (misc. dimensions A-G) are skipped on purpose.
        #NOTE(review): this relies on the API always returning the specs in the same order - confirm
        labels_by_position = {0 : 'Make', 1 : 'Model', 3 : 'Length (cm)', 4 : 'Width (cm)', 5 : 'Height (cm)',
                              6 : 'Wheelbase (cm)', 7 : 'Curb Weight (kg)', 15 : 'Front Track Width (cm)',
                              16 : 'Rear Track Width (cm)', 17 : 'Weight Distribution (Front/Rear)'}

        #Inserting the information into a dictionary with human-readable labels
        return {label : dimensions[position]['Value'] for position, label in labels_by_position.items()}
    except Exception:
        #Catching Exception (rather than everything) lets Ctrl-C still interrupt a long run. Returning 'No data'
        #instead of None keeps failed lookups in the same form as lookups without a match
        print('Error in connecting to the NHTSA database.')
        return 'No data'



In [3]:
#Collecting, for every car make from Part 1 of my final project, a (make, models) pair holding that make's models

car_models = [(make, models) for make, models in cars.groupby('Make')['Model']]

In [4]:
#Every make/model pair from the list above is passed into the car_information_puller function, and the data
#returned for each car is collected into a list. The models are iterated over directly instead of being looked
#up by index label, since a label lookup returns several values at once whenever index labels repeat

info_list = [car_information_puller(make, model) for make, models in car_models for model in models]

In [5]:
#Keeping only the entries that hold car data (dictionaries) to ensure the data frame comes out clean. This drops
#every 'No data' placeholder in a single pass, as well as any empty (None) result left by a failed request
info_list = [entry for entry in info_list if isinstance(entry, dict)]

In [7]:
#Building a data frame out of the collected car data list
car_dims = pd.DataFrame(data = info_list)

car_dims

Unnamed: 0,Make,Model,Length (cm),Width (cm),Height (cm),Wheelbase (cm),Curb Weight (kg),Front Track Width (cm),Rear Track Width (cm),Weight Distribution (Front/Rear)
0,ACURA,ILX 4DR SEDAN,462,180,141,267,1415,151,152,60/40
1,ACURA,MDX 4DR SUV AWD,498,196,171,282,1970,169,168,58/42
2,ACURA,NSX,447,194,122,263,1725,166,162,42/58
3,ACURA,RDX 4DR SUV,474,190,167,275,1830,163,164,57/43
4,ACURA,TLX 4DR SEDAN FWD,484,185,145,278,1596,160,160,60/40
...,...,...,...,...,...,...,...,...,...,...
301,VOLVO,V60 4DR WAGON,476,185,150,287,1723,160,160,56/44
302,VOLVO,V90 4DR WAGON,494,189,144,294,1826,163,163,54/46
303,VOLVO,XC40 4DR SUV,443,186,165,270,1682,160,163,
304,VOLVO,XC60 4DR SUV,469,190,166,287,1866,166,166,55/45


In [8]:
#Dropping the trim names from the model by splitting the model names and only keeping the leading word(s).
A = car_dims['Model'].str.split()

#A few car brands have model names with multiple words. This table maps the first word of such a name to the
#number of words that make up the model name; every other model name keeps its first word only.
MODEL_WORD_COUNTS = {'GRAND' : 2, 'SANTA' : 2, 'NEW' : 2, 'FLYING' : 2, 'RANGE' : 2, 'TOWN' : 3}

new_models = []
for item in A:

    #A missing or empty model name has no words to keep, so an empty string is stored in its place
    if not isinstance(item, list) or not item:
        new_models.append('')
        continue

    #Slicing (instead of indexing each word) does not fail when a name has fewer words than expected
    new_models.append(' '.join(item[:MODEL_WORD_COUNTS.get(item[0], 1)]))

In [9]:
#Overwriting the model column with the trimmed model names
car_dims['Model'] = pd.Series(new_models)


In [10]:
#Individual entries to clean up, listed as (make, old value, new value). Mazda's make name is removed from its
#model names to allow for easier data frame joining, and one Lexus entry is shortened.
entry_fixes = [('MAZDA', 'MAZDA3', '3'),
               ('MAZDA', 'MAZDA6', '6'),
               ('LEXUS', 'GX460', 'GX')]

#Each fix is applied only to the rows of the make it belongs to
for make, old_value, new_value in entry_fixes:
    make_rows = car_dims['Make'] == make
    car_dims[make_rows] = car_dims[make_rows].replace(old_value, new_value)


In [11]:
#Some values come back empty from the NHTSA JSON files, which leaves empty strings in the data frame.
#Every empty string is swapped for the phrase 'No data'
car_dims.replace(to_replace = '', value = 'No data', inplace = True)

In [12]:
#The phrase 'No data' is still not useful, but it makes the incomplete rows easy to find.
#I will remove the rows that do not contain a full set of car dimension data. Whole cells are compared against
#the phrase in one step, which also works for columns that hold missing or non-text values
incomplete_rows = car_dims.eq('No data').any(axis = 1)
car_dims = car_dims[~incomplete_rows]

car_dims

Unnamed: 0,Make,Model,Length (cm),Width (cm),Height (cm),Wheelbase (cm),Curb Weight (kg),Front Track Width (cm),Rear Track Width (cm),Weight Distribution (Front/Rear)
0,ACURA,ILX,462,180,141,267,1415,151,152,60/40
1,ACURA,MDX,498,196,171,282,1970,169,168,58/42
2,ACURA,NSX,447,194,122,263,1725,166,162,42/58
3,ACURA,RDX,474,190,167,275,1830,163,164,57/43
4,ACURA,TLX,484,185,145,278,1596,160,160,60/40
...,...,...,...,...,...,...,...,...,...,...
300,VOLVO,S90,496,189,144,294,1840,163,163,56/44
301,VOLVO,V60,476,185,150,287,1723,160,160,56/44
302,VOLVO,V90,494,189,144,294,1826,163,163,54/46
304,VOLVO,XC60,469,190,166,287,1866,166,166,55/45


In [14]:
#Dropping any duplicates that may still be in the data frame. drop_duplicates returns a new data frame rather
#than changing the existing one, so the result is assigned back to keep the duplicates out of car_dims
car_dims = car_dims.drop_duplicates()

car_dims

Unnamed: 0,Make,Model,Length (cm),Width (cm),Height (cm),Wheelbase (cm),Curb Weight (kg),Front Track Width (cm),Rear Track Width (cm),Weight Distribution (Front/Rear)
0,ACURA,ILX,462,180,141,267,1415,151,152,60/40
1,ACURA,MDX,498,196,171,282,1970,169,168,58/42
2,ACURA,NSX,447,194,122,263,1725,166,162,42/58
3,ACURA,RDX,474,190,167,275,1830,163,164,57/43
4,ACURA,TLX,484,185,145,278,1596,160,160,60/40
...,...,...,...,...,...,...,...,...,...,...
300,VOLVO,S90,496,189,144,294,1840,163,163,56/44
301,VOLVO,V60,476,185,150,287,1723,160,160,56/44
302,VOLVO,V90,494,189,144,294,1826,163,163,54/46
304,VOLVO,XC60,469,190,166,287,1866,166,166,55/45
