In [1]:
import requests
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import pickle

### Import functions unique to this project

In [2]:
from master_functions import get_car_urls
from master_functions import make_model_df

## MERCEDES-BENZ - Create master dataframe for all models

This worksheet is intended to build a datafame for all **Mercedes-Benz** cars.  There are some 'quirks' in how the data is structured from its source on https://www.fueleconomy.gov/, so more manual steps are taken below to check files for issues, combine first what is 'normal' and then add in those that required special attention.

**Step #1:** Create unique urls for every car model for the years 1984 - 2021<br>
- Uses `get_car_urls` from master function, Inputs: (car_make, [list of all models])

In [3]:
mercedes_urls = get_car_urls('Mercedes-Benz',
                             ['190','200','230',
                              '260','300','350',
                              '380','400','420',
                              '500','560','600',
                              'A-Class','AMG GT',
                              'B-Class Electric Drive','C-Class',
                              'CL-Class','CLA-Class','CLK-Class',
                              'CLS-Class','E-Class','G-Class',
                              'GL-Class','GLA-Class','GLB-Class',
                              'GLC-Class','GLE-Class','GLK-Class',
                              'GLS-Class','Maybach','Metris Cargo',
                              'Metris Passenger','ML-Class','R-Class',
                              'S-Class','SL-Class','SLC-Class',
                              'SLK-Class','SLR','SLS AMG'
                             ])

-

**Step #2:** Get length of list created in Step 1.  This number will be how many times you run the function in Step 3 to check all of the urls<br>

In [4]:
# Verify number of urls and use this number
# to know how many urls need to 'check below'

len(mercedes_urls)

40

-

**Step #3:** Check all of the urls you just created.<br>
- If does not work, add to 'problem' URLs string below this cell

In [46]:
# Test area for each url with [carmake]_urls[index]
# by seeing if data appears correctly

make_model_df('Mercedes-Benz',mercedes_urls[39])

Unnamed: 0,year,make,model,capacity_liters,cylinders,transmission,trans_speed,fuel_type,gg_emissions,mpg
0,2012,Mercedes-Benz,SLS AMG,6.2,8,Automatic,AM7,Premium Gasoline,555,16
1,2011,Mercedes-Benz,SLS AMG,6.2,8,Automatic,AM7,Premium Gasoline,555,16
2,2012,Mercedes-Benz,SLS AMG Roadster,6.2,8,Automatic,AM7,Premium Gasoline,555,16
3,2014,Mercedes-Benz,SLS AMG Coupe,6.2,8,Automatic,AM7,Premium Gasoline,589,15
4,2013,Mercedes-Benz,SLS AMG Coupe,6.2,8,Automatic,AM7,Premium Gasoline,589,15
5,2015,Mercedes-Benz,SLS AMG GT Coupe,6.2,8,Automatic,AM7,Premium Gasoline,589,15
6,2014,Mercedes-Benz,SLS AMG GT Coupe,6.2,8,Automatic,AM7,Premium Gasoline,589,15
7,2013,Mercedes-Benz,SLS AMG GT Coupe,6.2,8,Automatic,AM7,Premium Gasoline,589,15
8,2015,Mercedes-Benz,SLS AMG GT Roadster,6.2,8,Automatic,AM7,Premium Gasoline,589,15
9,2014,Mercedes-Benz,SLS AMG GT Roadster,6.2,8,Automatic,AM7,Premium Gasoline,589,15


-

Populate this section so if there are any 'problem' URLs from your test above

In [47]:
#'Problem' URLs
'''
mercedes_urls[14]
mercedes_urls[15]
mercedes_urls[17]
mercedes_urls[23]
mercedes_urls[25]
mercedes_urls[26]
mercedes_urls[34]
'''

#Print list length again to 
#set length of range in next cell
len(mercedes_urls)

40

-

**Step #4:** Create dfs for all 'okay' urls and place each into a master list
- Automate where possible, but some may need to be added one by one to avoid 'problem' urls

In [48]:
# for 'normal' urls to make a df and add to master df list, automate it!

mercedes_dfs = []

mercedes_dfs.append(make_model_df('Mercedes-Benz',mercedes_urls[24]))

for x in range(0,14):
    mercedes_dfs.append(make_model_df('Mercedes-Benz',mercedes_urls[x]))

mercedes_dfs.append(make_model_df('Mercedes-Benz',mercedes_urls[16]))

for x in range(18,23):
    mercedes_dfs.append(make_model_df('Mercedes-Benz',mercedes_urls[x]))

mercedes_dfs.append(make_model_df('Mercedes-Benz',mercedes_urls[24]))

for x in range(27,34):
    mercedes_dfs.append(make_model_df('Mercedes-Benz',mercedes_urls[x]))

for x in range(35,40):
    mercedes_dfs.append(make_model_df('Mercedes-Benz',mercedes_urls[x]))
    



-

**Step #5:** Concatenate all of the 'normal' car model dfs into one master dataframe

In [50]:
mercedes_dfs = pd.concat(mercedes_dfs, ignore_index=True)

mercedes_dfs

Unnamed: 0,year,make,model,capacity_liters,cylinders,transmission,trans_speed,fuel_type,gg_emissions,mpg
0,2021.0,Mercedes-Benz,GLB250,2.0,4.0,Automatic,AM8,Premium Gasoline,337.0,26.0
1,2020.0,Mercedes-Benz,GLB250 4matic,2.0,4.0,Automatic,AM8,Premium Gasoline,338.0,26.0
2,2020.0,Mercedes-Benz,GLB250,2.0,4.0,Automatic,AM8,Premium Gasoline,344.0,26.0
3,1984.0,Mercedes-Benz,190 D 2.2/190 E 2.3,2.2,4.0,Manual,5,Diesel,339.0,30.0
4,1985.0,Mercedes-Benz,190,2.2,4.0,Manual,5,Diesel,339.0,30.0
...,...,...,...,...,...,...,...,...,...,...
249,2015.0,Mercedes-Benz,SLS AMG GT Coupe,6.2,8.0,Automatic,AM7,Premium Gasoline,589.0,15.0
250,2014.0,Mercedes-Benz,SLS AMG GT Coupe,6.2,8.0,Automatic,AM7,Premium Gasoline,589.0,15.0
251,2013.0,Mercedes-Benz,SLS AMG GT Coupe,6.2,8.0,Automatic,AM7,Premium Gasoline,589.0,15.0
252,2015.0,Mercedes-Benz,SLS AMG GT Roadster,6.2,8.0,Automatic,AM7,Premium Gasoline,589.0,15.0


-

**Step #6:** Pickle the dataframe made in Step 6 of all car's models with 'normal' dataframes
- Will now be saved so further work on dataframe can start at this place

In [51]:
with open('pickles/mercedes_dfs.pickle', 'wb') as to_write:
    pickle.dump(mercedes_dfs, to_write)

-

**Step #7:** Un-pickle the dataframe made in Step 7 of all car's models with 'normal' dataframes

In [52]:
with open('pickles/mercedes_dfs.pickle','rb') as read_file:
    mercedes_dfs = pickle.load(read_file)