In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the test-record CSV into a DataFrame; every later cell derives from it.
data = pd.read_csv(filepath_or_buffer='test_data_clean_uak211.csv')

In [3]:
# Preview the first five rows to check which columns were loaded.
data.head(n=5)

Unnamed: 0.1,Unnamed: 0,date,station_id,test_type,test_result,vin,model_year,vehicle_type,vehicle_class,gross_vehicle_weight_rating
0,1,01/01/2017,456.0,OBDII,Pass,5N1AN08W97C530793,2007.0,Truck,LDT1,5400.0
1,2,01/01/2017,1115.0,OBDII,Pass,WBANB53537CP05224,2007.0,Passenger Car,LDV,5026.0
2,3,01/01/2017,1109.0,OBDII,Pass,JN8AZ28R49T111532,2009.0,Passenger Car,LDT1,3848.0
3,4,01/01/2017,1115.0,OBDII,Pass,1N6AA07C18N350248,2008.0,Truck,LDT1,7200.0
4,5,01/01/2017,1115.0,Idle,Fail,1J4GZ88S5PC113745,1993.0,Passenger Car,LDV,5500.0


## 1. Prepare OBDII-only test data
    - keep only the OBDII test records and count the tests per station
    - merge the per-station OBDII counts with the verified average-fee CSV (prices are kept for later use)
    - merge the result with the station locations (lat/long) for mapping

In [4]:
# Keep only the rows whose test_type is exactly 'OBDII'.
OBDII_data = data.loc[data['test_type'].eq('OBDII')]

In [5]:
# Count OBDII tests per station.
# The aggregation starts from the raw `data` frame instead of reading and
# overwriting `OBDII_data`, so re-running this cell always yields the same
# per-station counts (previously a second run counted the aggregated rows).
OBDII_data = (
    data.loc[data['test_type'] == 'OBDII']
    .groupby('station_id')['test_result']
    .count()        # number of non-null test_result values per station
    .reset_index()  # back to a two-column frame: station_id, test_result
)

In [6]:
# Preview the first five per-station rows.
OBDII_data.head(n=5)

Unnamed: 0,station_id,test_result
0,1.0,2099
1,2.0,1916
2,11.0,627
3,24.0,1515
4,28.0,1029


In [7]:
# Load the per-station fee/contact table; it is joined later on 'Station Id'.
station_info = pd.read_csv(filepath_or_buffer='verified_avg_fee_by_station_jx.csv')
# Preview the first five stations.
station_info.head(n=5)

Unnamed: 0,No.,Station Id,Avg. Test Fees,Station Name,Address,City,ZIP,Phone Number,Price,Tax,On the Spot Registration,Registration/Sticker Fee,Service Type,Detailed Information
0,1.0,2.0,$25,KEN GARFF MERCEDES BENZ,575 SOUTH STATE,SALT LAKE CITY,84111.0,855-780-1053,25,0,Na,Na,TestAndRepair,Na
1,2.0,24.0,$39,OLYMPUS HILLS SINCLAIR,3905 SOUTH WASATCH BLVD,SALT LAKE CITY,84124.0,801- 272-2081,39,0,Na,Na,TestAndRepair,Na
2,3.0,28.0,$31,UNION SERVICE,997 EAST 7220 SOUTH,MIDVALE,84047.0,801-255-4122,Na,Na,Na,Na,Na,Na
3,4.0,38.0,$30,LARRY H. MILLER TOYOTA,5650 S STATE ST,MURRAY,84107.0,801-264-3850,30,0,Yes,5,TestAndRepair,Na
4,5.0,46.0,$35,QUALITY TIRE CO.,1335 WEST 2100 SOUTH,SALT LAKE CITY,84119.0,801-972-1944,Na,Na,Na,Na,Na,Na


In [8]:
# Attach station details to each per-station count row.
# The left join keeps every row of OBDII_data, matched or not.
OBDII_data = OBDII_data.merge(
    station_info,
    how='left',
    left_on='station_id',
    right_on='Station Id',
)

In [26]:
# Show (rows, columns) of OBDII_data as a quick sanity check on the merge.
OBDII_data.shape

(522, 16)

In [32]:
# Load station coordinates, keeping only the join key and the lat/long columns.
# The key is cast with .astype() inside the chain rather than by assigning a
# column on a sliced frame, which avoids pandas' SettingWithCopyWarning.
station_latlng = (
    pd.read_csv('SLCo_station_latlong_v1_yc.csv')
    .loc[:, ['Station Id', 'Latitude', 'Longitude']]
    .astype({'Station Id': int})
)
station_latlng.head()

In [36]:
# Attach coordinates. The right join keeps every row of station_latlng,
# whether or not a matching station_id exists in OBDII_data.
OBDII_data = OBDII_data.merge(
    station_latlng,
    how='right',
    left_on='station_id',
    right_on='Station Id',
)
OBDII_data.shape

## 2. Prepare data for all test types
    - count the tests per station across all test types
    - merge the per-station counts with the verified average-fee CSV (prices are kept for later use)
    - merge the result with the station locations (lat/long) for mapping

In [40]:
# Count tests per station across every test type (non-null test_result values).
All_data = (
    data.groupby('station_id')['test_result']
    .count()
    .reset_index()
)

In [41]:
# Preview the first five per-station rows.
All_data.head(n=5)

Unnamed: 0,station_id,test_result
0,1.0,2938
1,2.0,1961
2,11.0,797
3,24.0,1635
4,28.0,1167


In [42]:
# Attach station details; the left join keeps every row of All_data.
All_data = All_data.merge(
    station_info,
    how='left',
    left_on='station_id',
    right_on='Station Id',
)

In [43]:
# Attach coordinates; the right join keeps every row of station_latlng.
# Both inputs carry a 'Station Id' column, so the result holds
# 'Station Id_x' (from station_info) and 'Station Id_y' (from station_latlng).
All_data = All_data.merge(
    station_latlng,
    how='right',
    left_on='station_id',
    right_on='Station Id',
)

In [44]:
# Preview the first five rows of the fully merged frame.
All_data.head(n=5)

Unnamed: 0,station_id,test_result,No.,Station Id_x,Avg. Test Fees,Station Name,Address,City,ZIP,Phone Number,Price,Tax,On the Spot Registration,Registration/Sticker Fee,Service Type,Detailed Information,Station Id_y,Latitude,Longitude
0,2.0,1961.0,1.0,2.0,$25,KEN GARFF MERCEDES BENZ,575 SOUTH STATE,SALT LAKE CITY,84111.0,855-780-1053,25,0,Na,Na,TestAndRepair,Na,2,40.756698,-111.887756
1,24.0,1635.0,2.0,24.0,$39,OLYMPUS HILLS SINCLAIR,3905 SOUTH WASATCH BLVD,SALT LAKE CITY,84124.0,801- 272-2081,39,0,Na,Na,TestAndRepair,Na,24,40.686708,-111.795525
2,28.0,1167.0,3.0,28.0,$31,UNION SERVICE,997 EAST 7220 SOUTH,MIDVALE,84047.0,801-255-4122,Na,Na,Na,Na,Na,Na,28,40.619111,-111.862117
3,38.0,11557.0,4.0,38.0,$30,LARRY H. MILLER TOYOTA,5650 S STATE ST,MURRAY,84107.0,801-264-3850,30,0,Yes,5,TestAndRepair,Na,38,40.648415,-111.88955
4,46.0,4954.0,5.0,46.0,$35,QUALITY TIRE CO.,1335 WEST 2100 SOUTH,SALT LAKE CITY,84119.0,801-972-1944,Na,Na,Na,Na,Na,Na,46,40.725605,-111.929081
