In [1]:
### Module and data imports

import plotly
import plotly.plotly as py
import plotly.figure_factory as ff

from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import os

# Plotly credentials are read from the environment instead of being hard-coded:
# a notebook is routinely shared or committed, and an embedded API key leaks with it.
# NOTE(review): the key that used to be embedded here should be treated as
# compromised and regenerated.
# When either variable is unset the call is skipped; set_credentials_file is
# expected to have persisted earlier credentials locally — confirm for the
# installed plotly version.
plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if plotly_username and plotly_api_key:
    plotly.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)

# Show the working directory: both CSV paths below are relative to it.
cwd = os.getcwd()
print(cwd)

used_cars = pd.read_csv("tc20171021.csv")
us_cities_counties = pd.read_csv("uscitiesv1.4.csv")

### cleaning data (drop exotic vehicles)
max_car_price = 50000.0
# Keep every listing that is NOT priced at or above the cap. The condition is
# negated (rather than written as `Price < cap`) so rows with a missing price are
# retained. `.copy()` yields an independent frame so later column assignments do
# not operate on a view.
used_cars = used_cars.loc[~(used_cars.Price >= max_car_price)].copy()

C:\Users\James\Dropbox\DS\TDI_Challenge


In [2]:
# Lookup table: city name -> county FIPS code rendered as a zero-padded string of
# at least 5 characters. Built in one vectorized pass instead of a Python-level
# iterrows() loop; as before, a later row wins when a city name appears twice.
# NOTE(review): the key is the city name alone, so same-named cities in different
# states collapse onto a single FIPS code — consider keying on (city, state) if
# the lookup file provides a state column.
city_to_fips = dict(zip(
    us_cities_counties['city'],
    us_cities_counties['county_fips'].astype(str).str.zfill(5),
))

used_cars = used_cars.assign(
    # Cities missing from the lookup map to NaN.
    Fips=used_cars['City'].map(city_to_fips),
    # Normalised state code: surrounding whitespace removed, upper-cased.
    Code=used_cars['State'].str.strip().str.upper(),
)
print(used_cars.keys())

Index(['Id', 'Price', 'Year', 'Mileage', 'City', 'State', 'Vin', 'Make',
       'Model', 'Fips', 'Code'],
      dtype='object')


In [3]:
#### filter all NaN values
# DataFrame.dropna() returns a new frame and leaves the original untouched, so the
# result has to be bound back to the name; a bare `used_cars.dropna()` filters nothing.
used_cars = used_cars.dropna()

# Year that car ages are measured against.
# NOTE(review): the listings file name (tc20171021.csv) suggests a 2017 snapshot —
# confirm that 2019 is the intended reference year.
reference_year = float(2019)

cars_by_state = used_cars.groupby(['Code'])
cars_by_make = used_cars.groupby(['Make'])

#### Calculating the average age of a car per county (Fips)
ave_car_year_by_county = used_cars.groupby(['Fips'])['Year'].mean()

fips = ave_car_year_by_county.index.values
ave_car_age = reference_year - ave_car_year_by_county.values

#### Calculating the average age of a car per state
ave_car_year_by_state = cars_by_state['Year'].mean()
ave_car_age_st = reference_year - ave_car_year_by_state.values

#### Calculating other averages
# "norm_" series are each state's mean divided by the mean of all state means.
ave_car_price_by_state = cars_by_state['Price'].mean()
norm_car_price_by_state = ave_car_price_by_state / ave_car_price_by_state.mean()
ave_car_mil_by_state = cars_by_state['Mileage'].mean()
norm_car_mil_by_state = ave_car_mil_by_state / ave_car_mil_by_state.mean()

ave_car_mil_by_make = cars_by_make['Mileage'].mean()
ave_car_price_by_make = cars_by_make['Price'].mean()
# Despite its name this holds the mean model *year* per make; the age is derived below.
ave_car_age_by_make = cars_by_make['Year'].mean()
ave_car_age_make = reference_year - ave_car_age_by_make.values

In [4]:
# Custom purple colour scale. The trace below uses 'Jet' and does not reference
# it; the name is kept in case other cells rely on `scl`.
scl = [[0.0, 'rgb(242,240,247)'], [0.2, 'rgb(218,218,235)'], [0.4, 'rgb(188,189,220)'],
       [0.6, 'rgb(158,154,200)'], [0.8, 'rgb(117,107,177)'], [1.0, 'rgb(84,39,143)']]


def _rounded_text(values):
    """Return each value rounded to 2 decimals and rendered as a string."""
    return [str(round(value, 2)) for value in values]


# State labels must come from the groupby index, which is the order the
# aggregated values are in. `used_cars.Code.unique()` lists codes in order of
# first appearance instead, which would attach each value to the wrong state.
state_codes = ave_car_year_by_state.index.tolist()

dataset = pd.DataFrame({
    'States': state_codes,
    'Ave Age': _rounded_text(ave_car_age_st.tolist()),
    # reindex() makes the alignment with `state_codes` explicit.
    'Norm Price': _rounded_text(norm_car_price_by_state.reindex(state_codes).tolist()),
    'Norm Mileage': _rounded_text(norm_car_mil_by_state.reindex(state_codes).tolist()),
})

# Hover text shown for each state.
dataset['text'] = (
    dataset['States'] + '<br>'
    + 'Normalized Price ' + dataset['Norm Price'] + '<br>'
    + 'Normalized Mileage ' + dataset['Norm Mileage']
)

data = [dict(
    type='choropleth',
    colorscale='Jet',
    autocolorscale=False,
    locations=dataset['States'],
    # The colour axis needs numbers; the 'Ave Age' column stores formatted strings.
    z=dataset['Ave Age'].astype(float),
    locationmode='USA-states',
    text=dataset['text'],
    marker=dict(
        line=dict(
            color='rgb(255,255,255)',
            width=2,
        ),
    ),
    colorbar=dict(
        title="Average Car Age",
    ),
)]

layout = dict(
    title='Average Used Car Age by State<br>(Hover for breakdown)',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='d3-cloropleth-map')

In [5]:
#### Plotting counties across the US
#### NOTE: this county-level choropleth is currently disabled (every line below is commented out).

# colorscale = ["#d2e3f3","#c6dbef","#b3d2e9","#9ecae1",
#               "#85bcdb","#6baed6","#57a0ce","#4292c6","#3082be","#2171b5","#1361a9",
#               "#08519c","#0b4083","#08306b"]
# endpts = list(np.linspace(1, 12, len(colorscale) - 1))
# #fips = df_sample['FIPS'].tolist()
# #values = df_sample['Unemployment Rate (%)'].tolist()

# fig = ff.create_choropleth(
#     fips=fips, values=ave_car_age, scope=['usa'],
#     binning_endpoints=endpts, colorscale=colorscale,
#     show_state_data=True,
#     show_hover=True, centroid_marker={'opacity': 0},
#     county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},
#     state_outline={'color': 'rgb(0,0,0)', 'width': 0.5},
#     asp=2.9, title='Average Used Car Age in US Counties',
#     legend_title='Average Used Car Age (years)'
# )
# py.iplot(fig, filename='choropleth_full_usa')


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.





High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jdanski/0 or inside your plot.ly account where it is named 'choropleth_full_usa'


In [6]:
# scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
#             [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]

# # df['text'] = df['state'] + '<br>' +\
# #     'Beef '+df['beef']+' Dairy '+df['dairy']+'<br>'+\
# #     'Fruits '+df['total fruits']+' Veggies ' + df['total veggies']+'<br>'+\
# #     'Wheat '+df['wheat']+' Corn '+df['corn']

# dataset = pd.DataFrame({'States':used_cars.Code.unique(),'Price':ave_car_price_by_state})

# data = [ dict(
#         type='choropleth',
#         colorscale = scl,
#         autocolorscale = False,
#         locations = dataset['States'],
#         z = dataset['Price'],
#         locationmode = 'USA-states',
# #         text = df['text'],
#         marker = dict(
#             line = dict (
#                 color = 'rgb(255,255,255)',
#                 width = 2
#             ) ),
#         colorbar = dict(
#             title = "Average Car Price")
#         ) ]

# layout = dict(
#         title = 'Average Used Car Price by State<br>(Hover for breakdown)',
#         geo = dict(
#             scope='usa',
#             projection=dict( type='albers usa' ),
#             showlakes = True,
#             lakecolor = 'rgb(255, 255, 255)'),
#              )
    
# fig = dict( data=data, layout=layout )
# py.iplot( fig, filename='d3-cloropleth-map' )

In [7]:
# scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
#             [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]

# # df['text'] = df['state'] + '<br>' +\
# #     'Beef '+df['beef']+' Dairy '+df['dairy']+'<br>'+\
# #     'Fruits '+df['total fruits']+' Veggies ' + df['total veggies']+'<br>'+\
# #     'Wheat '+df['wheat']+' Corn '+df['corn']

# dataset = pd.DataFrame({'States':used_cars.Code.unique(),'Milage':ave_car_mil_by_state})

# data = [ dict(
#         type='choropleth',
#         colorscale = 'Jet',
#         autocolorscale = False,
#         locations = dataset['States'],
#         z = dataset['Milage'],
#         locationmode = 'USA-states',
# #         text = df['text'],
#         marker = dict(
#             line = dict (
#                 color = 'rgb(255,255,255)',
#                 width = 2
#             ) ),
#         colorbar = dict(
#             title = "Average Car Mileage")
#         ) ]

# layout = dict(
#         title = 'Average Used Car Mileage by State<br>(Hover for breakdown)',
#         geo = dict(
#             scope='usa',
#             projection=dict( type='albers usa' ),
#             showlakes = True,
#             lakecolor = 'rgb(255, 255, 255)'),
#              )
    
# fig = dict( data=data, layout=layout )
# py.iplot( fig, filename='d3-cloropleth-map')