## Import Packages

This script makes use of the packages below.  
* 'sqlalchemy' is used to create a connection engine to our Postgres database.
* 'pandas' lets us query our database with SQL and load the results into DataFrames.
* 'numpy', 'seaborn', and 'matplotlib' are used for visualization purposes.
* 'plotly' is used for the interactive choropleth map.



In [1]:
import pandas as pd
import plotly
import plotly.graph_objs as go
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sqlalchemy import create_engine

# Switch plotly to offline notebook rendering so figures are drawn inline.
plotly.offline.init_notebook_mode(connected=True)

# Show the installed plotly version. It must be the last expression of the
# cell to be displayed; as an earlier bare expression it produced no output.
plotly.__version__

### Import pre-processed data subset.

In [2]:
# Load the pre-processed listings subset from the local pickle file.
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
# compression='infer' is the pandas default, so it is not spelled out here.
marketcheck = pd.read_pickle("listings.pkl")

# Preview the first five rows as plain text.
print(marketcheck.head(5))

                   vin_ss vehicle_type_ss body_type_ss make_ss model_ss  \
231130  JHLRD78883C045742             SUV          SUV   HONDA     CR-V   
66452   JHLRE48777C090454             SUV          SUV   HONDA     CR-V   
144754  1HGEM225X1L115205             Car        Coupe   HONDA    CIVIC   
232386  2HGFG1B88BH518131             Car        Coupe   HONDA    CIVIC   
172683  2HGFA1F32AH537028             Car        Sedan   HONDA    CIVIC   

        year_is      fuel_type_ss engine_size_ss transmission_ss doors_is  \
231130     2003  Regular Unleaded            2.4       Automatic        4   
66452      2007  Regular Unleaded            2.4       Automatic        4   
144754     2001  Regular Unleaded            1.7       Automatic        2   
232386     2011  Regular Unleaded            1.8       Automatic        2   
172683     2010  Regular Unleaded            1.8       Automatic        4   

           ...      interior_color_ss_encoded exterior_color_ss_encoded  \
231130     

In [3]:
# Drop listings whose state code is not supported by the plotly map.
filtered_df = marketcheck[~marketcheck['state_ss'].isin(['GU', 'PR', 'WS'])]

# Mean price and number of listings per state.
# as_index=False keeps 'state_ss' as a regular column; passing a list of
# aggregations gives two-level column labels: ('price_fs', 'mean') and
# ('price_fs', 'count').
aggregated_df = (
    filtered_df[['state_ss', 'price_fs']]
    .groupby('state_ss', as_index=False)
    .agg({'price_fs': ['mean', 'count']})
)
aggregated_df




Unnamed: 0_level_0,state_ss,price_fs,price_fs
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,count
0,AL,17311.719136,1620
1,AR,18988.161232,552
2,AZ,17007.092142,1769
3,CA,17591.415311,7315
4,CO,15995.636364,1573
5,DE,14749.779221,154
6,FL,16989.165454,6872
7,GA,16561.787922,4239
8,HI,17751.879173,629
9,IA,16579.268266,917


### Prepare Plotly Visualization

In [4]:
# (Re-)enable plotly's offline notebook mode so the figure below is drawn inline.
plotly.offline.init_notebook_mode(connected=True)

# Six-stop colour scale as [position in 0..1, RGB colour] pairs,
# running from a very light to a dark purple.
scl = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]


#Prepare text to show in states

def f(x):
    """Summarise column 'a' of ``x`` as a two-entry Series.

    Parameters
    ----------
    x : object indexable by ``'a'`` whose result offers ``.mean()`` and
        ``.count()`` (e.g. a DataFrame or a groupby group).

    Returns
    -------
    pandas.Series
        Indexed by ``'a_mean'`` and ``'a_count'``, in that order.

    NOTE(review): nothing in the visible cells calls this helper, and no
    column named 'a' appears in the data shown - possibly a leftover; confirm
    before relying on it.
    """
    column = x['a']
    summary = {'a_mean': column.mean(), 'a_count': column.count()}
    return pd.Series(summary, index=['a_mean', 'a_count'])


# Build the per-state hover label that the map title announces ("Hover for
# breakdown"): state code, average listing price and number of listings.
# Previously the label was only the state code, so the computed mean never
# reached the figure. '<br>' is the line break plotly uses in hover text.
aggregated_df['text'] = (
    aggregated_df['state_ss'].astype(str)
    + '<br>Average price: ' + aggregated_df[('price_fs', 'mean')].map('${:,.2f}'.format)
    + '<br>Listings: ' + aggregated_df[('price_fs', 'count')].astype(str)
)




In [5]:
# Choropleth trace: one entry per state, shaded by the number of listings.
data = [
    dict(
        type='choropleth',
        colorscale=scl,
        autocolorscale=False,  # use the custom scale in `scl`
        locations=aggregated_df['state_ss'],
        locationmode='USA-states',  # locations are two-letter state codes
        # Tuple access selects the ('price_fs', 'count') column directly
        # instead of chaining two [] lookups.
        z=aggregated_df[('price_fs', 'count')].astype(float),
        text=aggregated_df['text'],  # hover label per state
        marker=dict(
            # White outline between states.
            line=dict(color='rgb(255,255,255)', width=2),
        ),
        colorbar=dict(title="Listings"),
    )
]

# Figure layout: US-only map on the Albers USA projection, lakes drawn in white.
layout = dict(
    title='HONDA Listings by State<br>(Hover for breakdown)',  # typo "Lisings" fixed
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

# Assemble the figure and render it inline in the notebook.
fig = dict(data=data, layout=layout)
plotly.offline.iplot(fig, filename='d3-cloropleth-map')