In [203]:
import pandas as pd
import altair as alt
import numpy as np
from datetime import datetime

In [204]:
# Load the geolocated sales/prospect export.
# NOTE(review): the saved output of this cell echoes the read_csv line, which looks like a
# pandas warning raised here (possibly a mixed-dtype DtypeWarning) -- confirm, and if so
# pass explicit dtypes for the affected columns.
data = pd.read_csv("Geolocated Data - Sales & Prospect - 5.11.2023.csv")

# Drop the identifier, address, job-status, sales-rep and geocoding-accuracy columns;
# none of them is used by either the sales or the prospect frame below.
df = data.drop(columns=['id', 'FullAddress', 'StreetName', 'JobStatus',
                        'SalesRepName1', 'SalesRepName2',
                        'Accuracy.Score', 'Accuracy.Type'])

# A row without a contract date is a prospect; a row with one is a sale.
has_contract = df['ContractDate'].notna()
prospect = df[~has_contract]
sales = df[has_contract]

# Drop the columns each frame does not use.
sales = sales.drop(columns=['DateAdded', 'Issued', 'Sat'])
prospect = prospect.drop(columns=['GrossAmount', 'ContractDate', 'Source'])

# Prospects: keep only complete rows whose zip code is exactly five digits
# (other zip formats were throwing exceptions when converted to int).
prospect = prospect.dropna()
prospect = prospect[prospect['Zip'].str.contains(r'^\d{5}$')]

# Sales: keep only complete rows with a positive gross amount. A boolean mask selects
# the same rows as the former where(..., NaN) + dropna() round trip, without first
# blanking whole rows to NaN (which also upcast integer columns to float).
sales = sales[sales['GrossAmount'] > 0].dropna()

# Fix column types and parse each frame's own date column. Parsing the frame's column
# (instead of the unfiltered df column) avoids relying on index alignment and avoids
# parsing dates of rows that were already filtered out.
sales = sales.astype({'productid': 'str',
                      'City': 'str',
                      'State': 'str',
                      'Source': 'str',
                      'SubSource': 'str',
                      'Zip': 'int'})
sales = sales.assign(ContractDate=pd.to_datetime(sales['ContractDate']))

# Prospect zip codes are left as the five-digit strings validated above.
prospect = prospect.astype({'productid': 'str',
                            'City': 'str',
                            'State': 'str',
                            'SubSource': 'str'})
prospect = prospect.assign(DateAdded=pd.to_datetime(prospect['DateAdded']))

# Collapse raw product codes into readable labels: every sunroom code becomes
# 'Sunroom', 'PC' becomes 'Patio cover' and 'Win' becomes 'Window'.
sunroom_codes = [f'SR-{n}' for n in (3, 4, 5, 6, 9, 10, 11, 12, 13,
                                     16, 19, 20, 21, 22, 23, 24, 25)]
sunroom_codes.append('Sun')
product_labels = {code: 'Sunroom' for code in sunroom_codes}
product_labels.update({'PC': 'Patio cover', 'Win': 'Window'})

# Relabel the productid column only: a frame-wide replace would also rewrite any other
# column (City, Source, SubSource, ...) whose value happened to equal one of these codes.
sales = sales.assign(productid=sales['productid'].replace(product_labels))


  data = pd.read_csv("Geolocated Data - Sales & Prospect - 5.11.2023.csv")


In [209]:
# Lift Altair's row limit so the full sales frame can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# Drag-to-select region on the scatter plot; it also filters the bar chart below.
interval = alt.selection_interval()

# Scatter of gross amount over contract date. Points inside the brushed region are
# coloured by product; points outside it are drawn light gray.
# NOTE(review): y is mean(GrossAmount) while the tooltip uses the raw GrossAmount field;
# un-aggregated fields usually act as group-by keys next to an aggregate, so the mean may
# effectively be taken per distinct amount -- confirm this is intended.
base = (
    alt.Chart(sales)
    .mark_point()
    .encode(
        x=alt.X('ContractDate:T'),
        y=alt.Y('mean(GrossAmount)'),
        color=alt.condition(interval, 'productid', alt.value('lightgray')),
        tooltip=alt.Tooltip('GrossAmount'),
    )
    .add_selection(interval)
    .properties(width=800)
)

# Number of sales per product, restricted to the points inside the brushed region.
hist = (
    alt.Chart(sales)
    .mark_bar()
    .encode(
        x=alt.X('count()'),
        y=alt.Y('productid'),
        color=alt.Color('productid'),
    )
    .transform_filter(interval)
    .properties(width=800, height=80)
)

# Stack the scatter plot above the bar chart.
alt.vconcat(base, hist)


In [234]:
# Brush over the overview strip to pick a timeframe. The selection is restricted to the
# x channel because it is used as a date range: it drives the x-scale domain of the
# detail chart and filters the histogram, so a y (gross amount) extent is not wanted.
interval = alt.selection_interval(encodings=['x'])

# Click a product colour (in the product selector or the histogram) to filter the
# scatter plots to the chosen product(s).
click = alt.selection_multi(encodings=['color'])

# Shared scatter definition: one point per sale, filtered to the clicked product(s).
base = alt.Chart(sales).mark_point().encode(
    x = 'ContractDate:T',
    y = 'GrossAmount',
    color = 'productid'
).transform_filter(
    click
)

# Detail view: the x axis zooms to the timeframe brushed in the overview strip.
chart = base.encode(
    x = alt.X('ContractDate:T', scale = alt.Scale(domain = interval.ref())),
).properties(
    width = 800,
    height = 300,
    title = 'Historic Sales Record'
)

# Sales count per product. It is filtered by the brushed timeframe so that it matches
# its title; without this filter the bars always counted every sale in the data set.
hist = alt.Chart(sales).mark_bar().encode(
    x='count()',
    y='productid',
    color = 'productid'
).properties(
    width=800,
    height=80,
    title = 'Number of Sales in Selected Timeframe'
).transform_filter(
    interval
).add_selection(
    click
)

# Overview strip: the full date range, carrying the timeframe brush.
view = base.add_selection(
    interval
).properties(
    width = 800,
    height = 50
)

# Clickable product selector placed beside the detail view; its own colour legend is
# suppressed because the rectangles already act as the legend.
legend = alt.Chart(sales).mark_rect().encode(
    y = alt.Y('productid:N', axis = alt.Axis(title = 'Select product')),
    color = alt.condition(click, 'productid:N',
                        alt.value('lightgray'),
                         legend=None),
).properties(
    width = 50
).add_selection(
    click
)

# Layout: detail chart and product selector on top, then the overview strip, then counts.
(chart | legend) & view & hist
