 # Plotting with Plotly

 Put your Parallel Coordinates plotting knowledge to use by using the plot to visaulize and analyze the relationship between sales, foreclosures, and year for Allgehany County in Pennsylvania.

In [58]:
import plotly.express as px
import pandas as pd
from pathlib import Path

 ### Prep Data for Calculating Total Number of Sales and Foreclosures

In [66]:
# Read in data
sales = pd.read_csv(
    Path("../../Resources/allegheny_sales.csv"),
    infer_datetime_format=True,
    parse_dates=True,
    index_col="SALEDATE",
).dropna()

foreclosures = pd.read_csv(
    Path("../../Resources/allegheny_foreclosures.csv"),
    infer_datetime_format=True,
    parse_dates=True,
    index_col="filing_date",
).dropna()

# Slice data and get the count of instances by year
foreclosures_grp_cnt = (
    foreclosures[["amount"]].groupby([foreclosures.index.year]).count()
)
sales_grp_cnt = sales[["PRICE"]].groupby([sales.index.year]).count()

# Rename columns to be 'num_sales' and 'num_foreclosures'
sales_grp_cnt.columns = ["num_sales"]
foreclosures_grp_cnt.columns = ["num_foreclosures"]

In [60]:
# Concatenate data
sales_foreclosures_cnt = pd.concat([sales_grp_cnt, foreclosures_grp_cnt], axis=1)
sales_foreclosures_cnt.dropna(inplace=True)
sales_foreclosures_cnt.reset_index(inplace=True)
sales_foreclosures_cnt.rename(columns={
    'index':'year',
    'num_sales':'sales_grp_cnt',
    'num_foreclosures':'foreclosures_grp_cnt'
}, inplace=True)

sales_foreclosures_cnt = sales_foreclosures_cnt[["sales_grp_cnt", "year", "foreclosures_grp_cnt"]]
sales_foreclosures_cnt.head()

Unnamed: 0,sales_grp_cnt,year,foreclosures_grp_cnt
0,85.0,2012,2893
1,93.0,2013,2841
2,97.0,2014,2676
3,108.0,2015,2431
4,102.0,2016,2163


 ### Plot data

In [61]:
# Plot data using parallel_coordinates plot
px.parallel_coordinates(sales_foreclosures_cnt, color='year')

 ### Prep Data for Calculating Total Amounts for Sales and Foreclosures

In [62]:
# Group data and calculate total amount in foreclosures and sales by year
sales_sum_total = sales[['PRICE']].groupby(sales.index.year).sum()
foreclosures_sum_total = foreclosures[['amount']].groupby(foreclosures.index.year).sum()
# Rename columns to 'amount_from_sales' and 'amount_from_foreclosures'

In [63]:
# Concatenate sums

amounts = pd.concat([sales_sum_total, foreclosures_sum_total])
amounts.reset_index(inplace=True)
amounts = amounts[["PRICE", "index", "amount"]]
amounts.head()

Unnamed: 0,PRICE,index,amount
0,11181723.0,2012,
1,14014365.0,2013,
2,15779621.0,2014,
3,16596990.0,2015,
4,13499315.0,2016,


 ### Plot Data

In [64]:
# Use parallel_coordinates to plot data
px.parallel_coordinates(amounts, color='index')
