In [83]:
# System imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import hvplot.pandas
import panel as pn
from pathlib import Path
from dotenv import load_dotenv
from panel.interact import interact
from panel import widgets
from string import digits
import csv
import json
import numpy as np

pn.extension()

%matplotlib inline

In [84]:
# Local imports
import sys
sys.path.append("../lib2")

from Constants import Constants
from ProcessingTools import DateProcessingTools

In [85]:
# Build tools
# debug_level is handed to the DateProcessingTools constructor below.
debug_level = 0
# Constants supplies the PREPROCESSED_*_FILE_PATH attributes read by the data-loading cell.
constants = Constants()
# NOTE(review): tool_data_processing is not used in the cells visible here — confirm it is still needed.
tool_data_processing = DateProcessingTools(debug_level)

In [86]:
# Import all preprocessed data
# Each frame is read from a pickle file whose path comes from `constants`.
# NOTE(review): pd.read_pickle can execute arbitrary code while loading — only
# point these paths at files produced by this project's own preprocessing step.
atlas           = pd.read_pickle(constants.PREPROCESSED_ATLAS_FILE_PATH)
forecast        = pd.read_pickle(constants.PREPROCESSED_FORECAST_DATA_FILE_PATH)
revenue2020     = pd.read_pickle(constants.PREPROCESSED_REVENUE2020_FILE_PATH)
revenue2020A    = pd.read_pickle(constants.PREPROCESSED_REVENUE2020A_FILE_PATH)

In [87]:
# Preview the first rows of the preprocessed atlas invoice data
atlas.head()
#avg_price_df = atlas.reset_index().groupby(by=['Subscription', 'Customers']).mean()
#avg_price_df.head()

Unnamed: 0_level_0,Invoice Date,Invoice #,Invoice Amount,Subscription,Account Code,Service Start,Service End
Customers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
University 1,2015-03-20 00:00:00-04:00,ATLAS 315,72000.0,1 Year,4700-0-00-00000-18-0000,2015-03-18 00:00:00-04:00,2016-06-30 00:00:00-04:00
University 102,2015-05-28 00:00:00-04:00,AJ501,3500.0,1 Year,4700-0-00-00000-16-0000,2015-06-01 00:00:00-04:00,2016-06-30 00:00:00-04:00
University 3,2015-06-23 00:00:00-04:00,AJ502,3500.0,1 Year,4700-0-00-00000-17-0000,2015-06-10 00:00:00-04:00,2016-06-30 00:00:00-04:00
University 4,2015-06-26 00:00:00-04:00,AJ503,6500.0,1 Year,4700-0-00-00000-32-0000,2015-06-01 00:00:00-04:00,2016-06-30 00:00:00-04:00
University 5,2015-10-07 00:00:00-04:00,AJ504,750.0,1 Year,4700-0-00-00000-20-0000,2015-10-05 00:00:00-04:00,2016-09-30 00:00:00-04:00


In [88]:
# NOTE(review): in the visible notebook, avg_price_df is first assigned in a later
# cell (the groupby/mean cell), so this cell raises NameError on a fresh
# top-to-bottom run — confirm the intended cell order.
avg_price_df.dtypes

Invoice Amount    float64
dtype: object

In [94]:
# Use hvplot to create an interactive line chart of 'Invoice Amount' per customer.
# groupby='Subscription' is meant to provide the dropdown selector (one view per
# subscription value).
# NOTE(review): avg_price_df is assigned in a later cell of the visible notebook;
# on a fresh top-to-bottom run this cell fails unless that cell runs first.

avg_price_mean = avg_price_df

sfo_grouped_month_and_02_Anonymized_plot = avg_price_mean.hvplot.line(
    x='Customers',
    y='Invoice Amount',
    xlabel= 'Customers',
    ylabel='Invoice Amount',
    groupby='Subscription', 
    width=600
).opts(yformatter='%0f')

# Last expression of the cell: renders the plot.
sfo_grouped_month_and_02_Anonymized_plot

In [90]:
# Average invoice amount per customer and subscription type.
# Only 'Invoice Amount' is aggregated: the other columns (invoice numbers, account
# codes, dates) are not meaningful to average, and pandas >= 2.0 raises a TypeError
# when mean() meets the string columns instead of silently dropping them.
avg_price_df = (
    atlas
    .reset_index()
    .groupby(by=['Customers', 'Subscription'])[['Invoice Amount']]
    .mean()
)
avg_price_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Invoice Amount
Customers,Subscription,Unnamed: 2_level_1
University 1,1 Year,69500.0
University 10,1 Year,1466.666667
University 100,1 Year,1000.0
University 101,1 Year,1916.666667
University 102,1 Year,2100.0


In [91]:
# Inspect the column dtypes of the aggregated frame.
avg_price_df.dtypes

Invoice Amount    float64
dtype: object

In [92]:
# Read the census data into a Pandas DataFrame
#file_path = Path("../Resources/sfo_atla.csv")
#sfo_data = pd.read_csv(file_path, index_col="year")
#sfo_data.head()

In [107]:
# Top 10 customers ranked by their highest average invoice amount.
# avg_price_df holds one mean 'Invoice Amount' per (Customers, Subscription) pair,
# so it is first collapsed to a single value per customer; this keeps a customer
# with several subscription types from being listed more than once.
customers = avg_price_df.groupby('Customers')[['Invoice Amount']].max()
top_10_highest_customer = (
    customers
    .sort_values(by='Invoice Amount', ascending=False)
    .head(10)
    .reset_index()
)

top_10_highest_customer_plot = top_10_highest_customer.hvplot.bar(
    x='Customers',
    y='Invoice Amount',
    rot=90,
    title='Top 10 Highest Customers',
)
# Last expression of the cell: renders the bar chart.
top_10_highest_customer_plot

In [None]:
# Parallel Coordinates Plot
#px.parallel_coordinates(top_10_most_expensive, color='sale_price_sqr_foot')


In [None]:
# Parallel Categories Plot
#px.parallel_categories(
#    top_10_most_expensive,
#    dimensions=["neighborhood", "sale_price_sqr_foot", "housing_units", "gross_rent"],
 #   color="sale_price_sqr_foot",
#    color_continuous_scale=px.colors.sequential.Inferno
#)

In [124]:
# import libraries
import numpy as np
import pandas as pd
import hvplot.pandas
import panel as pn
pn.extension()
# Create reproducible sample data: one row per calendar day of 2020.
# A seeded generator keeps the demo identical across kernel restarts.
rng = np.random.default_rng(42)

# Status values; also used as the dropdown choices for the interactive heatmap.
types = ['confirmed', 'bedridden', 'recovered']

# ISO dates avoid the ambiguous day-first/month-first parsing of '31-12-2020'.
sample_dates = pd.date_range(start='2020-01-01', end='2020-12-31')
# Derive the row count from the date range instead of hard-coding 366.
n_days = len(sample_dates)

df = pd.DataFrame({
    'date': sample_dates,
    'status': rng.choice(types, n_days),
    'status2': rng.choice(['A', 'B', 'C'], n_days),
    'value': rng.random(n_days) * 100
})
# The callback must return the plot object so pn.interact can display it
def plot_heatmap(chosen_type):
    """Return an hvplot heatmap of the sample rows whose 'status' equals chosen_type.

    The heatmap uses 'date' on the x axis, 'status2' on the y axis and
    'value' as the cell colour. Reads the module-level frame `df`.
    """
    status_matches = df['status'] == chosen_type
    return df[status_matches].hvplot.heatmap(x='date', y='status2', C='value')
# Show the interactive plot; `types` is passed as the set of choices for chosen_type
pn.interact(plot_heatmap, chosen_type=types)

In [149]:
# Use hvplot to create an interactive bar chart of the 'Invoice Amount' column.
# The chart is filtered through two dropdown selectors: customer and subscription.

# Alias of the aggregated frame; read by get_plot and by the filter lists below.
avg_price_mean = avg_price_df

def get_plot(customer_filter, subscription_filter):
    """Bar chart of the average invoice amount for one customer/subscription pair.

    Parameters
    ----------
    customer_filter : value compared against the 'Customers' column.
    subscription_filter : value compared against the 'Subscription' column.

    Returns
    -------
    The hvplot bar chart built from the matching rows of `avg_price_mean`.
    A combination that does not occur in the data selects no rows.
    """
    # avg_price_mean already has one row per (Customers, Subscription), so a
    # further groupby/count would plot a constant 1 under an 'Invoice Amount'
    # label; use the averaged amounts directly.
    amounts = avg_price_mean.reset_index()
    is_selected = (
        (amounts['Customers'] == customer_filter)
        & (amounts['Subscription'] == subscription_filter)
    )
    return amounts[is_selected].hvplot.bar(
        x='Customers',
        y='Invoice Amount',
        xlabel='Customers',
        ylabel='Invoice Amount',
        width=600
    ).opts(yformatter='%0f')

# Dropdown choices: distinct customers and subscriptions, in order of first appearance.
customer_filter_values = avg_price_mean.reset_index()['Customers'].unique().tolist()
subscription_filter_values = avg_price_mean.reset_index()['Subscription'].unique().tolist()

# Wire both dropdowns to get_plot; the returned panel is the cell's output.
pn.interact(
    get_plot,
    customer_filter=customer_filter_values,
    subscription_filter=subscription_filter_values,
)

In [144]:
# avg_price_mean.groupby('Subscription').count()
# avg_price_mean.groupby(['Customers', 'Subscription']).count().loc["University 1"]

# Row count per (Customers, Subscription) pair, flattened back to plain columns.
df_counts = (
    avg_price_mean
    .groupby(['Customers', 'Subscription'])
    .count()
    .reset_index()
)
# Look up a single pair to inspect its count.
df_counts.loc[
    lambda frame: (frame['Customers'] == 'University 1')
    & (frame['Subscription'] == '1 Year')
]

Unnamed: 0,Customers,Subscription,Invoice Amount
0,University 1,1 Year,1
