In [18]:
# System imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import hvplot.pandas
import panel as pn
from pathlib import Path
from dotenv import load_dotenv
from panel.interact import interact
from panel import widgets
from string import digits
import csv
import json
import numpy as np
import dateparser
import panel as pn

pn.extension()

%matplotlib inline

In [19]:
# Local imports
import sys
sys.path.append("../lib2")

from Constants import Constants
from PreprocessingTools import MappingFunctions
from ProcessingTools import DateProcessingTools


In [20]:
# Construct the tools
debug_level = 0
constants = Constants()
tool_data_processing = DateProcessingTools(debug_level)
tool_lookup_tables = MappingFunctions(debug_level)


In [21]:
# Import all preprocessed data
atlas           = pd.read_pickle(constants.PREPROCESSED_ATLAS_FILE_PATH)
forecast        = pd.read_pickle(constants.PREPROCESSED_FORECAST_DATA_FILE_PATH)
revenue2020     = pd.read_pickle(constants.PREPROCESSED_REVENUE2020_FILE_PATH)
revenue2020A    = pd.read_pickle(constants.PREPROCESSED_REVENUE2020A_FILE_PATH)
atlas2          = pd.read_pickle(constants.PREPROCESSED_ATLAS_2_FILE_PATH)


In [22]:
load_dotenv()
mapbox_token = os.getenv("MAPBOX_API_KEY")

In [23]:
atlas2.head()

Unnamed: 0_level_0,Number of Users,Invoice Date,Invoice #,Invoice Amount,Subscription,Account Code,Address,Lat,Long,Service Start,Service End,Subscription Duration
Customers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
University 1,,2015-03-20 00:00:00-04:00,ATLAS 315,72000.0,1 Year,4700-0-00-00000-18-0000,"1201 N 3rd St #6, Baton Rouge, LA 70802",30.46019,-91.18853,2015-03-18 00:00:00-04:00,2016-06-30 00:00:00-04:00,365 days
University 2,250.0,2015-05-28 00:00:00-04:00,AJ501,3500.0,1 Year,4700-0-00-00000-16-0000,"1 Kellogg Cir, Emporia, KS 66801",38.392609,-96.181396,2015-06-01 00:00:00-04:00,2016-06-30 00:00:00-04:00,365 days
University 3,100.0,2015-06-23 00:00:00-04:00,AJ502,3500.0,1 Year,4700-0-00-00000-17-0000,"150 University Blvd, Morehead, KY 40351",38.184921,-83.434441,2015-06-10 00:00:00-04:00,2016-06-30 00:00:00-04:00,365 days
University 4,,2015-06-26 00:00:00-04:00,AJ503,6500.0,1 Year,4700-0-00-00000-32-0000,"44 Pierrepont Ave, Potsdam, NY 13676",44.66378,-74.978409,2015-06-01 00:00:00-04:00,2016-06-30 00:00:00-04:00,365 days
University 5,,2015-10-07 00:00:00-04:00,AJ504,750.0,1 Year,4700-0-00-00000-20-0000,"47645 College Dr, St Marys City, MD 20686",38.190601,-76.4263,2015-10-05 00:00:00-04:00,2016-09-30 00:00:00-04:00,365 days


In [24]:
atlas2_ymq = tool_data_processing.extract_and_append_year_month_quarter(atlas2, "Service Start")
atlas2_ymq.reset_index(inplace=True)
atlas2_ymq.sort_values(by="Service Start", ascending=True, inplace=True)

atlas2_ymq.set_index("Service Start", inplace=True)

total_invoices_per_year = atlas2_ymq.groupby('Year').sum().reset_index()
mean_invoices_per_year = atlas2_ymq.groupby('Year').mean().reset_index()
number_of_invoices_per_year = atlas2_ymq.groupby('Year').count().reset_index()

# total_invoices_per_year[total_invoices_per_year["Year"] == 2020]


In [25]:
atlas2_ymq.head(2)

# atlas2_ymq["Invoice Amount"].cumsum(axis=0)
# atlas2_ymq.loc[:, ["Invoice Amount", "Number of Users", "Subscription Duration"]].cumsum(axis=0)


Unnamed: 0_level_0,index,Customers,Number of Users,Invoice Date,Invoice #,Invoice Amount,Subscription,Account Code,Address,Lat,Long,Service End,Subscription Duration,Year,Monthly,Quarterly
Service Start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2015-03-18 00:00:00-04:00,0,University 1,,2015-03-20 00:00:00-04:00,ATLAS 315,72000.0,1 Year,4700-0-00-00000-18-0000,"1201 N 3rd St #6, Baton Rouge, LA 70802",30.46019,-91.18853,2016-06-30 00:00:00-04:00,365 days,2015.0,3.0,1.0
2015-06-01 00:00:00-04:00,1,University 2,250.0,2015-05-28 00:00:00-04:00,AJ501,3500.0,1 Year,4700-0-00-00000-16-0000,"1 Kellogg Cir, Emporia, KS 66801",38.392609,-96.181396,2016-06-30 00:00:00-04:00,365 days,2015.0,6.0,2.0


In [26]:
atlas2_ymq_filtered = atlas2_ymq[atlas2_ymq["Year"] == 2020]
atlas2_ymq_filtered_cumsum = atlas2_ymq_filtered.loc[:, ["Invoice Amount", "Number of Users", "Subscription Duration"]].cumsum(axis=0)
atlas2_ymq_filtered_cumsum

Unnamed: 0_level_0,Invoice Amount,Number of Users,Subscription Duration
Service Start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-21 00:00:00-05:00,2000.00,200.0,365 days 00:00:00
2020-01-31 00:00:00-05:00,2750.00,250.0,730 days 00:00:00
2020-01-31 00:00:00-05:00,3750.00,300.0,1095 days 00:00:00
2020-01-31 00:00:00-05:00,6100.00,435.0,1460 days 00:00:00
2020-01-31 00:00:00-05:00,9100.00,685.0,1825 days 00:00:00
...,...,...,...
2020-11-02 00:00:00-05:00,1035522.79,86534.0,-97229 days +00:25:26.290448384
2020-11-02 00:00:00-05:00,1039022.79,86734.0,-96864 days +00:25:26.290448384
2020-11-03 00:00:00-05:00,1041022.79,86784.0,-96499 days +00:25:26.290448384
2020-11-07 00:00:00-05:00,1044388.79,87095.0,-96134 days +00:25:26.290448384


In [27]:
atlas2_ymq_filtered = atlas2_ymq[atlas2_ymq["Year"] == 2020]
atlas2_ymq_filtered_cumsum = atlas2_ymq_filtered.loc[:, ["Invoice Amount", "Number of Users", "Subscription Duration"]].cumsum(axis=0)

invoice_trend_standard_plot = atlas2_ymq_filtered_cumsum.hvplot.line(
    x="Service Start",
    y=["Invoice Amount", "Number of Users"],
    # y=["Invoice Amount", "Number of Users", "Subscription Duration"],
    xlabel="Nbr Users",
    ylabel="Invoice Amount ($)",
    title="Invoice Amount (2020)",
    width=1000
)

invoice_trend_log_plot = atlas2_ymq_filtered_cumsum.hvplot.line(
    x="Service Start",
    y=["Invoice Amount", "Number of Users"],
    # y=["Invoice Amount", "Number of Users", "Subscription Duration"],
    xlabel="Nbr Users",
    ylabel="Invoice Amount ($)",
    title="Invoice Amount (2020)",
    width=1000,
    logy=True
)

invoice_trend_barh_plot = atlas2_ymq_filtered_cumsum.hvplot.barh(
    x="Service Start",
    y=["Invoice Amount", "Number of Users"],
    # y=["Invoice Amount", "Number of Users", "Subscription Duration"],
    xlabel="Nbr Users",
    ylabel="Invoice Amount ($)",
    title="Invoice Amount (2020)",
    width=1000,
    height=5000
)

pn.Column(invoice_trend_standard_plot, invoice_trend_log_plot,
#  invoice_trend_barh_plot
)

In [28]:
# Calculate the mean values for each neighborhood
#rankings_pd.rename(columns = {'test':'TEST', 'odi':'ODI', 
#                              't20':'T20'}, inplace = True) 

#atlas2.columns = ["Customers", "Lat", "Long"]

mean_data = atlas2.groupby(["Customers"]).mean()
mean_data.reset_index(inplace=True)
mean_data.dropna(inplace=True)
mean_data


Unnamed: 0,Customers,Number of Users,Invoice Amount,Lat,Long
1,University 10,116.666667,2240.002000,39.732210,-90.246947
5,University 103,100.000000,750.000000,45.408173,-122.922323
6,University 104,25.000000,5775.000000,45.521516,-122.984856
7,University 105,250.000000,1900.000000,28.062000,-82.413225
8,University 106,25.000000,500.000000,44.119155,-104.133391
...,...,...,...,...,...
418,University 93,250.000000,3500.000000,45.408173,-122.922323
419,University 94,50.000000,1250.000000,36.300110,-82.294030
420,University 95,17.500000,333.333333,33.959580,-102.350749
421,University 96,25.000000,2750.000000,45.507856,-122.690794


In [29]:
# px.set_mapbox_access_token(mapbox_token)
# map = px.scatter_mapbox(
#     mean_data,
#     lat="Lat",
#     lon="Long",
#     size="Invoice Amount",
#     color="Customers",
#     title="Mean Invoice Amount ($)",
#     width=1500,
#     height=800
#     # mapbox_style = 'stamen-watercolor'
# )
# map.show()

In [30]:

total_data = atlas2.groupby(["Customers"]).sum()
total_data.reset_index(inplace=True)
total_data.dropna(inplace=True)
total_data


Unnamed: 0,Customers,Number of Users,Invoice Amount,Lat,Long
0,University 1,0.0,139000.00,60.920165,-182.377131
1,University 10,350.0,11200.01,119.196630,-270.740841
2,University 100,0.0,5460.00,28.062000,-82.413225
3,University 101,0.0,750.00,48.630369,-97.471056
4,University 102,1050.0,14500.00,0.000000,0.000000
...,...,...,...,...,...
420,University 95,35.0,1000.00,67.919161,-204.701498
421,University 96,25.0,2750.00,45.507856,-122.690794
422,University 97,2050.0,18275.00,195.482126,-577.409882
423,University 98,1000.0,14000.00,0.000000,0.000000


In [31]:
# px.set_mapbox_access_token(mapbox_token)
# map = px.scatter_mapbox(
#     total_data,
#     lat="Lat",
#     lon="Long",
#     size="Invoice Amount",
#     color="Customers",
#     title="# Total Invoice Amount ($)",
#     width=1500,
#     height=800,
#     mapbox_style = 'basic'
# )
# map.show()

In [32]:
# def neighborhood_map():
#     """Neighborhood Map"""
#      = atlas2.groupby(["Customers"]).mean()
#     mean_data.reset_index(inplace=True)
#     px.set_mapbox_access_token(mapbox_token)
#     neighborhood_map_plot = px.scatter_mapbox(
#             mean_data,
#             lat="Lat",
#             lon="Long",
#             size="Subscription",
#             color="Customer"
#         )
#     return neighborhood_map_plot
# neighborhood_map()

In [39]:
# Identify existing and potential customers
customer_name_mapping               = tool_lookup_tables.read_customer_name_mapping()
all_potential_customers_mapping     = tool_lookup_tables.read_lookup_table(constants.LUT_POTENTIAL_CUSTOMER_NAME_FILE_PATH)

paying_customers = list(customer_name_mapping.keys())
all_potential_customers = list(all_potential_customers_mapping.keys())

customers_df = pd.DataFrame([paying_customers, all_potential_customers])
customers_df.head()

Unnamed: 0,0
0,"[Louisiana Board of Regents, Emporia State Uni..."
1,"[Aaniiih Nakoda College, Abdullah Gul Universi..."


In [37]:
# Calculate market penetration

# TODO remove duplicates
# TODO Match names
# TODO Generate pie chart

nbr_paying_customers = len(paying_customers)
nbr_potential_customers = len(all_potential_customers)
market_penetration = nbr_paying_customers / nbr_potential_customers

## Print results
print(f"The number of paying cusomters is {nbr_paying_customers}")
print(f"The number of potential cusomters is {nbr_potential_customers}")
print(f"Market penetration is {market_penetration * 100} %")

## Generate heatmap
# all_potential_customers   .hvplot.heatmap(x='time.month', y='time.day', C='temperature', 
#                   height=500, width=500, colorbar=False)


The number of paying cusomters is 719
The number of potential cusomters is 3835
Market penetration is 18.748370273794002 %


In [35]:
atlas2_ymq_filtered.head(2)

Unnamed: 0_level_0,index,Customers,Number of Users,Invoice Date,Invoice #,Invoice Amount,Subscription,Account Code,Address,Lat,Long,Service End,Subscription Duration,Year,Monthly,Quarterly
Service Start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-01-21 00:00:00-05:00,349,University 36,200.0,2020-04-23 00:00:00-04:00,NB042320B,2000.0,1 Year,4700-0-00-00000-00-0000,CA,37.680181,-121.921498,2018-11-12 18:06:08.352918-05:00,365 days,2020.0,1.0,1.0
2020-01-31 00:00:00-05:00,264,University 51,50.0,2020-01-27 00:00:00-05:00,000254,750.0,1 Year,4700-0-00-00000-00-0000,ND,48.630369,-97.471056,2021-01-31 00:00:00-05:00,365 days,2020.0,1.0,1.0


In [36]:
# Generate heatmap
atlas2_ymq_filtered.reset_index().hvplot.heatmap(x='Lat', y='Long', C='Invoice Amount', 
                  height=500, width=500, colorbar=False)


