In [59]:
# System imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import hvplot.pandas
import panel as pn
from pathlib import Path
from dotenv import load_dotenv
from panel.interact import interact
from panel import widgets
from string import digits
import csv
import json
import numpy as np
import dateparser
import panel as pn

pn.extension()

%matplotlib inline


In [60]:
# Local imports
import sys
sys.path.append("../lib2")

from Constants import Constants
from ProcessingTools import DateProcessingTools


In [61]:
# Construct the tools
debug_level = 0
constants = Constants()
tool_data_processing = DateProcessingTools(debug_level)


In [62]:
# Import all preprocessed data
atlas           = pd.read_pickle(constants.PREPROCESSED_ATLAS_FILE_PATH)
forecast        = pd.read_pickle(constants.PREPROCESSED_FORECAST_DATA_FILE_PATH)
revenue2020     = pd.read_pickle(constants.PREPROCESSED_REVENUE2020_FILE_PATH)
revenue2020A    = pd.read_pickle(constants.PREPROCESSED_REVENUE2020A_FILE_PATH)


In [63]:
load_dotenv()
mapbox_token = os.getenv("MAPBOX_API_KEY")
px.set_mapbox_access_token(mapbox_token)


In [64]:
atlas = atlas.sort_values(by='Invoice Date')

In [65]:
atlas.reset_index(inplace=True)
atlas

Unnamed: 0,Customers,Number of Users,Invoice Date,Invoice #,Year,Month,Invoice Amount,Subscription,2021 Prediction,Customers Status,Account Code,Address,State,Lat,Long,Service Start,Service End,Subscription Duration
0,Louisiana Board of Regents,,2015-03-20 00:00:00-04:00,ATLAS 315,2015,March,72000.0,1 Year,,Lost,4700-0-00-00000-18-0000,"1201 N 3rd St #6, Baton Rouge, LA 70802",LA,30.460190,-91.188530,2015-03-18 00:00:00-04:00,2016-06-30 00:00:00-04:00,365 days
1,Emporia State University,250.0,2015-05-28 00:00:00-04:00,AJ501,2015,May,3500.0,1 Year,,Active,4700-0-00-00000-16-0000,"1 Kellogg Cir, Emporia, KS 66801",KS,38.392609,-96.181396,2015-06-01 00:00:00-04:00,2016-06-30 00:00:00-04:00,365 days
2,Morehead State University,100.0,2015-06-23 00:00:00-04:00,AJ502,2015,June,3500.0,1 Year,,Active,4700-0-00-00000-17-0000,"150 University Blvd, Morehead, KY 40351",KY,38.184921,-83.434441,2015-06-10 00:00:00-04:00,2016-06-30 00:00:00-04:00,365 days
3,State University of NY Potsdam,,2015-06-26 00:00:00-04:00,AJ503,2015,June,6500.0,1 Year,,Lost,4700-0-00-00000-32-0000,"44 Pierrepont Ave, Potsdam, NY 13676",NY,44.663780,-74.978409,2015-06-01 00:00:00-04:00,2016-06-30 00:00:00-04:00,365 days
4,ST. Mary's College of Maryland,,2015-10-07 00:00:00-04:00,AJ504,2015,October,750.0,1 Year,,Lost,4700-0-00-00000-20-0000,"47645 College Dr, St Marys City, MD 20686",MD,38.190601,-76.426300,2015-10-05 00:00:00-04:00,2016-09-30 00:00:00-04:00,365 days
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
689,George Mason University,350.0,2020-12-03 00:00:00-05:00,NB120320A,2020,December,2500.0,6 Months,,New,4700-0-00-00000-00-0000,"4400 University Dr, Fairfax, VA 22030",VA,38.835479,-77.309040,2021-01-01 00:00:00-05:00,2021-06-30 00:00:00-04:00,180 days
690,"University of South Carolina, Beaufort",150.0,2020-12-03 00:00:00-05:00,NB120320D,2020,December,1500.0,6 Months,"$1,500.00",New,4700-0-00-00000-00-0000,"1 University Blvd, Bluffton, SC 29909",SC,32.303215,-80.973589,2020-12-01 00:00:00-05:00,2021-05-31 00:00:00-04:00,180 days
691,City University of New York,300.0,2020-12-03 00:00:00-05:00,NB120320B,2020,December,2250.0,6 Months,,New,4700-0-00-00000-00-0000,"205 E. 42nd Street, 9th FL., New York, NY 10017",NY,40.750810,-73.973537,2021-01-01 00:00:00-05:00,2021-06-30 00:00:00-04:00,180 days
692,Saint Xavier University,20.0,2020-12-03 00:00:00-05:00,NB120320E,2020,December,200.0,1 Year,,New,4700-0-00-00000-00-0000,"3700 W. 103rd St, Chicago, IL 60655",IL,41.707169,-87.713854,2020-08-01 00:00:00-04:00,2021-07-31 00:00:00-04:00,365 days


In [66]:
atlas.dtypes

Customers                                          object
Number of Users                                   float64
Invoice Date             datetime64[ns, America/New_York]
Invoice #                                          object
Year                                                int64
Month                                              object
Invoice Amount                                    float64
Subscription                                       object
2021 Prediction                                    object
Customers Status                                   object
Account Code                                       object
Address                                            object
State                                              object
Lat                                               float64
Long                                              float64
Service Start            datetime64[ns, America/New_York]
Service End              datetime64[ns, America/New_York]
Subscription D

In [67]:
yearly_income = atlas.iloc[:, [0, 6, 4]]
yearly_income

Unnamed: 0,Customers,Invoice Amount,Year
0,Louisiana Board of Regents,72000.0,2015
1,Emporia State University,3500.0,2015
2,Morehead State University,3500.0,2015
3,State University of NY Potsdam,6500.0,2015
4,ST. Mary's College of Maryland,750.0,2015
...,...,...,...
689,George Mason University,2500.0,2020
690,"University of South Carolina, Beaufort",1500.0,2020
691,City University of New York,2250.0,2020
692,Saint Xavier University,200.0,2020


In [156]:
grp_yearly_income = yearly_income.groupby('Year').sum()
grp_yearly_income.hvplot(figsize=(20,15), rot=90, kind='bar', subplots=True).opts(yformatter='%d', title='Total Sales from 2015 to 2020', ylabel='Income Each Year', xlabel='Total Number of Years', height=600, width=1200)

In [69]:
atlas.dtypes

Customers                                          object
Number of Users                                   float64
Invoice Date             datetime64[ns, America/New_York]
Invoice #                                          object
Year                                                int64
Month                                              object
Invoice Amount                                    float64
Subscription                                       object
2021 Prediction                                    object
Customers Status                                   object
Account Code                                       object
Address                                            object
State                                              object
Lat                                               float64
Long                                              float64
Service Start            datetime64[ns, America/New_York]
Service End              datetime64[ns, America/New_York]
Subscription D

In [70]:
monthly_income = atlas.iloc[:, [0, 5, 6, 2]]
monthly_income.sort_values(by='Invoice Date')

Unnamed: 0,Customers,Month,Invoice Amount,Invoice Date
0,Louisiana Board of Regents,March,72000.0,2015-03-20 00:00:00-04:00
1,Emporia State University,May,3500.0,2015-05-28 00:00:00-04:00
2,Morehead State University,June,3500.0,2015-06-23 00:00:00-04:00
3,State University of NY Potsdam,June,6500.0,2015-06-26 00:00:00-04:00
4,ST. Mary's College of Maryland,October,750.0,2015-10-07 00:00:00-04:00
...,...,...,...,...
692,Saint Xavier University,December,200.0,2020-12-03 00:00:00-05:00
688,West Liberty University,December,3500.0,2020-12-03 00:00:00-05:00
687,"CA State University, Stanislaus",December,100.0,2020-12-03 00:00:00-05:00
689,George Mason University,December,2500.0,2020-12-03 00:00:00-05:00


In [157]:
month_names = [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" ]
grp_monthly_income = monthly_income.groupby('Month').sum().reindex(month_names)
grp_monthly_income.hvplot(figsize=(20,15), rot=90, kind='bar', subplots=True).opts(yformatter='%d', title='Total Monthly Income from 2015 to Present', ylabel='Income Each Month', xlabel='Months', height=600, width=1200)

In [72]:
yearly_income = atlas.iloc[:, [0, 6, 4, 5]]
yearly_income

Unnamed: 0,Customers,Invoice Amount,Year,Month
0,Louisiana Board of Regents,72000.0,2015,March
1,Emporia State University,3500.0,2015,May
2,Morehead State University,3500.0,2015,June
3,State University of NY Potsdam,6500.0,2015,June
4,ST. Mary's College of Maryland,750.0,2015,October
...,...,...,...,...
689,George Mason University,2500.0,2020,December
690,"University of South Carolina, Beaufort",1500.0,2020,December
691,City University of New York,2250.0,2020,December
692,Saint Xavier University,200.0,2020,December


In [73]:
past_years = yearly_income.groupby('Year').sum()
past_years

Unnamed: 0_level_0,Invoice Amount
Year,Unnamed: 1_level_1
2015,204350.0
2016,251050.0
2017,293689.0
2018,243570.0
2019,258075.0
2020,1099566.22


In [74]:
start_to_2019 = past_years.loc[[2015, 2016, 2017, 2018, 2019]]
start_to_2019

Unnamed: 0_level_0,Invoice Amount
Year,Unnamed: 1_level_1
2015,204350.0
2016,251050.0
2017,293689.0
2018,243570.0
2019,258075.0


In [75]:
grp_past_years = start_to_2019.groupby('Year').sum()
grp_past_years.hvplot(figsize=(20,15), rot=90, kind='bar', subplots=True).opts(yformatter='%d', title='Average of Sales from 2015 to 2019', ylabel='Income Each Year', xlabel='Total Number of Years', height=600, width=1200)

In [76]:
monthly_income_15_19 = atlas.iloc[:, [0, 6, 4, 5]]
monthly_income_15_19

Unnamed: 0,Customers,Invoice Amount,Year,Month
0,Louisiana Board of Regents,72000.0,2015,March
1,Emporia State University,3500.0,2015,May
2,Morehead State University,3500.0,2015,June
3,State University of NY Potsdam,6500.0,2015,June
4,ST. Mary's College of Maryland,750.0,2015,October
...,...,...,...,...
689,George Mason University,2500.0,2020,December
690,"University of South Carolina, Beaufort",1500.0,2020,December
691,City University of New York,2250.0,2020,December
692,Saint Xavier University,200.0,2020,December


In [130]:
past_years_months = monthly_income_15_19.groupby(['Year', 'Month']).sum()
past_years_months

Unnamed: 0_level_0,Unnamed: 1_level_0,Invoice Amount
Year,Month,Unnamed: 2_level_1
2015,December,86700.0
2015,June,10000.0
2015,March,72000.0
2015,May,3500.0
2015,October,32150.0
...,...,...
2020,March,78543.0
2020,May,36185.0
2020,November,71379.0
2020,October,126485.0


In [171]:
months_start_to_2019 = past_years_months.loc[[2015, 2016, 2017, 2018, 2019]]
months_start_to_2019

Unnamed: 0_level_0,Unnamed: 1_level_0,Invoice Amount
Year,Month,Unnamed: 2_level_1
2015,December,86700.0
2015,June,10000.0
2015,March,72000.0
2015,May,3500.0
2015,October,32150.0
2016,August,13500.0
2016,February,2250.0
2016,January,11500.0
2016,July,4500.0
2016,June,78250.0


In [181]:
month_names = [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" ]
grp_past_months_start_to_2019 = months_start_to_2019.groupby('Month').mean().reindex(month_names)
grp_past_months_start_to_2019_plt = grp_past_months_start_to_2019.hvplot(figsize=(20,15), rot=90, kind='line', subplots=True).opts(yformatter='%d', title='Average Monthly Sales from 2015 to 2019 VS. Year 2020', ylabel='Income Each Month', xlabel='Months', height=600, width=1200)
grp_past_months_start_to_2019_plt

In [173]:
months_2020 = past_years_months.loc[[2020]]
months_2020

Unnamed: 0_level_0,Unnamed: 1_level_0,Invoice Amount
Year,Month,Unnamed: 2_level_1
2020,April,141574.43
2020,August,201541.51
2020,December,24705.0
2020,February,450.0
2020,January,11400.0
2020,July,82133.0
2020,June,108549.38
2020,March,78543.0
2020,May,36185.0
2020,November,71379.0


In [182]:
month_names = [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" ]
grp_past_months_2020 = months_2020.groupby('Month').mean().reindex(month_names)
grp_past_months_2020_plt = grp_past_months_2020.hvplot(figsize=(20,15), rot=90, kind='line', subplots=True).opts(yformatter='%d', title='Average Monthly Sales in 2020', ylabel='Income Each Month', xlabel='Months', height=600, width=1200)
grp_past_months_2020_plt

In [184]:
compare_15_19_t0_20 = grp_past_months_start_to_2019_plt * grp_past_months_2020_plt
compare_15_19_t0_20

In [124]:
atlas.dtypes

Customers                                          object
Number of Users                                   float64
Invoice Date             datetime64[ns, America/New_York]
Invoice #                                          object
Year                                                int64
Month                                              object
Invoice Amount                                    float64
Subscription                                       object
2021 Prediction                                    object
Customers Status                                   object
Account Code                                       object
Address                                            object
State                                              object
Lat                                               float64
Long                                              float64
Service Start            datetime64[ns, America/New_York]
Service End              datetime64[ns, America/New_York]
Subscription D

In [128]:
customer_status = atlas.iloc[:, [0, 1, 4, 6, 9]]
customer_status

Unnamed: 0,Customers,Number of Users,Year,Invoice Amount,Customers Status
0,Louisiana Board of Regents,,2015,72000.0,Lost
1,Emporia State University,250.0,2015,3500.0,Active
2,Morehead State University,100.0,2015,3500.0,Active
3,State University of NY Potsdam,,2015,6500.0,Lost
4,ST. Mary's College of Maryland,,2015,750.0,Lost
...,...,...,...,...,...
689,George Mason University,350.0,2020,2500.0,New
690,"University of South Carolina, Beaufort",150.0,2020,1500.0,New
691,City University of New York,300.0,2020,2250.0,New
692,Saint Xavier University,20.0,2020,200.0,New


In [145]:
past_years_months = monthly_income_15_19.groupby(['Year', 'Month']).sum()
past_years_months

Unnamed: 0_level_0,Unnamed: 1_level_0,Invoice Amount
Year,Month,Unnamed: 2_level_1
2015,December,86700.0
2015,June,10000.0
2015,March,72000.0
2015,May,3500.0
2015,October,32150.0
...,...,...
2020,March,78543.0
2020,May,36185.0
2020,November,71379.0
2020,October,126485.0


In [123]:
months_2020 = past_years_months.loc[[2020]]
months_2020

Unnamed: 0_level_0,Unnamed: 1_level_0,Invoice Amount
Year,Month,Unnamed: 2_level_1
2020,April,141574.43
2020,August,201541.51
2020,December,24705.0
2020,February,450.0
2020,January,11400.0
2020,July,82133.0
2020,June,108549.38
2020,March,78543.0
2020,May,36185.0
2020,November,71379.0


In [None]:
month_names = [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" ]
grp_past_months_2020 = months_2020.groupby('Month').mean().reindex(month_names)
grp_past_months_2020_plt = grp_past_months_2020.hvplot(figsize=(20,15), rot=90, kind='bar', subplots=True).opts(yformatter='%d', title='Average Monthly Sales in 2020', ylabel='Income Each Month', xlabel='Months', height=600, width=1200)
grp_past_months_2020_plt

In [99]:
daily_income = atlas.iloc[:, [2, 6]]
daily_income

Unnamed: 0,Invoice Date,Invoice Amount
0,2015-03-20 00:00:00-04:00,72000.0
1,2015-05-28 00:00:00-04:00,3500.0
2,2015-06-23 00:00:00-04:00,3500.0
3,2015-06-26 00:00:00-04:00,6500.0
4,2015-10-07 00:00:00-04:00,750.0
...,...,...
689,2020-12-03 00:00:00-05:00,2500.0
690,2020-12-03 00:00:00-05:00,1500.0
691,2020-12-03 00:00:00-05:00,2250.0
692,2020-12-03 00:00:00-05:00,200.0


In [100]:
daily_income.describe()

Unnamed: 0,Invoice Amount
count,692.0
mean,3396.387601
std,6574.759452
min,40.0
25%,750.0
50%,1800.0
75%,3500.0
max,72000.0


In [102]:
atlas.dtypes

Customers                                          object
Number of Users                                   float64
Invoice Date             datetime64[ns, America/New_York]
Invoice #                                          object
Year                                                int64
Month                                              object
Invoice Amount                                    float64
Subscription                                       object
2021 Prediction                                    object
Customers Status                                   object
Account Code                                       object
Address                                            object
State                                              object
Lat                                               float64
Long                                              float64
Service Start            datetime64[ns, America/New_York]
Service End              datetime64[ns, America/New_York]
Subscription D

In [134]:
daily_income = atlas.iloc[:, [0, 6, 9, 4]]
daily_income

Unnamed: 0,Customers,Invoice Amount,Customers Status,Year
0,Louisiana Board of Regents,72000.0,Lost,2015
1,Emporia State University,3500.0,Active,2015
2,Morehead State University,3500.0,Active,2015
3,State University of NY Potsdam,6500.0,Lost,2015
4,ST. Mary's College of Maryland,750.0,Lost,2015
...,...,...,...,...
689,George Mason University,2500.0,New,2020
690,"University of South Carolina, Beaufort",1500.0,New,2020
691,City University of New York,2250.0,New,2020
692,Saint Xavier University,200.0,New,2020


In [149]:
multi_group = daily_income.groupby(['Customers','Customers Status'])['Customers Status'].count()
multi_group

Customers                                    Customers Status
Alabama A&M University-College of Education  Active              1
Albertus Magnus College                      Active              1
Albuquerque Public Schools                   Lost                1
Alcorn State University                      Lost                1
Alderson Broaddus University                 New                 1
                                                                ..
Wingate University                           New                 1
Winston-Salem State University               New                 1
Winthrop University                          Active              7
Yakima Valley College                        New                 1
Young Harris College/Miller Library          Lost                2
Name: Customers Status, Length: 392, dtype: int64

In [151]:
new_customers = multi_group.loc[['New']].count()
new_customers

0

In [150]:
multi_group.hvplot()

DataError: None of the available storage backends were able to support the supplied data format. PandasInterface raised following error:

 cannot insert Customers Status, already exists

PandasInterface expects tabular data, for more information on supported datatypes see http://holoviews.org/user_guide/Tabular_Datasets.html

In [84]:
# Calculate the mean values for each neighborhood
#rankings_pd.rename(columns = {'test':'TEST', 'odi':'ODI', 
#                              't20':'T20'}, inplace = True) 

#atlas2.columns = ["Customers", "Lat", "Long"]

total_data = atlas.loc[:, ["Number of Users", "Invoice Amount"]].groupby(["Customers"]).sum()
# total_data.dropna(inplace=True)
total_data

atlas_with_totals = pd.concat([atlas.drop(columns=["Number of Users", "Invoice Amount"]), total_data], axis="columns", join="inner")
atlas_with_totals.head(5)


KeyError: 'Customers'

In [None]:
atlas_with_totals.dtypes

In [None]:
atlas_with_totals.count()

In [None]:
atlas_with_totals.set_index(pd.to_datetime(atlas_with_totals['Invoice Date'], infer_datetime_format=True), inplace=True)
atlas_with_totals["Invoice Amount"].plot(kind='bar', figsize=(20,10))

In [None]:
atlas_with_totals.describe()

In [None]:
atlas_with_totals.iloc[0]

In [None]:
data = atlas_with_totals.iloc[:, [0,2,5,11,12]]
data.head()

In [None]:
data.drop(columns=['Invoice Date', 'Subscription', 'Number of Users'], inplace=True)
data

In [None]:
data.sort_values("Invoice Amount", ascending=False)
data.plot()

In [None]:
data.groupby(by='State').sum()
data

In [None]:
data = data.sort_values(by='Invoice Date', ascending=False)

In [None]:
data.plot(figsize=(20,15))

In [None]:
data.tail()

In [None]:
data.drop(columns=['State'], inplace=True)
data

In [None]:
daily_returns = (data - data.shift(1)) / data.shift(1)
daily_returns.head()

In [None]:
daily_returns = data.pct_change()
daily_returns.head()

In [None]:
daily_returns.plot(figsize=(10,5))

In [None]:
# Calculate the cumulative returns using the 'cumprod()' function
cumulative_returns = (1 + daily_returns).cumprod()
cumulative_returns.head()

In [None]:
# Plot the daily returns of the S&P 500 over the last 5 years
cumulative_returns.plot(figsize=(10,5))