In [74]:
%pip install pandas
%pip install prophet


Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [75]:
from prophet import Prophet
import pandas as pd

# Data Preparation

In [76]:
# Load the venue lookup table and the bill-level records from the local data/ folder.
df_venues = pd.read_csv(filepath_or_buffer='data/venues.csv')
df_bills = pd.read_csv(filepath_or_buffer='data/bills.csv')

In [87]:
# Show the column labels currently present on the venue table.
# NOTE(review): the next cell drops 'start_of_day_offset', so on a fresh top-to-bottom
# run this listing presumably still includes that column — confirm after Restart & Run All.
df_venues.columns

Index(['venue_xref_id', 'concept', 'city', 'country'], dtype='object')

In [78]:
# Remove the 'start_of_day_offset' column from the venue table.
# errors='ignore' keeps this cell re-runnable: df_venues is rebound here, so a second
# execution would otherwise raise KeyError because the column is already gone.
df_venues = df_venues.drop(columns='start_of_day_offset', errors='ignore')
df_venues

Unnamed: 0,venue_xref_id,concept,city,country
0,ea74268311cfcc47d2b4c38ef08b9dab0117226a9a99a9...,FAST_FOOD,Albuquerque,US
1,e7cec41c46a9706ba1a702b368be5431913b6dc9c9da49...,FAMILY_DINING,Brooklyn,US
2,e63e79791883b0c4ac71d41eaee727932c13d6a4ec8f78...,FAST_CASUAL,Windsor,CA
3,c100388c9328b30b6bdfbea43f113d367ae9d32b03e84b...,BAR,Burnaby,CA
4,a819ee7f83f27dd3d7f44515e22aca3c26e6704ddd2a9b...,FAMILY_DINING,Tisdale,CA
...,...,...,...,...
596,cfd1fa6e8f122e93b1cb02e7b7541749335e7088095f8e...,,Toronto,CA
597,75e7251298f2d62406dbfe1a11011cffdad90b27704687...,,Richardson,US
598,40c38939612f59ad418569e94a075935576bff0449c7f8...,FAST_CASUAL,Guelph,CA
599,2ed5bf26505a6064d09b40bacdb6fccc9665c3de029494...,,Mississauga,CA


## Data Cleaning

In [79]:
# Anti-join: keep the venue rows whose venue_xref_id never occurs in the bills table.
missing_venues = df_venues.loc[
    lambda venues: ~venues['venue_xref_id'].isin(df_bills['venue_xref_id'])
]
missing_venues

# The result is empty, i.e. every venue_xref_id in df_venues also appears in df_bills

Unnamed: 0,venue_xref_id,concept,city,country


In [81]:
# List every distinct concept label (including NaN) found in the venue table.
print(df_venues['concept'].unique())

# We will omit fine_dining restaurants since they don't do promotions
# We will omit pop_up restaurants since they don't have long-term data
concept_counts = df_venues['concept'].value_counts(dropna=False)
# reindex(..., fill_value=0) instead of .loc[[...]]: .loc raises KeyError when a label is
# absent, which happens if this cell is re-run after those concepts were filtered out.
filtered_counts = concept_counts.reindex(['FINE_DINING', 'POP_UP'], fill_value=0)
# Venues with no concept recorded are reported under an explicit 'NaN' row.
nan_count = df_venues['concept'].isna().sum()
filtered_counts.loc['NaN'] = nan_count
print(filtered_counts)

['FAST_FOOD' 'FAMILY_DINING' 'FAST_CASUAL' 'BAR' 'CAFE' 'SPORTS_CLUB' nan
 'FINE_DINING' 'BREWERY' 'POP_UP' 'BAKERY' 'BUFFET' 'HOTEL'
 'ENTERTAINMENT_COMPLEX' 'FOOD_TRUCK']
concept
FINE_DINING     19
POP_UP           1
NaN            136
Name: count, dtype: int64


In [91]:
# Number of venue rows with a missing city; the saved output is 0, so none are missing
print(df_venues.loc[:, 'city'].isna().sum())

0


In [98]:
# Ids of venues to exclude: concept is missing, or is one of FINE_DINING / POP_UP
venues_to_remove = df_venues.loc[
    df_venues['concept'].isna() | df_venues['concept'].isin(['FINE_DINING', 'POP_UP']),
    'venue_xref_id',
].unique()
# Venue table without the excluded ids
df_venues = df_venues.loc[~df_venues['venue_xref_id'].isin(venues_to_remove)]
# Bills table without any bill belonging to an excluded venue
df_bills = df_bills.loc[~df_bills['venue_xref_id'].isin(venues_to_remove)]
print(f"Filtered df_venues: {df_venues.shape}")
print(f"Filtered df_bills: {df_bills.shape}")


Filtered df_venues: (445, 4)
Filtered df_bills: (7121732, 20)


In [97]:
# Show the column labels of the bills table to see which fields are available.
df_bills.columns

Index(['bill_paid_at_local', 'bill_total_billed',
       'bill_total_discount_item_level', 'bill_total_gratuity',
       'bill_total_net', 'bill_total_tax', 'bill_total_voided', 'bill_uuid',
       'business_date', 'order_duration_seconds', 'order_seated_at_local',
       'order_closed_at_local', 'order_take_out_type_label', 'order_uuid',
       'payment_amount', 'payment_count', 'payment_total_tip',
       'sales_revenue_with_tax', 'venue_xref_id', 'waiter_uuid'],
      dtype='object')

In [101]:
# Attach the venue attributes (concept, city, country) to every bill row.
# how="left" keeps all bills; validate="many_to_one" makes pandas raise a MergeError
# if venue_xref_id is duplicated in df_venues, which would otherwise silently
# duplicate bill rows and inflate any totals computed from df_merged.
df_merged = df_bills.merge(
    df_venues,
    on="venue_xref_id",
    how="left",
    validate="many_to_one",
)
df_merged


Unnamed: 0,bill_paid_at_local,bill_total_billed,bill_total_discount_item_level,bill_total_gratuity,bill_total_net,bill_total_tax,bill_total_voided,bill_uuid,business_date,order_duration_seconds,...,order_uuid,payment_amount,payment_count,payment_total_tip,sales_revenue_with_tax,venue_xref_id,waiter_uuid,concept,city,country
0,2024-07-02 20:00:53,102.53,0.00,0.0,95.00,7.53,0.0,240702200053~8792ADCC-545E-4AF3-9836-9C428ED8285C,2024-07-02,5071,...,240702183622~4B0A3D27-501D-4C94-BEBC-7B7F3C91A0F4,124.53,1,22.0,102.53,885332b7f22a142e21b7459473003fddc17bfca5753ceb...,200421150750~37D0C51E-EC4F-4EA1-B549-D223DA183ABD,BAR,Grand Forks,US
1,2024-07-03 21:43:39,5.38,5.00,0.0,5.00,0.38,0.0,240703214339~B06B2A2F-B1A0-44C0-920A-242F41B58BD2,2024-07-03,6968,...,240703194731~40651D6D-4A16-4889-8C12-0F9A00D8BCF7,5.38,1,0.0,5.38,885332b7f22a142e21b7459473003fddc17bfca5753ceb...,220104154507~89085BB4-771B-40A1-9D20-B9745CC1DAFA,BAR,Grand Forks,US
2,2024-07-03 18:23:56,141.90,0.00,0.0,132.00,9.90,0.0,240703182356~D082FE98-841C-4EAB-B15E-BEA41FA1CDF0,2024-07-03,2011,...,240703175025~0AEB61B0-220A-45A3-807E-78A1F432BB4C,171.90,1,30.0,141.90,885332b7f22a142e21b7459473003fddc17bfca5753ceb...,220104154507~89085BB4-771B-40A1-9D20-B9745CC1DAFA,BAR,Grand Forks,US
3,2024-07-03 19:33:21,63.77,0.00,0.0,59.00,4.77,0.0,240703193321~273CBB07-FAB0-49B7-A445-8474F9A4570F,2024-07-03,5480,...,240703180201~4B276496-BE92-4DE6-BD75-565929827014,73.77,1,10.0,63.77,885332b7f22a142e21b7459473003fddc17bfca5753ceb...,220104154507~89085BB4-771B-40A1-9D20-B9745CC1DAFA,BAR,Grand Forks,US
4,2024-07-05 21:01:14,210.65,0.00,0.0,195.00,15.65,0.0,240705210114~D423B906-1AEE-4DB8-84BA-234267A14E05,2024-07-05,4773,...,240705194141~F90FEB4F-D238-449E-B476-D16966DF4EEC,252.65,1,42.0,210.65,885332b7f22a142e21b7459473003fddc17bfca5753ceb...,220104154507~89085BB4-771B-40A1-9D20-B9745CC1DAFA,BAR,Grand Forks,US
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7121727,2024-12-29 20:32:54,82.42,0.00,0.0,82.42,0.00,0.0,241229203254~464AA06D-F986-4C51-8A53-A7B8362072D6,2024-12-29,4382,...,241229191952~96F91DE2-5ECC-4AC9-9228-B0741F3494B1,82.42,1,0.0,82.42,45f1736264898588301e2983fd2de6969a9af33aa0f6f8...,220831171122~1455DBA8-211C-4281-BF27-8698365CA3EB,FAMILY_DINING,Edmonton,CA
7121728,2024-12-30 20:36:19,87.92,0.00,0.0,87.92,0.00,0.0,241230203619~37348A86-98D1-4113-8EDD-06A3CF61A85B,2024-12-30,5244,...,241230190855~BB4604F2-B7BA-487A-99AF-7AEB77CA3EEC,87.92,1,0.0,87.92,45f1736264898588301e2983fd2de6969a9af33aa0f6f8...,230522185246~9369CFD3-1EED-48B1-A116-F8A4F6B66FC6,FAMILY_DINING,Edmonton,CA
7121729,2024-12-31 14:24:23,51.96,0.00,0.0,51.96,0.00,0.0,241231142423~9D2E9295-9E59-4E76-8242-D230C9FD2494,2024-12-31,963,...,241231140820~4D531668-3D68-4789-A1DB-BE324D4583D5,51.96,1,0.0,51.96,45f1736264898588301e2983fd2de6969a9af33aa0f6f8...,161216233231~64B016BA-D9BA-49C7-8726-4BCB6516A11D,FAMILY_DINING,Edmonton,CA
7121730,2024-12-31 17:00:34,49.35,11.59,0.0,49.35,0.00,0.0,241231170034~A9B4401D-26DC-4FC5-A4F9-0261D950DB3B,2024-12-31,59,...,241231165935~27989697-086C-4F7C-8AB2-8C0A8B2CF3A7,49.35,1,0.0,49.35,45f1736264898588301e2983fd2de6969a9af33aa0f6f8...,230522185246~9369CFD3-1EED-48B1-A116-F8A4F6B66FC6,FAMILY_DINING,Edmonton,CA


In [130]:
# Convert the payment timestamp column to datetime, overwriting it on df_merged
df_merged['bill_paid_at_local'] = pd.to_datetime(arg=df_merged['bill_paid_at_local'])

# Chronologically ordered copy, earliest payment first (ascending is the sort_values default)
df_sorted = df_merged.sort_values('bill_paid_at_local')

df_sorted

Unnamed: 0,bill_paid_at_local,bill_total_billed,bill_total_discount_item_level,bill_total_gratuity,bill_total_net,bill_total_tax,bill_total_voided,bill_uuid,business_date,order_duration_seconds,...,order_uuid,payment_amount,payment_count,payment_total_tip,sales_revenue_with_tax,venue_xref_id,waiter_uuid,concept,city,country
2190376,2024-07-01 00:00:01,54.24,0.0,0.0,48.00,6.24,0.0,240701000001~DE6298C4-0D42-4891-834C-9D6024BC2FBE,2024-07-01,16133,...,240630193108~012D0931-2814-46EC-800B-8ED75CA5639D,65.09,1,10.85,54.24,9f79060e92c65355d1ea076dde2d70844b20d4f7a2a4bf...,221215141609~CD137075-3A91-469A-ACE6-264E200135DD,BREWERY,Nepean,CA
3999776,2024-07-01 00:00:25,11.12,0.0,0.0,9.84,1.28,0.0,240701000025~A28B2027-4509-444E-98CE-D03A50879ED3,2024-07-01,33,...,240630235952~BB20A754-C9D9-44C9-BBDB-EB695005707D,11.12,1,0.00,11.12,d941af969089c4a74e62b464e1fb0499a01f863542becc...,230720221939~071CC9E5-234E-4B4E-A740-E4C664D28038,FAST_CASUAL,Oshawa,CA
87277,2024-07-01 00:00:41,42.94,0.0,0.0,38.00,4.94,0.0,240701000041~E35FC760-8D32-4761-B930-1980E7DD38EF,2024-07-01,9720,...,240630211841~744D1E98-0B56-479C-8266-D28627DF2939,42.94,1,0.00,42.94,cbe8185d766c12f3cd3e05c20a277e812de9f10b6a9efd...,210729121222~63041A2C-BDF5-461D-9F85-A02B7C2388E7,BAR,Toronto,CA
997675,2024-07-01 00:00:43,82.49,0.0,0.0,73.00,9.49,0.0,240701000043~50EFE82B-31A3-4759-9368-D5DD6593A564,2024-07-01,12792,...,240630202731~3DAB3A5B-C419-4819-A455-919D8FD0453A,95.69,1,13.20,82.49,1f679114a0bef51573d1761acf0b715376a8d6eb1d02e0...,95609903-ef9f-48bf-b775-826350187fbe,BAR,Toronto,CA
6463855,2024-07-01 00:00:43,83.62,0.0,0.0,74.00,9.62,0.0,240701000043~DAB289D9-67DC-4C2A-92FD-232659503DDB,2024-07-01,12792,...,240630202731~3DAB3A5B-C419-4819-A455-919D8FD0453A,100.34,1,16.72,83.62,1f679114a0bef51573d1761acf0b715376a8d6eb1d02e0...,95609903-ef9f-48bf-b775-826350187fbe,BAR,Toronto,CA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382506,2025-01-01 21:31:46,14.50,0.5,0.0,14.50,0.00,0.0,250101213146~EF5E0F90-AB15-4D42-BDFC-15103438EE04,2024-12-31,12516,...,250101180310~345076B4-2045-4DA7-B340-A4B7FCC5851D,14.50,1,0.00,14.50,ac280071057c43a28522a011a227eb4e8ce38c3c5be9e3...,210609162236~504F1729-C2F6-4586-9DD7-08F7A94A0D4E,BAR,Bend,US
3383401,2025-01-01 21:36:34,74.00,0.0,0.0,74.00,0.00,0.0,250101213634~3DDC466E-C00C-40EC-9BD3-44F5486F1412,2024-12-31,10201,...,250101184633~DBB7CEC7-4C32-4C0A-9CD1-06C8945307B7,87.32,1,13.32,74.00,ac280071057c43a28522a011a227eb4e8ce38c3c5be9e3...,210609162236~504F1729-C2F6-4586-9DD7-08F7A94A0D4E,BAR,Bend,US
3148275,2025-01-01 21:39:19,27.50,0.0,0.0,27.50,0.00,0.0,250101213919~E9515A24-E1AC-4984-A201-1BF35081D248,2024-12-31,12832,...,250101180527~35D445C5-2BD9-4338-A72C-3257EEA0C252,33.00,1,5.50,27.50,ac280071057c43a28522a011a227eb4e8ce38c3c5be9e3...,210609162236~504F1729-C2F6-4586-9DD7-08F7A94A0D4E,BAR,Bend,US
3136475,2025-01-01 21:40:50,52.50,0.0,0.0,52.50,0.00,0.0,250101214050~3A47D8F6-DA21-4D74-AF57-CDB623AB09A0,2024-12-31,10606,...,250101184404~4848C6DF-85EA-4E40-AD17-20F30E4D4357,63.00,1,10.50,52.50,ac280071057c43a28522a011a227eb4e8ce38c3c5be9e3...,210609162236~504F1729-C2F6-4586-9DD7-08F7A94A0D4E,BAR,Bend,US


In [125]:
# How many distinct city values appear in the merged frame
df_merged.loc[:, 'city'].unique().size

282

In [131]:
# Distinct city values in first-seen order. The saved output shows inconsistent spellings
# (e.g. 'Scarborough' vs 'Scarborough ', 'Sun  Peaks', 'POCATELLO'), so the distinct
# count overstates the number of real cities until the labels are normalised.
df_merged.loc[:, 'city'].unique()

array(['Grand Forks', 'San Diego', 'Edmonton', 'Swansboro', 'Othello',
       'Orange', 'Kingston', 'Madison', 'Naples', 'St. Catharines',
       'Goodfield', 'Thousand Oaks', 'Jackpot', 'Crested Butte',
       'Dennis Port', 'Vancouver', 'Austin', 'Albany', 'Banff',
       'Abbotsford', 'Rome', 'Scarborough', 'Fremont', 'Dartmouth',
       'Toledo', 'Bath', 'Grand Rapids', 'Toronto', 'Ridgeway',
       'Cambridge', 'Kingsville', 'Ness City', 'Manquin', 'Ottawa',
       'Comstock Park', 'Sun  Peaks', 'Baker City', 'Clovis',
       'Union city', 'McAlester', 'Mississauga', 'Whitchurch-Stoufville',
       'McAlisterville', 'Santa Ysabel', 'Murray', 'Squamish',
       'Alto Pass ', 'Burlington', 'POCATELLO', 'Comox', 'Saskatoon',
       'Lindenhurst', 'Canton', 'Scarborough ', 'Palm Springs',
       'Victoria', 'Los Angeles', 'Kitchener', 'Vermilion', 'Ucluelet',
       'Kelowna', 'Calgary', 'Dublin', 'North Vancouver', 'Chicago',
       'Portsmouth', 'Prineville', 'Port Elgin', 'Ladysmit

# ML Prediction Model