## Setup

In [74]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

import utils_10 as utils

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 01 Loading data

In [14]:
rest = utils.Restaurant()

In [6]:
rest.customers.head()

Unnamed: 0,ID,First Name,Last Name,Gender,Company,Occupation
0,1,Joseph,Perkins,Male,Dynazzy,Community Outreach Specialist
1,2,Jennifer,Alvarez,Female,DabZ,Senior Quality Engineer
2,3,Roger,Black,Male,Tagfeed,Account Executive
3,4,Steven,Evans,Male,Fatz,Registered Nurse
4,5,Judy,Morrison,Female,Demivee,Legal Assistant


In [8]:
rest.food.head()

Unnamed: 0,Food ID,Food Item,Price
0,1,Sushi,3.99
1,2,Burrito,9.99
2,3,Taco,2.99
3,4,Quesadilla,4.25
4,5,Pizza,2.49


In [9]:
rest.week1.head()

Unnamed: 0,Customer ID,Food ID
0,537,9
1,97,4
2,658,1
3,202,2
4,155,9


In [27]:
rest = utils.Restaurant(preserve_index=True)
rest.weeks

Unnamed: 0,Unnamed: 1,Customer ID,Food ID
Week 1,0,537,9
Week 1,1,97,4
Week 1,2,658,1
Week 1,3,202,2
Week 1,4,155,9
...,...,...,...
Week 2,245,783,10
Week 2,246,556,10
Week 2,247,547,9
Week 2,248,252,9


In [28]:
rest = utils.Restaurant(preserve_index=False)
rest.weeks

Unnamed: 0,Customer ID,Food ID
0,537,9
1,97,4
2,658,1
3,202,2
4,155,9
...,...,...
495,783,10
496,556,10
497,547,9
498,252,9


## 02 Left Join

In [29]:
rest.week1.head()

Unnamed: 0,Customer ID,Food ID
0,537,9
1,97,4
2,658,1
3,202,2
4,155,9


In [36]:
rest.food

Unnamed: 0,Food ID,Food Item,Price
0,1,Sushi,3.99
1,2,Burrito,9.99
2,3,Taco,2.99
3,4,Quesadilla,4.25
4,5,Pizza,2.49
5,6,Pasta,13.99
6,7,Steak,24.99
7,8,Salad,11.25
8,9,Donut,0.99
9,10,Drink,1.75


In [31]:
rest.week1.shape, rest.food.shape

((250, 2), (10, 3))

In [35]:
merged = pd.merge(rest.week1, rest.food, on=rest.FOOD_ID, how='left')
merged.head()

Unnamed: 0,Customer ID,Food ID,Food Item,Price
0,537,9,Donut,0.99
1,97,4,Quesadilla,4.25
2,658,1,Sushi,3.99
3,202,2,Burrito,9.99
4,155,9,Donut,0.99


In [40]:
rest = utils.Restaurant()

In [37]:
rest.week2.head()

Unnamed: 0,Customer ID,Food ID
0,688,10
1,813,7
2,495,10
3,189,5
4,267,3


In [38]:
rest.customers.head()

Unnamed: 0,ID,First Name,Last Name,Gender,Company,Occupation
0,1,Joseph,Perkins,Male,Dynazzy,Community Outreach Specialist
1,2,Jennifer,Alvarez,Female,DabZ,Senior Quality Engineer
2,3,Roger,Black,Male,Tagfeed,Account Executive
3,4,Steven,Evans,Male,Fatz,Registered Nurse
4,5,Judy,Morrison,Female,Demivee,Legal Assistant


In [50]:
mask = rest.customers[rest.ID] == 688
rest.customers[mask]

Unnamed: 0,ID,First Name,Last Name,Gender,Company,Occupation,Customer ID
687,688,Carl,Williamson,Male,Thoughtmix,Graphic Designer,688


In [42]:
pd.merge(rest.week2, rest.customers, left_on=rest.CUSTOMER_ID, right_on=rest.ID, how='left').head()

Unnamed: 0,Customer ID,Food ID,ID,First Name,Last Name,Gender,Company,Occupation
0,688,10,688,Carl,Williamson,Male,Thoughtmix,Graphic Designer
1,813,7,813,Johnny,Walker,Male,Kayveo,Developer II
2,495,10,495,Deborah,Little,Female,Babbleblab,VP Accounting
3,189,5,189,Roger,Gordon,Male,Skilith,Operator
4,267,3,267,Matthew,Wood,Male,Agimba,Product Engineer


## 03 Coding Challange

In [52]:
# 1 Concatenate the two weeks of sales data into one DataFrame. Assign the week1
# DataFrame a key of "Week 1" and the week2 DataFrame a key of "Week 2".
rest = utils.Restaurant()
rest.weeks.head()

Unnamed: 0,Customer ID,Food ID
0,537,9
1,97,4
2,658,1
3,202,2
4,155,9


In [75]:
# 2 Find the customers who ate at the restaurant both weeks.
common_customers = pd.merge(rest.week1, rest.week2, left_on=rest.CUSTOMER_ID, right_on=rest.CUSTOMER_ID, how='inner')
common_customers_ids = common_customers[rest.CUSTOMER_ID].unique()
print(np.sort(common_customers_ids))

[ 21  30  45  75  77  80 101 155 163 189 233 249 304 310 321 343 459 462
 479 482 503 520 529 537 540 550 574 578 621 628 673 677 741 761 783 798
 799 816 819 867 909 919 922 937 945 968]


In [76]:
s1 = set(rest.week1[rest.CUSTOMER_ID].sort_values().unique())
s2 = set(rest.week2[rest.CUSTOMER_ID].sort_values().unique())
print(np.sort(np.array(list(s1.intersection(s2)))))

[ 21  30  45  75  77  80 101 155 163 189 233 249 304 310 321 343 459 462
 479 482 503 520 529 537 540 550 574 578 621 628 673 677 741 761 783 798
 799 816 819 867 909 919 922 937 945 968]


In [None]:
# 3 Find the customers who ate at the restaurant both weeks and ordered the same
# item each week.
common_orders = pd.merge(rest.week1, rest.week2, 
                         left_on=[rest.CUSTOMER_ID, rest.FOOD_ID], 
                         right_on=[rest.CUSTOMER_ID, rest.FOOD_ID], how='inner')
common_orders_ids = common_orders[rest.CUSTOMER_ID].unique()
print(np.sort(common_orders_ids))

[ 21 233 304 540 578 922 937]


In [78]:
common_orders

Unnamed: 0,Customer ID,Food ID
0,304,3
1,540,3
2,937,10
3,233,3
4,21,4
5,922,1
6,21,4
7,578,5
8,578,5


In [82]:
# 4 Identify which customers came in only on Week 1 and only on Week 2.
only_week1 = pd.merge(rest.week1, rest.week2, 
                      left_on=rest.CUSTOMER_ID, 
                      right_on=rest.CUSTOMER_ID, how='left', indicator=True)
only_week1 = only_week1[only_week1['_merge'] == 'left_only']
only_week1_ids = only_week1[rest.CUSTOMER_ID].unique()
print(np.sort(only_week1_ids))

[   3   10   20   26   38   47   51   53   62   63   64   67   68   71
   74   78   91   92   93   97  100  107  108  110  114  117  121  123
  134  138  140  144  147  148  149  151  159  160  167  174  184  190
  191  202  203  213  225  226  244  250  259  260  263  264  271  274
  282  288  290  296  307  313  315  319  323  327  329  331  332  338
  341  346  348  351  352  357  358  363  368  374  380  385  386  393
  396  399  406  410  413  418  419  427  433  450  472  475  477  483
  491  493  501  504  506  514  515  519  524  527  535  539  549  555
  567  584  586  595  600  602  606  608  641  644  645  646  648  650
  658  669  671  680  682  697  703  725  728  737  738  745  747  749
  758  762  764  772  775  809  812  821  822  828  833  836  848  864
  871  875  876  881  896  902  910  911  912  921  926  934  941  953
  954  961  962  966  985  991 1000]


In [83]:
only_week2 = pd.merge(rest.week2, rest.week1, 
                      left_on=rest.CUSTOMER_ID, 
                      right_on=rest.CUSTOMER_ID, how='left', indicator=True)
only_week2 = only_week2[only_week2['_merge'] == 'left_only']
only_week2_ids = only_week2[rest.CUSTOMER_ID].unique()
print(np.sort(only_week2_ids))

[  8  13  24  27  35  39  46  54  55  56  70  73  81  98 111 122 127 131
 132 136 143 145 156 170 171 175 186 188 193 198 204 208 211 222 234 236
 239 240 251 252 253 257 267 272 275 276 277 281 287 303 305 312 325 359
 361 365 367 372 379 381 397 398 415 423 424 437 443 445 458 463 473 486
 488 489 495 496 505 508 509 511 517 522 526 528 530 534 543 547 548 556
 559 564 570 571 580 581 589 592 596 604 612 620 622 629 630 633 639 647
 653 664 666 668 670 674 681 688 692 693 694 706 709 713 720 726 729 732
 734 735 736 743 746 750 751 752 755 767 780 784 787 791 792 794 805 810
 813 815 827 829 831 847 850 853 855 858 859 861 866 869 877 883 884 888
 893 905 913 927 928 936 938 940 942 951 957 959 969 977 994 996]


In [85]:
# The same using inner joins and sets
both = pd.merge(rest.week1, rest.week2, left_on=rest.CUSTOMER_ID, right_on=rest.CUSTOMER_ID, how='inner')
both_ids = set(both[rest.CUSTOMER_ID].unique())
week1_ids = set(rest.week1[rest.CUSTOMER_ID].unique())
week2_ids = set(rest.week2[rest.CUSTOMER_ID].unique())
only1 = np.sort(np.array(list(week1_ids - both_ids)))
only2 = np.sort(np.array(list(week2_ids - both_ids)))
print(only1)

[   3   10   20   26   38   47   51   53   62   63   64   67   68   71
   74   78   91   92   93   97  100  107  108  110  114  117  121  123
  134  138  140  144  147  148  149  151  159  160  167  174  184  190
  191  202  203  213  225  226  244  250  259  260  263  264  271  274
  282  288  290  296  307  313  315  319  323  327  329  331  332  338
  341  346  348  351  352  357  358  363  368  374  380  385  386  393
  396  399  406  410  413  418  419  427  433  450  472  475  477  483
  491  493  501  504  506  514  515  519  524  527  535  539  549  555
  567  584  586  595  600  602  606  608  641  644  645  646  648  650
  658  669  671  680  682  697  703  725  728  737  738  745  747  749
  758  762  764  772  775  809  812  821  822  828  833  836  848  864
  871  875  876  881  896  902  910  911  912  921  926  934  941  953
  954  961  962  966  985  991 1000]


In [86]:
# 5 Each row in the week1 DataFrame identifies a customer who purchased a food
# item. For each row, pull in the customer’s information from the customers
# DataFrame.
week1_customers = pd.merge(rest.week1, rest.customers, left_on=rest.CUSTOMER_ID, right_on=rest.ID, how='left')
week1_customers.head()

Unnamed: 0,Customer ID,Food ID,ID,First Name,Last Name,Gender,Company,Occupation
0,537,9,537,Cheryl,Carroll,Female,Zoombeat,Registered Nurse
1,97,4,97,Amanda,Watkins,Female,Ozu,Account Coordinator
2,658,1,658,Patrick,Webb,Male,Browsebug,Community Outreach Specialist
3,202,2,202,Louis,Campbell,Male,Rhynoodle,Account Representative III
4,155,9,155,Carolyn,Diaz,Female,Gigazoom,Database Administrator III
