# Top 10 selling products by income groups

## Importing libraries

In [2]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import scipy

## Creating path for folder

In [4]:
path = r'/Users/ceciliamoura/Desktop/Career Foundry/Achievement4/Instacart Basket Analysis'

## Importing files

In [10]:
# Importing subset with only high activity customers

In [6]:
instacart = pd.read_pickle(os.path.join (path, 'Data', 'Final Prepared Data', 'IC_high_activity_2.pkl'))

In [8]:
instacart.columns

Index(['order_id', 'product_id', 'add_to_cart_order', 'reordered', 'user_id',
       'order_number', 'order_day_of_week', 'order_time',
       'days_since_prior_order', 'product_name', 'aisle_id', 'department_id',
       'prices', 'Gender', 'State', 'Age', 'date_joined', 'n_dependants',
       'fam_status', 'income', 'max_order', 'max_order_per_user',
       'products_per_order', 'order_cost', 'Region', 'age_group',
       'loyalty_flag', 'mean_order_cost_per_user', 'spending_flag',
       'income_group', 'median_days_since_prior_order_per_user', 'freq_buyer'],
      dtype='object')

In [13]:
instacart.shape

(30963518, 32)

## Subsetting for analysis

In [12]:
subset = instacart[['product_name', 'prices', 'income_group', 'department_id']]

In [16]:
subset['income_group'].value_counts()

income_group
Low          7741104
Medium       7740979
Very High    7740813
High         7740622
Name: count, dtype: int64

In [17]:
subset.shape

(30963518, 3)

In [18]:
subset_low = subset.loc[subset['income_group'] == 'Low']

In [19]:
subset_medium = subset.loc[subset['income_group'] == 'Medium']

In [20]:
subset_high = subset.loc[subset['income_group'] == 'High']

In [21]:
subset_very_high = subset.loc[subset['income_group'] == 'Very High']

## Top 10 in low-income group

In [23]:
subset.columns

Index(['product_name', 'prices', 'income_group'], dtype='object')

In [24]:
products_low = subset_low['product_name'].value_counts()

In [25]:
products_low.head(10)

product_name
Banana                    99324
Bag of Organic Bananas    93142
Organic Strawberries      59893
Organic Baby Spinach      52664
Organic Hass Avocado      49788
Strawberries              43033
Organic Avocado           35771
Organic Raspberries       32675
Large Lemon               29166
Organic Half & Half       27256
Name: count, dtype: int64

## Top 10 in medium-income group

In [27]:
products_medium = subset_medium['product_name'].value_counts()

In [28]:
products_medium.head(10)

product_name
Banana                    119289
Bag of Organic Bananas     91199
Organic Strawberries       66385
Organic Baby Spinach       60462
Organic Hass Avocado       52413
Organic Avocado            45038
Large Lemon                38415
Limes                      37326
Organic Whole Milk         36011
Organic Raspberries        33326
Name: count, dtype: int64

## Top 10 in high-income group

In [30]:
products_high = subset_high['product_name'].value_counts()

In [31]:
products_high.head(10)

product_name
Banana                    116183
Bag of Organic Bananas     93005
Organic Strawberries       66087
Organic Baby Spinach       59487
Organic Hass Avocado       53743
Organic Avocado            43923
Large Lemon                39455
Organic Whole Milk         36625
Limes                      35663
Organic Raspberries        33941
Name: count, dtype: int64

## Top 10 in very-high-income group

In [33]:
products_very_high = subset_very_high['product_name'].value_counts()

In [34]:
products_very_high.head(10)

product_name
Banana                    118368
Bag of Organic Bananas     89057
Organic Strawberries       63623
Organic Baby Spinach       59159
Organic Hass Avocado       50855
Organic Avocado            43535
Large Lemon                38361
Limes                      35841
Organic Whole Milk         35146
Organic Raspberries        33003
Name: count, dtype: int64

## Best selling departments

In [36]:
instacart['department_id'].value_counts()

department_id
4     9079273
16    5176667
19    2766406
7     2571893
1     2121731
13    1782705
3     1120828
15    1012074
20    1003834
9      822136
17     699857
12     674781
14     670850
11     423791
18     410384
6      255991
5      144627
8       93060
21      64768
2       34411
10      33451
Name: count, dtype: int64

## Revenue by department

In [71]:
#Revenue by department

revenue_department = instacart.groupby(['department_id'])['prices'].transform('sum')

In [73]:
instacart['department_rev'] = revenue_department

In [75]:
# Subsetting just 'department_id' and 'department_rev'
dep_subset = instacart[['department_id', 'department_rev']]

In [79]:
dep_subset.info

<bound method DataFrame.info of           department_id  department_rev
0                    16      43116767.2
1                     4      72455761.7
2                    13      14287456.5
3                    13      14287456.5
4                    13      14287456.5
...                 ...             ...
32403714             17       5164910.3
32403715             19      11827920.3
32403716              1      16408884.0
32403717              4      72455761.7
32403718             19      11827920.3

[30963518 rows x 2 columns]>

In [77]:
dep_subset.head()

Unnamed: 0,department_id,department_rev
0,16,43116767.2
1,4,72455761.7
2,13,14287456.5
3,13,14287456.5
4,13,14287456.5


In [81]:
# Creating a subset withou duplicates
dep_subset_nodups = dep_subset.drop_duplicates()

In [83]:
dep_subset_nodups

Unnamed: 0,department_id,department_rev
0,16,43116767.2
1,4,72455761.7
2,13,14287456.5
15,12,10998953.4
16,3,8806886.2
18,11,3388456.0
19,19,11827920.3
20,14,5388030.2
27,7,19758722.7
32,20,7813188.1


In [87]:
dep_subset_nodups = dep_subset_nodups.sort_values(by='department_rev', ascending=False)



In [89]:
dep_subset_nodups

Unnamed: 0,department_id,department_rev
1,4,72455761.7
0,16,43116767.2
27,7,19758722.7
60,1,16408884.0
2,13,14287456.5
19,19,11827920.3
15,12,10998953.4
16,3,8806886.2
32,20,7813188.1
67,15,7637778.5


## 3. Exporting 

In [57]:
print(data_dict.get('32403714'))

None


In [38]:
instacart.to_pickle(os.path.join(path, 'Data', 'Final Prepared Data', 'IC_high_activity_2.pkl'))

In [39]:
instacart.to_pickle(os.path.join(path, 'Data', 'Final Prepared Data', 'backup', 'IC_high_activity_2.pkl'))

## Importing data dictionary for dapartment_id

In [20]:
data_dict = pd.read_csv(os.path.join (path, 'Data', 'Final Prepared Data', 'depart_data_dict.csv'))

In [70]:
print(data_dict.get('19'))

0    snacks
Name: 19, dtype: object


In [73]:
print(data_dict.get('4'))

0    produce
Name: 4, dtype: object


In [75]:
print(data_dict.get('16'))

0    dairy eggs
Name: 16, dtype: object


In [77]:
print(data_dict.get('19'))

0    snacks
Name: 19, dtype: object


In [79]:
print(data_dict.get('7'))

0    beverages
Name: 7, dtype: object


In [83]:
print(data_dict.get('1'))

0    frozen
Name: 1, dtype: object


In [91]:
print(data_dict.get('13'))

0    pantry
Name: 13, dtype: object
