## **Data Preparation**
### This includes data preprocessing, cleaning and preparing the data according to our requirements so that visualization and forecasting are simplified.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
# Read the raw grocery transactions from CSV and preview the first rows.
# NOTE(review): '/content/...' is an absolute path — confirm the CSV location
# before running this notebook in another environment.
csv_path = '/content/Groceries_dataset.csv'
dataset = pd.read_csv(csv_path)
dataset.head()

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk


In [3]:
# Preview the last five rows of the raw frame.
dataset.tail()

Unnamed: 0,Member_number,Date,itemDescription
38760,4471,08-10-2014,sliced cheese
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,fruit/vegetable juice
38764,1521,26-12-2014,cat food


In [4]:
# (rows, columns) of the raw frame.
dataset.shape


(38765, 3)

In [5]:
# Column dtypes and non-null counts; Date is still stored as plain strings (object dtype) here.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38765 entries, 0 to 38764
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Member_number    38765 non-null  int64 
 1   Date             38765 non-null  object
 2   itemDescription  38765 non-null  object
dtypes: int64(1), object(2)
memory usage: 908.7+ KB


In [6]:
# List the column labels.
dataset.columns

Index(['Member_number', 'Date', 'itemDescription'], dtype='object')

In [7]:
# Summary statistics; Member_number is the only numeric column, so it is the only one reported.
dataset.describe()

Unnamed: 0,Member_number
count,38765.0
mean,3003.641868
std,1153.611031
min,1000.0
25%,2002.0
50%,3005.0
75%,4007.0
max,5000.0


In [8]:
# Missing-value count per column.
dataset.isnull().sum()

Member_number      0
Date               0
itemDescription    0
dtype: int64

In [9]:
# Total missing-value count across the whole frame.
dataset.isnull().sum().sum()

0

### The above two outputs show that the dataset is already clean, i.e. there are no missing or NaN values in any column.

In [10]:
# Number of rows recorded per member, most frequent first.
dataset['Member_number'].value_counts()

3180    36
3050    33
2051    33
3737    33
2433    31
        ..
4565     2
2373     2
3788     2
2614     2
2844     2
Name: Member_number, Length: 3898, dtype: int64

In [11]:
# Number of rows per date string (dd-mm-yyyy), most frequent first.
dataset['Date'].value_counts()

21-01-2015    96
21-07-2015    93
08-08-2015    92
29-11-2015    92
30-04-2015    91
              ..
10-01-2014    26
04-07-2014    24
16-03-2015    23
17-03-2015    23
01-09-2015    22
Name: Date, Length: 728, dtype: int64

In [12]:
# Every distinct date string, in order of first appearance.
dataset['Date'].unique()

array(['21-07-2015', '05-01-2015', '19-09-2015', '12-12-2015',
       '01-02-2015', '14-02-2015', '08-05-2015', '23-12-2015',
       '20-03-2015', '12-02-2015', '24-02-2015', '14-04-2015',
       '03-09-2015', '30-03-2015', '03-05-2015', '02-09-2015',
       '03-08-2015', '07-07-2015', '08-09-2015', '12-11-2015',
       '18-09-2015', '30-11-2015', '17-07-2015', '12-06-2015',
       '13-02-2015', '14-01-2015', '08-03-2015', '27-01-2015',
       '22-10-2015', '26-10-2015', '08-10-2015', '03-10-2015',
       '11-04-2015', '21-02-2015', '29-09-2015', '09-01-2015',
       '03-04-2015', '20-04-2015', '04-08-2015', '19-07-2015',
       '06-10-2015', '16-10-2015', '06-06-2015', '26-09-2015',
       '15-08-2015', '21-05-2015', '25-08-2015', '11-03-2015',
       '05-06-2015', '28-07-2015', '05-09-2015', '19-06-2015',
       '11-10-2015', '16-06-2015', '20-01-2015', '15-02-2015',
       '13-04-2015', '07-02-2015', '02-03-2015', '14-03-2015',
       '22-12-2015', '01-07-2015', '03-11-2015', '12-04

In [13]:
# How many distinct dates occur in the raw data.
dataset['Date'].nunique()

728

In [14]:
# Row count per raw item name, most frequent first.
dataset['itemDescription'].value_counts()

whole milk               2502
other vegetables         1898
rolls/buns               1716
soda                     1514
yogurt                   1334
                         ... 
toilet cleaner              5
bags                        4
baby cosmetics              3
kitchen utensil             1
preservation products       1
Name: itemDescription, Length: 167, dtype: int64

In [15]:
# Every distinct raw item name; note that some carry a trailing space (e.g. 'cream cheese ').
dataset['itemDescription'].unique()

array(['tropical fruit', 'whole milk', 'pip fruit', 'other vegetables',
       'rolls/buns', 'pot plants', 'citrus fruit', 'beef', 'frankfurter',
       'chicken', 'butter', 'fruit/vegetable juice',
       'packaged fruit/vegetables', 'chocolate', 'specialty bar',
       'butter milk', 'bottled water', 'yogurt', 'sausage', 'brown bread',
       'hamburger meat', 'root vegetables', 'pork', 'pastry',
       'canned beer', 'berries', 'coffee', 'misc. beverages', 'ham',
       'turkey', 'curd cheese', 'red/blush wine',
       'frozen potato products', 'flour', 'sugar', 'frozen meals',
       'herbs', 'soda', 'detergent', 'grapes', 'processed cheese', 'fish',
       'sparkling wine', 'newspapers', 'curd', 'pasta', 'popcorn',
       'finished products', 'beverages', 'bottled beer', 'dessert',
       'dog food', 'specialty chocolate', 'condensed milk', 'cleaner',
       'white wine', 'meat', 'ice cream', 'hard cheese', 'cream cheese ',
       'liquor', 'pickled vegetables', 'liquor (appetizer

In [16]:
# Display the raw frame (the notebook elides the middle rows).
dataset

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk
...,...,...,...
38760,4471,08-10-2014,sliced cheese
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,fruit/vegetable juice


In [17]:
# Group the transactions by item name; `data` holds the GroupBy object, not a DataFrame.
data=dataset.groupby('itemDescription')
# Show the first Member_number and Date recorded for each item name.
data.first()

Unnamed: 0_level_0,Member_number,Date
itemDescription,Unnamed: 1_level_1,Unnamed: 2_level_1
Instant food products,1746,15-02-2015
UHT-milk,2836,29-12-2015
abrasive cleaner,2421,21-11-2015
artif. sweetener,1273,04-05-2015
baby cosmetics,2070,19-12-2015
...,...,...
white bread,3950,01-10-2015
white wine,4389,13-04-2015
whole milk,2552,05-01-2015
yogurt,4056,12-06-2015


In [18]:
# Number of distinct raw item names before any grouping is applied.
dataset['itemDescription'].nunique()

167

In [19]:
# Same listing as the earlier unique() cell, repeated just before the category mapping.
dataset['itemDescription'].unique()

array(['tropical fruit', 'whole milk', 'pip fruit', 'other vegetables',
       'rolls/buns', 'pot plants', 'citrus fruit', 'beef', 'frankfurter',
       'chicken', 'butter', 'fruit/vegetable juice',
       'packaged fruit/vegetables', 'chocolate', 'specialty bar',
       'butter milk', 'bottled water', 'yogurt', 'sausage', 'brown bread',
       'hamburger meat', 'root vegetables', 'pork', 'pastry',
       'canned beer', 'berries', 'coffee', 'misc. beverages', 'ham',
       'turkey', 'curd cheese', 'red/blush wine',
       'frozen potato products', 'flour', 'sugar', 'frozen meals',
       'herbs', 'soda', 'detergent', 'grapes', 'processed cheese', 'fish',
       'sparkling wine', 'newspapers', 'curd', 'pasta', 'popcorn',
       'finished products', 'beverages', 'bottled beer', 'dessert',
       'dog food', 'specialty chocolate', 'condensed milk', 'cleaner',
       'white wine', 'meat', 'ice cream', 'hard cheese', 'cream cheese ',
       'liquor', 'pickled vegetables', 'liquor (appetizer

## Preparing itemDescription column.

In [20]:
# Merge every fruit/vegetable item name into the single label "Fruits & Vegetables".
# The replacement is applied to the itemDescription column only, so Member_number
# and Date can never be touched by a category mapping.
fruit_veg_items = [
    'tropical fruit', 'pip fruit', 'other vegetables', 'citrus fruit',
    'fruit/vegetable juice', 'packaged fruit/vegetables', 'root vegetables',
    'berries', 'herbs', 'grapes', 'pickled vegetables', 'onions',
    'canned vegetables', 'canned fruit', 'frozen fruits', 'specialty vegetables',
]
data1 = dataset.assign(
    itemDescription=dataset['itemDescription'].replace(fruit_veg_items, "Fruits & Vegetables")
)
data1

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,whole milk
...,...,...,...
38760,4471,08-10-2014,sliced cheese
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,Fruits & Vegetables


In [21]:
# Distinct labels left after merging the fruit and vegetable items.
data1['itemDescription'].nunique()

152

In [22]:
# Merge the listed item names into the single label "Non-food products".
# Only the itemDescription column is rewritten; the other columns are left alone.
# Item strings are matched exactly, so 'roll products ' keeps its trailing space.
# NOTE(review): several entries look like food or drink (e.g. 'frankfurter',
# 'prosecco', 'liver loaf', 'zwieback', 'cream', 'long life bakery product') —
# confirm they belong here; the list is left unchanged so recorded counts stay valid.
non_food_items = [
    'pot plants', 'cat food', 'frankfurter', 'detergent', 'newspapers',
    'finished products', 'dog food', 'cleaner', 'hair spray', 'photo/film',
    'shopping bags', 'dish cleaner', 'pet care', 'female sanitary products',
    'cling film/bags', 'soap', 'house keeping products', 'decalcifier',
    'hygiene articles', 'light bulbs', 'cookware', 'bathroom cleaner',
    'prosecco', 'liver loaf',
    'zwieback', 'baby cosmetics', 'napkins', 'long life bakery product',
    'bags', 'dental care', 'roll products ',
    'kitchen towels', 'flower soil/fertilizer', 'male cosmetics', 'candles',
    'tidbits', 'seasonal products', 'abrasive cleaner', 'cream', 'skin care',
    'rubbing alcohol', 'softener',
    'organic products', 'kitchen utensil',
    'flower (seeds)', 'make up remover', 'toilet cleaner',
    'preservation products', 'dishes',
]
data2 = data1.assign(
    itemDescription=data1['itemDescription'].replace(non_food_items, "Non-food products")
)
data2

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,whole milk
...,...,...,...
38760,4471,08-10-2014,sliced cheese
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,Fruits & Vegetables


In [23]:
# Distinct labels left after the Non-food merge.
data2['itemDescription'].nunique()

104

In [24]:
# Labels present after the Non-food merge (merged categories plus raw names not yet grouped).
data2['itemDescription'].unique()

array(['Fruits & Vegetables', 'whole milk', 'rolls/buns',
       'Non-food products', 'beef', 'chicken', 'butter', 'chocolate',
       'specialty bar', 'butter milk', 'bottled water', 'yogurt',
       'sausage', 'brown bread', 'hamburger meat', 'pork', 'pastry',
       'canned beer', 'coffee', 'misc. beverages', 'ham', 'turkey',
       'curd cheese', 'red/blush wine', 'frozen potato products', 'flour',
       'sugar', 'frozen meals', 'soda', 'processed cheese', 'fish',
       'sparkling wine', 'curd', 'pasta', 'popcorn', 'beverages',
       'bottled beer', 'dessert', 'specialty chocolate', 'condensed milk',
       'white wine', 'meat', 'ice cream', 'hard cheese', 'cream cheese ',
       'liquor', 'liquor (appetizer)', 'UHT-milk', 'candy',
       'domestic eggs', 'margarine', 'salt', 'oil', 'whipped/sour cream',
       'frozen vegetables', 'sliced cheese', 'baking powder',
       'specialty cheese', 'salty snack', 'Instant food products',
       'white bread', 'frozen chicken', 'spread 

In [25]:
# Merge the dairy item names into the single label "Milk Products".
# Only the itemDescription column is rewritten; the other columns are left alone.
# 'cream cheese ' keeps its trailing space because it must match the raw value exactly.
dairy_items = [
    'whole milk', 'butter', 'butter milk', 'yogurt', 'curd cheese',
    'processed cheese', 'curd', 'condensed milk',
    'hard cheese', 'cream cheese ', 'UHT-milk', 'whipped/sour cream',
    'sliced cheese', 'specialty cheese', 'spread cheese',
    'soft cheese',
]
data3 = data2.assign(
    itemDescription=data2['itemDescription'].replace(dairy_items, "Milk Products")
)
data3

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,Fruits & Vegetables


In [26]:
# Distinct labels left after the Milk Products merge.
data3['itemDescription'].nunique()

89

In [27]:
# Labels present after the Milk Products merge.
data3['itemDescription'].unique()

array(['Fruits & Vegetables', 'Milk Products', 'rolls/buns',
       'Non-food products', 'beef', 'chicken', 'chocolate',
       'specialty bar', 'bottled water', 'sausage', 'brown bread',
       'hamburger meat', 'pork', 'pastry', 'canned beer', 'coffee',
       'misc. beverages', 'ham', 'turkey', 'red/blush wine',
       'frozen potato products', 'flour', 'sugar', 'frozen meals', 'soda',
       'fish', 'sparkling wine', 'pasta', 'popcorn', 'beverages',
       'bottled beer', 'dessert', 'specialty chocolate', 'white wine',
       'meat', 'ice cream', 'liquor', 'liquor (appetizer)', 'candy',
       'domestic eggs', 'margarine', 'salt', 'oil', 'frozen vegetables',
       'baking powder', 'salty snack', 'Instant food products',
       'white bread', 'frozen chicken', 'frozen dessert', 'vinegar',
       'nuts/prunes', 'potato products', 'frozen fish',
       'artif. sweetener', 'chewing gum', 'canned fish',
       'semi-finished bread', 'brandy', 'spices', 'waffles', 'sauces',
       'rum'

In [28]:
# Merge the frozen item names into the single label "Frozen food Products".
# Only the itemDescription column is rewritten; the other columns are left alone.
frozen_items = [
    'frozen potato products', 'frozen meals', 'frozen vegetables',
    'frozen chicken', 'frozen dessert', 'frozen fish',
]
data4 = data3.assign(
    itemDescription=data3['itemDescription'].replace(frozen_items, "Frozen food Products")
)
data4

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,Fruits & Vegetables


In [29]:
# Distinct labels left after the Frozen food merge.
data4['itemDescription'].nunique()

84

In [30]:
# Labels present after the Frozen food merge.
data4['itemDescription'].unique()

array(['Fruits & Vegetables', 'Milk Products', 'rolls/buns',
       'Non-food products', 'beef', 'chicken', 'chocolate',
       'specialty bar', 'bottled water', 'sausage', 'brown bread',
       'hamburger meat', 'pork', 'pastry', 'canned beer', 'coffee',
       'misc. beverages', 'ham', 'turkey', 'red/blush wine',
       'Frozen food Products', 'flour', 'sugar', 'soda', 'fish',
       'sparkling wine', 'pasta', 'popcorn', 'beverages', 'bottled beer',
       'dessert', 'specialty chocolate', 'white wine', 'meat',
       'ice cream', 'liquor', 'liquor (appetizer)', 'candy',
       'domestic eggs', 'margarine', 'salt', 'oil', 'baking powder',
       'salty snack', 'Instant food products', 'white bread', 'vinegar',
       'nuts/prunes', 'potato products', 'artif. sweetener',
       'chewing gum', 'canned fish', 'semi-finished bread', 'brandy',
       'spices', 'waffles', 'sauces', 'rum', 'chocolate marshmallow',
       'sweet spreads', 'soups', 'mustard', 'specialty fat',
       'instant 

In [31]:
# Merge the listed item names into the single label "Bakery Products".
# Only the itemDescription column is rewritten; the other columns are left alone.
# NOTE(review): 'sausage' and 'ham' are meat items, and 'specialty chocolate' is a
# confectionery item, yet all are grouped under Bakery — confirm this is intended;
# the list is left unchanged so recorded counts stay valid.
bakery_items = [
    'rolls/buns', 'sausage', 'brown bread', 'pastry', 'ham', 'dessert',
    'specialty chocolate',
    'white bread', 'semi-finished bread', 'waffles',
]
data5 = data4.assign(
    itemDescription=data4['itemDescription'].replace(bakery_items, "Bakery Products")
)
data5

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,Fruits & Vegetables


In [32]:
# Distinct labels left after the Bakery merge.
data5['itemDescription'].nunique()

75

In [33]:
# Labels present after the Bakery merge.
data5['itemDescription'].unique()

array(['Fruits & Vegetables', 'Milk Products', 'Bakery Products',
       'Non-food products', 'beef', 'chicken', 'chocolate',
       'specialty bar', 'bottled water', 'hamburger meat', 'pork',
       'canned beer', 'coffee', 'misc. beverages', 'turkey',
       'red/blush wine', 'Frozen food Products', 'flour', 'sugar', 'soda',
       'fish', 'sparkling wine', 'pasta', 'popcorn', 'beverages',
       'bottled beer', 'white wine', 'meat', 'ice cream', 'liquor',
       'liquor (appetizer)', 'candy', 'domestic eggs', 'margarine',
       'salt', 'oil', 'baking powder', 'salty snack',
       'Instant food products', 'vinegar', 'nuts/prunes',
       'potato products', 'artif. sweetener', 'chewing gum',
       'canned fish', 'brandy', 'spices', 'sauces', 'rum',
       'chocolate marshmallow', 'sweet spreads', 'soups', 'mustard',
       'specialty fat', 'instant coffee', 'snack products',
       'organic sausage', 'mayonnaise', 'cereals', 'meat spreads',
       'whisky', 'cooking chocolate', 'li

In [34]:
# Merge the drink item names into the single label "Coffee, Tea & Beverages".
# Only the itemDescription column is rewritten; the other columns are left alone.
# NOTE(review): 'vinegar' and 'syrup' are grouped with drinks here — confirm this
# is intended; the list is left unchanged so recorded counts stay valid.
beverage_items = [
    'bottled water', 'canned beer', 'coffee', 'misc. beverages',
    'red/blush wine', 'soda', 'sparkling wine', 'beverages',
    'bottled beer', 'white wine', 'liquor',
    'liquor (appetizer)', 'rum', 'instant coffee', 'whisky', 'liqueur',
    'syrup', 'vinegar', 'cocoa drinks', 'tea',
]
data6 = data5.assign(
    itemDescription=data5['itemDescription'].replace(beverage_items, "Coffee, Tea & Beverages")
)

data6

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,Fruits & Vegetables


In [35]:
# Distinct labels left after the beverages merge.
data6['itemDescription'].nunique()

56

In [36]:
# Labels present after the beverages merge.
data6['itemDescription'].unique()

array(['Fruits & Vegetables', 'Milk Products', 'Bakery Products',
       'Non-food products', 'beef', 'chicken', 'chocolate',
       'specialty bar', 'Coffee, Tea & Beverages', 'hamburger meat',
       'pork', 'turkey', 'Frozen food Products', 'flour', 'sugar', 'fish',
       'pasta', 'popcorn', 'meat', 'ice cream', 'candy', 'domestic eggs',
       'margarine', 'salt', 'oil', 'baking powder', 'salty snack',
       'Instant food products', 'nuts/prunes', 'potato products',
       'artif. sweetener', 'chewing gum', 'canned fish', 'brandy',
       'spices', 'sauces', 'chocolate marshmallow', 'sweet spreads',
       'soups', 'mustard', 'specialty fat', 'snack products',
       'organic sausage', 'mayonnaise', 'cereals', 'meat spreads',
       'cooking chocolate', 'ketchup', 'nut snack', 'cake bar', 'honey',
       'jam', 'rice', 'salad dressing', 'pudding powder', 'ready soups'],
      dtype=object)

In [37]:
# Merge the listed item names into the single label "Snack food Products".
# Only the itemDescription column is rewritten; the other columns are left alone.
# NOTE(review): staples such as 'pasta', 'rice' and 'margarine' are grouped as
# snacks here — confirm this is intended; the list is left unchanged so recorded
# counts stay valid.
snack_items = [
    'snack products', 'pasta', 'popcorn', 'margarine', 'salty snack',
    'Instant food products', 'nuts/prunes', 'potato products',
    'chocolate marshmallow', 'sweet spreads',
    'soups', 'mayonnaise', 'ketchup', 'nut snack', 'cake bar', 'honey',
    'jam', 'rice', 'salad dressing', 'pudding powder', 'ready soups',
]
data7 = data6.assign(
    itemDescription=data6['itemDescription'].replace(snack_items, "Snack food Products")
)
data7

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [38]:
# Distinct labels left after the Snack food merge.
data7['itemDescription'].nunique()

36

In [39]:
# Labels present after the Snack food merge.
data7['itemDescription'].unique()

array(['Fruits & Vegetables', 'Milk Products', 'Bakery Products',
       'Non-food products', 'beef', 'chicken', 'chocolate',
       'specialty bar', 'Coffee, Tea & Beverages', 'hamburger meat',
       'pork', 'turkey', 'Frozen food Products', 'flour', 'sugar', 'fish',
       'Snack food Products', 'meat', 'ice cream', 'candy',
       'domestic eggs', 'salt', 'oil', 'baking powder',
       'artif. sweetener', 'chewing gum', 'canned fish', 'brandy',
       'spices', 'sauces', 'mustard', 'specialty fat', 'organic sausage',
       'cereals', 'meat spreads', 'cooking chocolate'], dtype=object)

In [40]:
# Merge the meat and fish item names into the single label "Non-vegetarian food products".
# Only the itemDescription column is rewritten; the other columns are left alone.
# NOTE(review): 'mustard' is grouped with meat/fish here — confirm this is intended;
# the list is left unchanged so recorded counts stay valid.
meat_items = [
    'beef', 'chicken', 'hamburger meat',
    'pork', 'turkey', 'fish', 'meat', 'canned fish', 'mustard',
    'organic sausage', 'meat spreads',
]
data8 = data7.assign(
    itemDescription=data7['itemDescription'].replace(meat_items, "Non-vegetarian food products")
)
data8

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [41]:
# Distinct labels left after the Non-vegetarian merge.
data8['itemDescription'].nunique()

26

In [42]:
# Labels present after the Non-vegetarian merge.
data8['itemDescription'].unique()

array(['Fruits & Vegetables', 'Milk Products', 'Bakery Products',
       'Non-food products', 'Non-vegetarian food products', 'chocolate',
       'specialty bar', 'Coffee, Tea & Beverages', 'Frozen food Products',
       'flour', 'sugar', 'Snack food Products', 'ice cream', 'candy',
       'domestic eggs', 'salt', 'oil', 'baking powder',
       'artif. sweetener', 'chewing gum', 'brandy', 'spices', 'sauces',
       'specialty fat', 'cereals', 'cooking chocolate'], dtype=object)

In [43]:
# Merge the listed item names into the single label "Basic food products".
# Only the itemDescription column is rewritten; the other columns are left alone.
# NOTE(review): 'chewing gum' and 'brandy' are grouped as basic foods here —
# confirm this is intended; the list is left unchanged so recorded counts stay valid.
staple_items = [
    'flour', 'sugar', 'domestic eggs', 'salt', 'oil', 'baking powder',
    'artif. sweetener', 'chewing gum', 'brandy', 'spices', 'sauces',
    'specialty fat', 'cereals',
]
data9 = data8.assign(
    itemDescription=data8['itemDescription'].replace(staple_items, "Basic food products")
)
data9

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [44]:
# Distinct labels left after the Basic food merge.
data9['itemDescription'].nunique()

14

In [45]:

# Labels present after the Basic food merge.
data9['itemDescription'].unique()

array(['Fruits & Vegetables', 'Milk Products', 'Bakery Products',
       'Non-food products', 'Non-vegetarian food products', 'chocolate',
       'specialty bar', 'Coffee, Tea & Beverages', 'Frozen food Products',
       'Basic food products', 'Snack food Products', 'ice cream', 'candy',
       'cooking chocolate'], dtype=object)

In [46]:
# Merge the remaining confectionery item names into the single label "Sweet food products".
# Only the itemDescription column is rewritten; the other columns are left alone.
sweet_items = [
    'chocolate',
    'specialty bar', 'ice cream', 'candy',
    'cooking chocolate',
]
data10 = data9.assign(
    itemDescription=data9['itemDescription'].replace(sweet_items, "Sweet food products")
)
data10

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [47]:
# Distinct labels left once every item has been assigned to a category.
data10['itemDescription'].nunique()

10

In [48]:
# The final set of category labels.
data10['itemDescription'].unique()

array(['Fruits & Vegetables', 'Milk Products', 'Bakery Products',
       'Non-food products', 'Non-vegetarian food products',
       'Sweet food products', 'Coffee, Tea & Beverages',
       'Frozen food Products', 'Basic food products',
       'Snack food Products'], dtype=object)

In [49]:
# Display the frame with itemDescription fully mapped to categories (the notebook elides the middle rows).
data10

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,05-01-2015,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [50]:
# Row count per category, most frequent first.
data10['itemDescription'].value_counts()

Milk Products                   7542
Fruits & Vegetables             7473
Bakery Products                 5632
Coffee, Tea & Beverages         5590
Non-food products               4658
Non-vegetarian food products    2459
Basic food products             1816
Snack food Products             1619
Sweet food products             1028
Frozen food Products             948
Name: itemDescription, dtype: int64

## Preparing Date column

In [51]:
# Collapse every January-2015 date (dd-mm-yyyy strings) into the month key "2015-01".
# The day list is generated instead of typed out by hand, and the replacement is
# applied to the Date column only so no other column can be affected.
# NOTE(review): this and the following per-month cells could be replaced by one
# step, e.g. pd.to_datetime(Date, format='%d-%m-%Y').dt.strftime('%Y-%m').
january_2015_days = [f'{day:02d}-01-2015' for day in range(1, 32)]
data11 = data10.assign(Date=data10['Date'].replace(january_2015_days, "2015-01"))

data11

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,01-02-2015,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [52]:
# Distinct Date values left after collapsing January 2015 into one key.
data11['Date'].nunique()

698

In [53]:
# Collapse every February-2015 date (dd-mm-yyyy strings) into the month key "2015-02".
# 2015 is not a leap year, so the generated list stops at day 28; the hand-typed
# list also contained the impossible '29-02-2015'.
# The replacement is applied to the Date column only.
february_2015_days = [f'{day:02d}-02-2015' for day in range(1, 29)]
data12 = data11.assign(Date=data11['Date'].replace(february_2015_days, "2015-02"))

data12

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [54]:
# Distinct Date values left after collapsing February 2015 into one key.
data12['Date'].nunique()

671

In [55]:
# Collapse every March-2015 date (dd-mm-yyyy strings) into the month key "2015-03".
# The day list is generated instead of typed out by hand, and the replacement is
# applied to the Date column only.
march_2015_days = [f'{day:02d}-03-2015' for day in range(1, 32)]
data13 = data12.assign(Date=data12['Date'].replace(march_2015_days, "2015-03"))

data13

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [56]:
# Distinct Date values left after collapsing March 2015 into one key.
data13['Date'].nunique()

641

In [57]:
# Collapse every April-2015 date (dd-mm-yyyy strings) into the month key "2015-04".
# April has 30 days, so the generated list covers days 01-30.
# The replacement is applied to the Date column only.
april_2015_days = [f'{day:02d}-04-2015' for day in range(1, 31)]
data14 = data13.assign(Date=data13['Date'].replace(april_2015_days, "2015-04"))

data14

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [58]:
# Distinct Date values left after collapsing April 2015 into one key.
data14['Date'].nunique()

612

In [59]:
# Collapse every May-2015 date (dd-mm-yyyy strings) into the month key "2015-05".
# The day list is generated instead of typed out by hand, and the replacement is
# applied to the Date column only.
may_2015_days = [f'{day:02d}-05-2015' for day in range(1, 32)]
data15 = data14.assign(Date=data14['Date'].replace(may_2015_days, "2015-05"))

data15

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [60]:
# Distinct Date values left after collapsing May 2015 into one key.
data15['Date'].nunique()

582

In [61]:
# Collapse every June-2015 date (dd-mm-yyyy strings) into the month key "2015-06".
# June has 30 days, so the generated list stops at day 30; the hand-typed list
# also contained the impossible '31-06-2015'.
# The replacement is applied to the Date column only.
june_2015_days = [f'{day:02d}-06-2015' for day in range(1, 31)]
data16 = data15.assign(Date=data15['Date'].replace(june_2015_days, "2015-06"))

data16

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [62]:
# Distinct Date values left after collapsing June 2015 into one key.
data16['Date'].nunique()

553

In [63]:
# Collapse every July-2015 date (dd-mm-yyyy strings) into the month key "2015-07".
# The day list is generated instead of typed out by hand, and the replacement is
# applied to the Date column only.
july_2015_days = [f'{day:02d}-07-2015' for day in range(1, 32)]
data17 = data16.assign(Date=data16['Date'].replace(july_2015_days, "2015-07"))

data17

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [64]:
# Distinct Date values left after collapsing July 2015 into one key.
data17['Date'].nunique()

523

In [65]:
# Collapse every August-2015 date (dd-mm-yyyy strings) into the month key "2015-08".
# The day list is generated instead of typed out by hand, and the replacement is
# applied to the Date column only.
august_2015_days = [f'{day:02d}-08-2015' for day in range(1, 32)]
data18 = data17.assign(Date=data17['Date'].replace(august_2015_days, "2015-08"))

data18

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,19-09-2015,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [66]:
# Distinct Date values left after collapsing August 2015 into one key.
data18['Date'].nunique()

493

In [67]:
# Collapse every September-2015 date (dd-mm-yyyy strings) into the month key "2015-09".
# September has 30 days, so the generated list stops at day 30; the hand-typed
# list also contained the impossible '31-09-2015'.
# The replacement is applied to the Date column only.
september_2015_days = [f'{day:02d}-09-2015' for day in range(1, 31)]
data19 = data18.assign(Date=data18['Date'].replace(september_2015_days, "2015-09"))

data19

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [68]:
# Distinct Date values left after collapsing September 2015 into one key.
data19['Date'].nunique()

464

In [69]:
# Collapse every October-2015 date (dd-mm-yyyy strings) into the month key "2015-10".
# The day list is generated instead of typed out by hand, and the replacement is
# applied to the Date column only.
october_2015_days = [f'{day:02d}-10-2015' for day in range(1, 32)]
data20 = data19.assign(Date=data19['Date'].replace(october_2015_days, "2015-10"))

data20

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [70]:
# Count the distinct values currently present in the Date column of data20.
data20['Date'].nunique()

434

In [71]:

# Collapse every November 2015 day ('dd-11-2015') into the monthly label '2015-11'.
# November has 30 days, so the day strings are generated for 01..30 only; the
# previous hand-typed list also carried the non-existent date '31-11-2015'.
data21 = data20.replace(
    to_replace=[f'{day:02d}-11-2015' for day in range(1, 31)],
    value="2015-11",
)

data21

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,12-12-2015,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [72]:
# Count the distinct values currently present in the Date column of data21.
data21['Date'].nunique()

405

In [73]:
# Collapse every December 2015 day ('dd-12-2015') into the monthly label '2015-12'.
# The day strings 01..31 are generated rather than typed out by hand.
data22 = data21.replace(
    to_replace=[f'{day:02d}-12-2015' for day in range(1, 32)],
    value="2015-12",
)

data22

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [74]:
# Count the distinct values currently present in the Date column of data22.
data22['Date'].nunique()

376

In [75]:
# Collapse every January 2014 day ('dd-01-2014') into the monthly label '2014-01'.
# The day strings 01..31 are generated rather than typed out by hand.
data23 = data22.replace(
    to_replace=[f'{day:02d}-01-2014' for day in range(1, 32)],
    value="2014-01",
)

data23

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,03-12-2014,Fruits & Vegetables


In [76]:

# Collapse every December 2014 day ('dd-12-2014') into the monthly label '2014-12'.
# The day strings 01..31 are generated rather than typed out by hand.
data24 = data23.replace(
    to_replace=[f'{day:02d}-12-2014' for day in range(1, 32)],
    value="2014-12",
)

data24

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [77]:
# Collapse every November 2014 day ('dd-11-2014') into the monthly label '2014-11'.
# November has 30 days, so the day strings are generated for 01..30 only; the
# previous hand-typed list also carried the non-existent date '31-11-2014'.
data25 = data24.replace(
    to_replace=[f'{day:02d}-11-2014' for day in range(1, 31)],
    value="2014-11",
)

data25

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,08-10-2014,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [78]:
# Collapse every October 2014 day ('dd-10-2014') into the monthly label '2014-10'.
# The day strings 01..31 are generated rather than typed out by hand.
data26 = data25.replace(
    to_replace=[f'{day:02d}-10-2014' for day in range(1, 32)],
    value="2014-10",
)

data26

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [79]:
# Collapse every September 2014 day ('dd-09-2014') into the monthly label '2014-09'.
# September has 30 days, so the day strings are generated for 01..30 only; the
# previous hand-typed list also carried the non-existent date '31-09-2014'.
data27 = data26.replace(
    to_replace=[f'{day:02d}-09-2014' for day in range(1, 31)],
    value="2014-09",
)

data27

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [80]:
# Collapse every August 2014 day ('dd-08-2014') into the monthly label '2014-08'.
# The day strings 01..31 are generated rather than typed out by hand.
data28 = data27.replace(
    to_replace=[f'{day:02d}-08-2014' for day in range(1, 32)],
    value="2014-08",
)

data28

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [81]:
# Collapse every July 2014 day ('dd-07-2014') into the monthly label '2014-07'.
# The day strings 01..31 are generated rather than typed out by hand.
data29 = data28.replace(
    to_replace=[f'{day:02d}-07-2014' for day in range(1, 32)],
    value="2014-07",
)

data29

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [82]:
# Collapse every June 2014 day ('dd-06-2014') into the monthly label '2014-06'.
# June has 30 days, so the day strings are generated for 01..30 only; the
# previous hand-typed list also carried the non-existent date '31-06-2014'.
data30 = data29.replace(
    to_replace=[f'{day:02d}-06-2014' for day in range(1, 31)],
    value="2014-06",
)

data30

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [83]:
# Collapse every May 2014 day ('dd-05-2014') into the monthly label '2014-05'.
# The day strings 01..31 are generated rather than typed out by hand.
data31 = data30.replace(
    to_replace=[f'{day:02d}-05-2014' for day in range(1, 32)],
    value="2014-05",
)

data31

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,16-04-2014,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [84]:
# Collapse every April 2014 day ('dd-04-2014') into the monthly label '2014-04'.
# The day strings 01..30 are generated rather than typed out by hand.
data32 = data31.replace(
    to_replace=[f'{day:02d}-04-2014' for day in range(1, 31)],
    value="2014-04",
)

data32

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,2014-04,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [85]:
# Collapse every March 2014 day ('dd-03-2014') into the monthly label '2014-03'.
# The day strings 01..31 are generated rather than typed out by hand.
data33 = data32.replace(
    to_replace=[f'{day:02d}-03-2014' for day in range(1, 32)],
    value="2014-03",
)

data33

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,23-02-2014,Sweet food products
38762,1097,2014-04,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [86]:
# Collapse every February 2014 day ('dd-02-2014') into the monthly label '2014-02'.
# 2014 is not a leap year, so the day strings are generated for 01..28 only; the
# previous hand-typed list also carried the non-existent date '29-02-2014'.
data34 = data33.replace(
    to_replace=[f'{day:02d}-02-2014' for day in range(1, 29)],
    value="2014-02",
)

data34

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,2014-02,Sweet food products
38762,1097,2014-04,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


## The data is prepared and ready for visualization.

### Let's have a look at the prepared data:

In [87]:
# Display the prepared frame; pandas renders a truncated preview of the first and last rows.
data34

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,2014-02,Sweet food products
38762,1097,2014-04,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


In [88]:
# Count the distinct Date labels left in data34 after all monthly replacements.
data34['Date'].nunique()

24

In [89]:
# List the distinct Date labels in data34, in order of first appearance in the column.
data34['Date'].unique()

array(['2015-07', '2015-01', '2015-09', '2015-12', '2015-02', '2015-05',
       '2015-03', '2015-04', '2015-08', '2015-11', '2015-06', '2015-10',
       '2014-10', '2014-12', '2014-11', '2014-08', '2014-06', '2014-05',
       '2014-02', '2014-04', '2014-07', '2014-01', '2014-03', '2014-09'],
      dtype=object)

In [90]:
# Row count per Date label, most frequent first (the default value_counts ordering).
data34['Date'].value_counts()

2015-08    1921
2015-01    1797
2015-05    1793
2015-11    1785
2015-07    1724
2015-03    1722
2015-04    1699
2015-06    1694
2015-10    1670
2014-05    1615
2014-10    1591
2015-09    1587
2014-07    1576
2014-08    1575
2014-06    1570
2014-04    1561
2015-02    1560
2015-12    1536
2014-01    1527
2014-12    1473
2014-09    1472
2014-11    1469
2014-02    1437
2014-03    1411
Name: Date, dtype: int64

In [91]:
# Row count per Date label, ordered by the label itself; 'YYYY-MM' strings sort chronologically.
data34['Date'].value_counts().sort_index(ascending=True)

2014-01    1527
2014-02    1437
2014-03    1411
2014-04    1561
2014-05    1615
2014-06    1570
2014-07    1576
2014-08    1575
2014-09    1472
2014-10    1591
2014-11    1469
2014-12    1473
2015-01    1797
2015-02    1560
2015-03    1722
2015-04    1699
2015-05    1793
2015-06    1694
2015-07    1724
2015-08    1921
2015-09    1587
2015-10    1670
2015-11    1785
2015-12    1536
Name: Date, dtype: int64

In [92]:
# Row count per itemDescription category, most frequent first.
data34['itemDescription'].value_counts()

Milk Products                   7542
Fruits & Vegetables             7473
Bakery Products                 5632
Coffee, Tea & Beverages         5590
Non-food products               4658
Non-vegetarian food products    2459
Basic food products             1816
Snack food Products             1619
Sweet food products             1028
Frozen food Products             948
Name: itemDescription, dtype: int64

In [93]:
# Row count per itemDescription category, ordered alphabetically by category name.
# Reads the final prepared frame (data34) like the neighbouring cells, rather than
# the earlier intermediate data10.
data34['itemDescription'].value_counts().sort_index(ascending=True)

Bakery Products                 5632
Basic food products             1816
Coffee, Tea & Beverages         5590
Frozen food Products             948
Fruits & Vegetables             7473
Milk Products                   7542
Non-food products               4658
Non-vegetarian food products    2459
Snack food Products             1619
Sweet food products             1028
Name: itemDescription, dtype: int64

In [94]:
# Display the prepared frame once more before it is written to disk.
data34

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07,Fruits & Vegetables
1,2552,2015-01,Milk Products
2,2300,2015-09,Fruits & Vegetables
3,1187,2015-12,Fruits & Vegetables
4,3037,2015-02,Milk Products
...,...,...,...
38760,4471,2014-10,Milk Products
38761,2022,2014-02,Sweet food products
38762,1097,2014-04,Snack food Products
38763,1510,2014-12,Fruits & Vegetables


### Saving the prepared data as a .csv file:

In [95]:
# Write the prepared frame to 'Preparedfooddata.csv' (relative path, so it lands in the current working directory).
# NOTE(review): with default arguments pandas also writes the row index as a leading
# unnamed column; pass index=False only after confirming downstream readers do not expect it.
data34.to_csv('Preparedfooddata.csv')