In [2]:
import pandas as pd
import numpy as np

In [4]:
# Load the coffee-consumption dataset into a DataFrame.
# The path is relative, so the CSV must be in the kernel's working directory.
data = pd.read_csv("coffee_consumption_india.csv")

In [6]:
# Structural overview: row count, column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 16 columns):
 #   Column                               Non-Null Count  Dtype 
---  ------                               --------------  ----- 
 0   City                                 10000 non-null  object
 1   State                                10000 non-null  object
 2   Age Group                            10000 non-null  object
 3   Gender                               10000 non-null  object
 4   Coffee Type                          10000 non-null  object
 5   Daily Cups Consumed                  10000 non-null  int64 
 6   Preferred Coffee Brand               10000 non-null  object
 7   Consumption Setting                  10000 non-null  object
 8   Monthly Coffee Expense (INR)         10000 non-null  int64 
 9   Reason for Consumption               10000 non-null  object
 10  Milk Preference                      10000 non-null  object
 11  Sugar Preference                     10000

In [8]:
# Preview the first rows to sanity-check column names and value formats.
data.head()

Unnamed: 0,City,State,Age Group,Gender,Coffee Type,Daily Cups Consumed,Preferred Coffee Brand,Consumption Setting,Monthly Coffee Expense (INR),Reason for Consumption,Milk Preference,Sugar Preference,Awareness of Specialty Coffee,Frequency of Café Visits (Per Week),Preferred Time of Consumption,Influence of Social Media on Choice
0,Delhi,Delhi,18-25,Other,Espresso,4,Bru,Home,1060,Taste,Yes,No Sugar,No,6,Morning,Yes
1,Delhi,Delhi,26-35,Male,Cold Coffee,3,Others,Home,3972,Taste,No,Less Sugar,No,4,Evening,Yes
2,Bangalore,Karnataka,50+,Female,Espresso,2,Bru,Home,666,Habit,Yes,No Sugar,No,6,Morning,No
3,Kolkata,West Bengal,36-50,Male,Filter Coffee,3,Others,Home,3644,Socializing,Yes,Medium Sugar,No,3,Afternoon,Yes
4,Mumbai,Maharashtra,26-35,Female,Instant,3,Bru,Home,1885,Socializing,No,High Sugar,No,4,Afternoon,No


In [10]:
# Count missing values per column: isna() flags each missing cell, sum() totals them column-wise.
data.isna().sum()

City                                   0
State                                  0
Age Group                              0
Gender                                 0
Coffee Type                            0
Daily Cups Consumed                    0
Preferred Coffee Brand                 0
Consumption Setting                    0
Monthly Coffee Expense (INR)           0
Reason for Consumption                 0
Milk Preference                        0
Sugar Preference                       0
Awareness of Specialty Coffee          0
Frequency of Café Visits (Per Week)    0
Preferred Time of Consumption          0
Influence of Social Media on Choice    0
dtype: int64

In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,Daily Cups Consumed,Monthly Coffee Expense (INR),Frequency of Café Visits (Per Week)
count,10000.0,10000.0,10000.0
mean,3.0062,2575.3853,3.0211
std,1.421254,1387.152155,2.002912
min,1.0,200.0,0.0
25%,2.0,1360.0,1.0
50%,3.0,2578.0,3.0
75%,4.0,3775.25,5.0
max,5.0,4999.0,6.0


In [14]:
# Number of distinct values in each column; low counts mark the categorical fields.
data.nunique()

City                                     10
State                                     9
Age Group                                 4
Gender                                    3
Coffee Type                               5
Daily Cups Consumed                       5
Preferred Coffee Brand                    5
Consumption Setting                       3
Monthly Coffee Expense (INR)           4176
Reason for Consumption                    4
Milk Preference                           2
Sugar Preference                          4
Awareness of Specialty Coffee             2
Frequency of Café Visits (Per Week)       7
Preferred Time of Consumption             4
Influence of Social Media on Choice       2
dtype: int64

# Extract Key Insights

### 1️⃣ Which State Consumes the Most Coffee?

In [24]:
# Number of records (consumers) in each state, most frequent state first.
data['State'].value_counts()

State
Maharashtra      2004
Gujarat          1037
Telangana        1028
Delhi            1025
Karnataka        1007
West Bengal      1001
Uttar Pradesh     981
Tamil Nadu        976
Rajasthan         941
Name: count, dtype: int64

In [32]:
# Total daily cups consumed per state, largest total first.
# This cell only stores the result; it does not display it.
consumer_state = (
    data
    .groupby('State')['Daily Cups Consumed']
    .sum()
    .sort_values(ascending=False)
)

In [34]:
# Display the per-state totals of daily cups consumed.
consumer_state

State
Maharashtra      6068
Delhi            3150
Gujarat          3116
Telangana        3106
West Bengal      3042
Karnataka        2961
Tamil Nadu       2889
Uttar Pradesh    2870
Rajasthan        2860
Name: Daily Cups Consumed, dtype: int64

In [44]:
# The state with the largest total of daily cups consumed (index label of the max).
# NOTE(review): this is a raw total, so it also reflects how many records each
# state has in the sample — Maharashtra has roughly twice as many as any other
# state. Compare with the per-record mean before drawing conclusions.
consumer_state.idxmax()

'Maharashtra'

### 2️⃣ Which State Has the Highest Per Capita Coffee Consumption?

In [49]:
# Mean daily cups per record within each state, highest average first.
state_average_consumption = (
    data
    .groupby('State')['Daily Cups Consumed']
    .mean()
    .sort_values(ascending=False)
)

In [51]:
# Display the per-state mean of daily cups consumed.
state_average_consumption

State
Delhi            3.073171
Rajasthan        3.039320
West Bengal      3.038961
Maharashtra      3.027944
Telangana        3.021401
Gujarat          3.004822
Tamil Nadu       2.960041
Karnataka        2.940417
Uttar Pradesh    2.925586
Name: Daily Cups Consumed, dtype: float64

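### 4️⃣ Identify the Fastest Growing Coffee Markets (States with Highest Growth Rate)

> **Note:** the dataset has no date or year column, so a growth rate cannot be computed from it directly. The cells below instead profile the records by state, age group, gender, and coffee preferences.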

In [54]:
# Number of records in each age group.
data['Age Group'].value_counts()

Age Group
50+      2576
36-50    2488
18-25    2469
26-35    2467
Name: count, dtype: int64

In [56]:
# Age-group counts within each state.
(
    data
    .groupby('State')['Age Group']
    .value_counts()
)

State          Age Group
Delhi          50+          282
               36-50        253
               26-35        249
               18-25        241
Gujarat        50+          281
               18-25        271
               36-50        256
               26-35        229
Karnataka      36-50        263
               50+          261
               18-25        244
               26-35        239
Maharashtra    50+          520
               26-35        516
               36-50        492
               18-25        476
Rajasthan      36-50        243
               50+          241
               26-35        238
               18-25        219
Tamil Nadu     26-35        263
               36-50        242
               18-25        240
               50+          231
Telangana      50+          263
               18-25        257
               26-35        257
               36-50        251
Uttar Pradesh  18-25        251
               50+          251
               

In [58]:
# Number of records per gender category.
data['Gender'].value_counts()

Gender
Other     3373
Male      3361
Female    3266
Name: count, dtype: int64

In [60]:
# Age-group counts within each gender category.
(
    data
    .groupby('Gender')['Age Group']
    .value_counts()
)

Gender  Age Group
Female  50+          839
        26-35        818
        18-25        813
        36-50        796
Male    50+          871
        36-50        860
        26-35        825
        18-25        805
Other   50+          866
        18-25        851
        36-50        832
        26-35        824
Name: count, dtype: int64

In [62]:
# Number of records per coffee type.
data['Coffee Type'].value_counts()

Coffee Type
Filter Coffee    2091
Espresso         1992
Cold Coffee      1980
Brewed           1977
Instant          1960
Name: count, dtype: int64

In [64]:
# Number of records per preferred coffee brand.
data['Preferred Coffee Brand'].value_counts()

Preferred Coffee Brand
Others         2079
Bru            2025
Nestlé         2011
Starbucks      1946
Local Brand    1939
Name: count, dtype: int64

In [66]:
# Split of records by milk preference (Yes / No).
data['Milk Preference'].value_counts()

Milk Preference
Yes    5016
No     4984
Name: count, dtype: int64

In [68]:
# Number of records per sugar-preference level.
data['Sugar Preference'].value_counts()

Sugar Preference
Medium Sugar    2507
High Sugar      2504
Less Sugar      2498
No Sugar        2491
Name: count, dtype: int64

In [70]:
# Number of records per stated reason for drinking coffee.
data['Reason for Consumption'].value_counts()

Reason for Consumption
Energy         2655
Socializing    2498
Taste          2481
Habit          2366
Name: count, dtype: int64

In [72]:
# Split of records by whether social media influences the coffee choice (Yes / No).
data['Influence of Social Media on Choice'].value_counts()

Influence of Social Media on Choice
Yes    5025
No     4975
Name: count, dtype: int64

In [74]:
# Number of records per preferred time of day for coffee.
data['Preferred Time of Consumption'].value_counts()

Preferred Time of Consumption
Morning      2535
Night        2531
Afternoon    2485
Evening      2449
Name: count, dtype: int64

In [86]:
# Total monthly coffee expense (INR) per state, listed in alphabetical state order.
(
    data
    .groupby('State')['Monthly Coffee Expense (INR)']
    .sum()
)

State
Delhi            2641930
Gujarat          2676243
Karnataka        2559971
Maharashtra      5162685
Rajasthan        2467203
Tamil Nadu       2520074
Telangana        2640815
Uttar Pradesh    2527525
West Bengal      2557407
Name: Monthly Coffee Expense (INR), dtype: int64

In [88]:
# Number of records per consumption setting (Home / Office / Café).
data['Consumption Setting'].value_counts()

Consumption Setting
Home      3344
Office    3329
Café      3327
Name: count, dtype: int64

In [90]:
# Consumption-setting counts within each gender category.
(
    data
    .groupby('Gender')['Consumption Setting']
    .value_counts()
)

Gender  Consumption Setting
Female  Café                   1108
        Home                   1090
        Office                 1068
Male    Office                 1153
        Home                   1139
        Café                   1069
Other   Café                   1150
        Home                   1115
        Office                 1108
Name: count, dtype: int64

In [92]:
# Consumption-setting counts within each age group.
(
    data
    .groupby('Age Group')['Consumption Setting']
    .value_counts()
)

Age Group  Consumption Setting
18-25      Home                   831
           Café                   827
           Office                 811
26-35      Café                   831
           Home                   828
           Office                 808
36-50      Café                   833
           Office                 829
           Home                   826
50+        Office                 881
           Home                   859
           Café                   836
Name: count, dtype: int64