# Analysing naming patterns of 'darshinis' in Bangalore

In [53]:
import pandas as pd

In [54]:
df = pd.read_csv("zomato.csv")

In [55]:
#Rename column headers to remove potentially problematic brackets and spaces
df = df.rename(columns = {'approx_cost(for two people)':'cost_for_two'})
df = df.rename(columns = {'listed_in(type)':'type'})
df = df.rename(columns = {'listed_in(city)': 'city'})

In [56]:
udupi = df[df['name'].str.contains('Udupi')]
udupi.shape

(340, 17)

In [57]:
#Repeat with all popular search terms
upahar = df[df['name'].str.contains('Upahar')]
darshini = df[df['name'].str.contains('Darshini')]
bhavan = df[df['name'].str.contains('Bhavan')]
vishnu = df[df['name'].str.contains('Vishnu')]
krishna = df[df['name'].str.contains('Krishna')]
brahmins = df[df['name'].str.contains('Brahmin')]
veg = df[df['name'].str.contains('Veg')]

In [58]:
print("Names with 'Udupi' in it =",len(udupi))
print("Names with 'Upahar' in it =",len(upahar))
print("Names with 'Darshini' in it =",len(darshini))
print("Names with 'Bhavan' in it =",len(bhavan))
print("Names with 'Vishnu' in it =",len(vishnu))
print("Names with 'Krishna' in it =",len(krishna))
print("Names with 'Brahmin' in it =",len(brahmins))
print("Names with 'Veg' in it =",len(veg))

Names with 'Udupi' in it = 340
Names with 'Upahar' in it = 180
Names with 'Darshini' in it = 71
Names with 'Bhavan' in it = 288
Names with 'Vishnu' in it = 57
Names with 'Krishna' in it = 254
Names with 'Brahmin' in it = 27
Names with 'Veg' in it = 545


In [59]:
#Merge all to form sample of darshini names
sample = pd.concat([udupi, upahar, darshini, bhavan, vishnu, krishna, brahmins, veg], keys = ['udupi','upahar', 'darshini', 'bhavan', 'vishnu', 'krishna', 'brahmins', 'veg'])
len(sample)

1762

### Cleaning the sample
1. Remove duplicates
2. Filter out restaurants with > 500 as cost for two - since they don't qualify as darshinis. This requires dropping rows with NaN values for cost
3. Filter out restaurants based on restaurant type (rest_type) and cuisine
4. Drop absolutely unnecessary columns

In [60]:
sample = sample.drop_duplicates()
len(sample)

1585

In [61]:
sample = sample.dropna(subset = ['cost_for_two'])

In [62]:
#filter out restaurants based on cost_for_two
sample['cost_for_two'].describe()

count     1583
unique      17
top        300
freq       375
Name: cost_for_two, dtype: object

In [63]:
sample = sample[(sample['cost_for_two'].astype(int)<500)]

In [64]:
#remove Sweet Shop, Dhaba, Bakery etc.
sample['rest_type'].value_counts()

rest_type
Quick Bites                1082
Casual Dining               153
Takeaway, Delivery           63
Sweet Shop                   40
Sweet Shop, Quick Bites      17
Mess                          6
Delivery                      5
Quick Bites, Sweet Shop       4
Dhaba                         2
Bakery, Quick Bites           2
Takeaway                      1
Name: count, dtype: int64

In [65]:
sample = sample[sample["rest_type"].str.contains("Sweet Shop|Dhaba|Bakery|Mess") == False]
sample['rest_type'].value_counts()

rest_type
Quick Bites           1082
Casual Dining          153
Takeaway, Delivery      63
Delivery                 5
Takeaway                 1
Name: count, dtype: int64

In [66]:
sample['cuisines'].value_counts()

cuisines
South Indian                                        290
South Indian, North Indian, Chinese                 248
South Indian, Chinese                                78
South Indian, North Indian, Chinese, Street Food     66
South Indian, North Indian                           57
                                                   ... 
Biryani                                               1
North Indian, Street Food, Pizza                      1
Italian, Pizza                                        1
South Indian, Fast Food, Juices                       1
North Indian, South Indian, Chinese, Street Food      1
Name: count, Length: 81, dtype: int64

In [67]:
sample = sample[sample['cuisines'].str.contains("South Indian")]

In [68]:
sample = sample.drop('url', axis = 1)

In [69]:

sample = sample.drop('online_order', axis = 1)
sample = sample.drop('book_table', axis = 1)
sample = sample.drop('rate', axis = 1)
sample = sample.drop('votes', axis = 1)
sample = sample.drop('phone', axis = 1)

sample = sample.drop('type', axis = 1)
sample = sample.drop('city', axis = 1)

#### Final sample

In [70]:
sample.shape

(1078, 9)

In [71]:
sample.head()

Unnamed: 0,Unnamed: 1,address,name,location,rest_type,dish_liked,cuisines,cost_for_two,reviews_list,menu_item
udupi,3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[]
udupi,149,"No.1585, 22nd Main,Banashankari 1st Stage",Udupi Upahar,Banashankari,Quick Bites,,"South Indian, Chinese, North Indian, Juices",300,"[('Rated 1.0', 'RATED\n Food is not good... N...","['Rava Dosa', 'Pepper Masala Dosa', 'Butter Ma..."
udupi,196,"31/1, Puttalingaiah Road, Second Stage, Bangal...",Udupi Ruchi,Banashankari,Quick Bites,,"South Indian, North Indian, Chinese, Street Food",200,"[('Rated 1.0', ""RATED\n Worst option to order...","['Idly Vada', 'Onion Dosa', 'Rava Dosa', 'Rava..."
udupi,240,"335, KR Road, Shastri Nagar, Yediyur, Jayanaga...",Sri Udupi Veg,Jayanagar,Quick Bites,,South Indian,250,[],[]
udupi,244,"164/B, 24th Cross, 6th Block Yediyur, Jayanaga...",New Sri Udupi Restaurant,Jayanagar,Quick Bites,,"North Indian, South Indian, Chinese, Beverages...",200,[],[]


In [72]:
sorted(sample.name.unique())

["'Brahmins' Thatte Idli",
 '99 Udupi Veg',
 'A2B - Adyar Ananda Bhavan',
 'A2B Veg - Adyar Ananda Bhavan',
 'Addhuri Udupi Bhojana',
 'Adithya Veg',
 'Adyar Anand Bhavan Sweets',
 'Akshaya Sambrama Veg Restaurant',
 'Amaravati - Pure Veg Restaurant',
 'Annapurna Veg',
 'Ashirvad Veg',
 'Atithi Aramane Veg',
 'Atithi Grand Veg Hotel',
 'BJ Bhavan Veg Restaurants',
 'Bangalore Agarwal Bhavan',
 'Basava Bhavana (B2)',
 'Bhavani Foods',
 'Bhavani Restaurant',
 'Bhavani Upahara',
 'Brahmin Cafe',
 'Brahmin Tiffins & Coffee',
 "Brahmin's Coffee Bar",
 'Brahmins Food Corner',
 "Brahmins' Thatte Idli",
 'Brindavan Veg',
 'Cafe Udupi Ruchi',
 'Chaitanya Veg',
 'Chaithanya Veg',
 'Chamarajpet Non Veg Restaurant',
 'Chandra Prakesh Veg Restaurant',
 'CraveBelly Veg & Non veg',
 "Dambal's Veg",
 'Ganesh Bhavan',
 'Ganesh Darshini',
 'Gangothri Veg Restaurant',
 'HSR Agarval Bhavan',
 'HVR Veg',
 'Hare Krishna',
 'Hare Krishna Kuteera',
 'Hotel Ayodhya Upahar',
 'Hotel Kadamba Veg',
 'Hotel Mahesh

In [73]:
sample.cost_for_two.median()

300.0

In [74]:
len(sample)

1078

In [75]:
sample.location.value_counts()

location
BTM                           117
Jayanagar                     100
Shivajinagar                   66
HSR                            66
Basavanagudi                   37
                             ... 
ITPL Main Road, Whitefield      2
Sanjay Nagar                    1
Sadashiv Nagar                  1
Seshadripuram                   1
Hebbal                          1
Name: count, Length: 62, dtype: int64

In [78]:
sample['name'].str.contains("Sri|Shree|Shri").value_counts()

name
False    815
True     263
Name: count, dtype: int64

In [None]:
sample.to_excel("darshini_sample.xlsx")

In [None]:
pd.set_option('display.max_rows', None)
sample.name.value_counts()

name
A2B - Adyar Ananda Bhavan                   49
Sri Udupi Park                              34
Sri Krishna Sagar                           30
New Udupi Grand                             25
Vijayalakshmi Veg                           23
New Udupi Upahar                            21
Sree Krishna Kafe                           17
Upahara Darshini                            14
Indira Darshini                             14
Udupi Grand                                 13
Vegetarea                                   13
Krishna Vaibhava                            12
Shilpa Vegetarian Restaurant                12
Shilpa Veg                                  12
Chaitanya Veg                               12
Udupi Ruchi                                 12
Sri Ayodhya Veg                             11
Sri Udupi Grand                             11
Swaadam Veg                                 11
Sri Saravana Bhavan                         11
Krishna Sagar                               11
Upahara 

In [None]:
names = sample[['name']]
names = names.drop_duplicates()

NameError: name 'sample' is not defined

In [None]:
names_split = names['name'].str.split(' ', expand = True)
names_split

Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6
udupi,3,Addhuri,Udupi,Bhojana,,,,
udupi,149,Udupi,Upahar,,,,,
udupi,196,Udupi,Ruchi,,,,,
udupi,240,Sri,Udupi,Veg,,,,
udupi,244,New,Sri,Udupi,Restaurant,,,
udupi,297,Udupi,Ruchi,Grand,,,,
udupi,342,Udupi,Grand,,,,,
udupi,737,Shree,Udupi,South,kitchen,,,
udupi,1044,Shree,Udupi,Grand,,,,
udupi,1173,New,Udupi,Grand,,,,


In [None]:
names_split = names_split.rename(columns = {'O':'prefix'})
names_split

Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6
udupi,3,Addhuri,Udupi,Bhojana,,,,
udupi,149,Udupi,Upahar,,,,,
udupi,196,Udupi,Ruchi,,,,,
udupi,240,Sri,Udupi,Veg,,,,
udupi,244,New,Sri,Udupi,Restaurant,,,
udupi,297,Udupi,Ruchi,Grand,,,,
udupi,342,Udupi,Grand,,,,,
udupi,737,Shree,Udupi,South,kitchen,,,
udupi,1044,Shree,Udupi,Grand,,,,
udupi,1173,New,Udupi,Grand,,,,


In [None]:
names_split.to_excel("names.xlsx")

In [None]:
df = pd.read_csv("names_clean.csv")
df

Unnamed: 0,prefix,suffix_1,suffix_2,suffix_3,suffix_4,suffix_5,suffix_6
0,Parisar,Veg,Restaurant,,,,
1,Vakulaa,Veg,Restaurant,,,,
2,Vishwamitra,Veg,Restaurant,,,,
3,Skandas,Veg,Restaurant,,,,
4,Gangothri,Veg,Restaurant,,,,
5,Loukya,Veg,Restaurant,,,,
6,Sitara,Veg,Restaurant,,,,
7,Udupas,Veg,Restaurant,,,,
8,Mayur,Veg,Restaurant,,,,
9,Pavana,Veg,Restaurant,,,,


In [None]:
df.prefix.value_counts().head()

prefix
Sri        49
Udupi      29
New        18
Hotel      10
Upahara     6
Name: count, dtype: int64

In [None]:
df.suffix_1.value_counts().head()

suffix_1
Veg        42
Udupi      28
Upahara    20
Krishna    17
Bhavan     10
Name: count, dtype: int64

In [None]:
df.suffix_2.value_counts().head()

suffix_2
Veg           26
Bhavan        14
Restaurant    11
Grand         10
Upahara       10
Name: count, dtype: int64

In [None]:
df.suffix_3.value_counts().head()

suffix_3
Veg           8
Restaurant    6
Bhavan        5
Hotel         3
&             2
Name: count, dtype: int64

In [None]:
df.suffix_4.value_counts().head()

suffix_4
&             3
Non-Veg       3
Restaurant    2
Veg           2
Bhavan        1
Name: count, dtype: int64