In [1]:
import pandas as pd

# Load the dataset
file_path = 'filtered_rs_currency_dataset.csv'
data = pd.read_csv(file_path)

# Display basic info to understand the structure of the dataset
data.info(), data.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8672 entries, 0 to 8671
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   name                  8672 non-null   object 
 1   cuisines              8672 non-null   object 
 2   average_cost_for_two  8672 non-null   int64  
 3   has_online_delivery   8672 non-null   int64  
 4   url                   8672 non-null   object 
 5   price_range           8672 non-null   int64  
 6   currency              8672 non-null   object 
 7   has_table_booking     8672 non-null   int64  
 8   photos_url            8672 non-null   object 
 9   thumb                 8672 non-null   object 
 10  menu_url              8672 non-null   object 
 11  rating_text           8672 non-null   object 
 12  rating_color          8672 non-null   object 
 13  votes                 8672 non-null   int64  
 14  aggregate_rating      8672 non-null   float64
 15  latitude             

(None,
                          name  \
 0            Hauz Khas Social   
 1  Qubitos - The Terrace Cafe   
 2             The Hudson Cafe   
 3           Summer House Cafe   
 4                 38 Barracks   
 
                                             cuisines  average_cost_for_two  \
 0         Continental, American, Asian, North Indian                  1600   
 1  Thai, European, Mexican, North Indian, Chinese...                  1500   
 2                Cafe, Italian, Continental, Chinese                   850   
 3                               Italian, Continental                  1850   
 4             North Indian, Italian, Asian, American                  1600   
 
    has_online_delivery                                                url  \
 0                    1  https://www.zomato.com/HauzKhasSocial?utm_sour...   
 1                    0  https://www.zomato.com/ncr/qubitos-the-terrace...   
 2                    1  https://www.zomato.com/ncr/the-hudson-cafe-del...   

In [2]:
unique_values = data['currency'].unique()
print(unique_values)


['Rs.']


In [3]:
# Filling missing values for 'cuisines' by mode (most frequent cuisine)
mode_cuisines = data['cuisines'].mode()[0]
data['cuisines'].fillna(mode_cuisines, inplace=True)

# Mapping currencies based on city - assuming each city has one primary currency
currency_by_city = data[['city', 'currency']].dropna().drop_duplicates()
city_currency_map = dict(zip(currency_by_city['city'], currency_by_city['currency']))

# Fill missing 'currency' values based on city
data['currency'] = data.apply(lambda x: city_currency_map.get(x['city'], x['currency']), axis=1)

# Fill any remaining missing 'currency' with the most common currency in that city if possible
data['currency'].fillna(method='ffill', inplace=True)

# Converting the cost according to the respective currency
def format_cost(row):
    return f"{row['currency']} {row['average_cost_for_two']}"

data['formatted_cost'] = data.apply(format_cost, axis=1)

# Handling missing 'url' values by filling with similar restaurant URLs based on cuisine
data['url'].fillna(data.groupby('cuisines')['url'].transform('first'), inplace=True)

# Encoding categorical data (if needed later)
data_encoded = pd.get_dummies(data, columns=['city', 'locality'], drop_first=True)


data.head()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['cuisines'].fillna(mode_cuisines, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['currency'].fillna(method='ffill', inplace=True)
  data['currency'].fillna(method='ffill', inplace=True)
The behavior will change in pandas 3.0. This inplace method will neve

Unnamed: 0,name,cuisines,average_cost_for_two,has_online_delivery,url,price_range,currency,has_table_booking,photos_url,thumb,...,rating_color,votes,aggregate_rating,latitude,longitude,address,city,locality,locality_verbose,formatted_cost
0,Hauz Khas Social,"Continental, American, Asian, North Indian",1600,1,https://www.zomato.com/HauzKhasSocial?utm_sour...,3,Rs.,1,https://www.zomato.com/HauzKhasSocial/photos?u...,https://b.zmtcdn.com/data/pictures/2/308322/cf...,...,5BA829,7931,4.3,28.554285,77.194471,"9-A & 12, Hauz Khas Village, New Delhi",New Delhi,Hauz Khas Village,"Hauz Khas Village, New Delhi",Rs. 1600
1,Qubitos - The Terrace Cafe,"Thai, European, Mexican, North Indian, Chinese...",1500,0,https://www.zomato.com/ncr/qubitos-the-terrace...,3,Rs.,1,https://www.zomato.com/ncr/qubitos-the-terrace...,https://b.zmtcdn.com/data/pictures/7/18037817/...,...,3F7E00,778,4.5,28.647133,77.117701,"C-7, Vishal Enclave, Opposite Metro Pillar 417...",New Delhi,Rajouri Garden,"Rajouri Garden, New Delhi",Rs. 1500
2,The Hudson Cafe,"Cafe, Italian, Continental, Chinese",850,1,https://www.zomato.com/ncr/the-hudson-cafe-del...,2,Rs.,0,https://www.zomato.com/ncr/the-hudson-cafe-del...,https://b.zmtcdn.com/data/pictures/5/312345/03...,...,5BA829,1537,4.4,28.694947,77.204317,"2524, 1st Floor, Hudson Lane, Delhi University...",New Delhi,Delhi University-GTB Nagar,"Delhi University-GTB Nagar, New Delhi",Rs. 850
3,Summer House Cafe,"Italian, Continental",1850,0,https://www.zomato.com/ncr/summer-house-cafe-h...,3,Rs.,1,https://www.zomato.com/ncr/summer-house-cafe-h...,https://b.zmtcdn.com/data/pictures/0/307490/e0...,...,5BA829,1823,4.1,28.55252,77.203809,"1st Floor, DDA Shopping Complex, Aurobindo Pla...",New Delhi,Hauz Khas,"Hauz Khas, New Delhi",Rs. 1850
4,38 Barracks,"North Indian, Italian, Asian, American",1600,0,https://www.zomato.com/ncr/38-barracks-connaug...,3,Rs.,1,https://www.zomato.com/ncr/38-barracks-connaug...,https://b.zmtcdn.com/data/pictures/7/18241537/...,...,5BA829,840,4.4,28.633025,77.222858,"M-38, Outer Circle, Connaught Place, New Delhi",New Delhi,Connaught Place,"Connaught Place, New Delhi",Rs. 1600


In [4]:
data.to_csv('data_recom.csv', index=False)


In [11]:
print(data.columns)

Index(['name', 'cuisines', 'average_cost_for_two', 'has_online_delivery',
       'url', 'price_range', 'currency', 'has_table_booking', 'photos_url',
       'thumb', 'menu_url', 'rating_text', 'rating_color', 'votes',
       'aggregate_rating', 'latitude', 'longitude', 'address', 'city',
       'locality', 'locality_verbose', 'formatted_cost'],
      dtype='object')
