In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder



# Data Loading

In [2]:
# Load datasets
train_data = pd.read_excel("https://github.com/FlipRoboTechnologies/ML-Datasets/raw/main/Restaurant%20Food%20Cost/Data_Train.xlsx")
test_data = pd.read_excel("https://github.com/FlipRoboTechnologies/ML-Datasets/raw/main/Restaurant%20Food%20Cost/Data_Test.xlsx")


Unnamed: 0,TITLE,RESTAURANT_ID,CUISINES,TIME,CITY,LOCALITY,RATING,VOTES,COST
0,CASUAL DINING,9438,"Malwani, Goan, North Indian","11am – 4pm, 7:30pm – 11:30pm (Mon-Sun)",Thane,Dombivali East,3.6,49 votes,1200
1,"CASUAL DINING,BAR",13198,"Asian, Modern Indian, Japanese",6pm – 11pm (Mon-Sun),Chennai,Ramapuram,4.2,30 votes,1500
2,CASUAL DINING,10915,"North Indian, Chinese, Biryani, Hyderabadi","11am – 3:30pm, 7pm – 11pm (Mon-Sun)",Chennai,Saligramam,3.8,221 votes,800
3,QUICK BITES,6346,"Tibetan, Chinese",11:30am – 1am (Mon-Sun),Mumbai,Bandra West,4.1,24 votes,800
4,DESSERT PARLOR,15387,Desserts,11am – 1am (Mon-Sun),Mumbai,Lower Parel,3.8,165 votes,300
5,CASUAL DINING,5618,"North Indian, Chinese, Seafood, Biryani",12noon – 12:30AM (Mon-Sun),Mumbai,Vile Parle West,4.0,550 votes,800
6,CAFÉ,4383,"Cafe, Chinese, Pizza, North Indian, Burger","12noon – 11:30pm (Mon, Tue, Thu, Fri, Sat, Sun...",Bangalore,Koramangala 6th Block,4.1,509 votes,600
7,MICROBREWERY,1249,Bar Food,1pm – 1am (Mon-Sun),Gurgaon,Sector 29,4.3,1612 votes,1600
8,QUICK BITES,7062,"South Indian, North Indian",7am – 10:30pm (Mon-Sun),Bangalore,Bannerghatta Road,3.9,101 votes,300
9,QUICK BITES,10109,North Indian,"11am – 2pm, 4pm – 10:30pm (Mon-Sun)",Hyderabad,Dilsukhnagar,3.3,219 votes,200


# Data Cleaning

In [None]:
# Replace dashes (-) with NaN
train_data.replace('-', np.nan, inplace=True)
test_data.replace('-', np.nan, inplace=True)

# Train and Test data

In [None]:
# Combine train and test data to ensure consistent encoding
combined_data = pd.concat([train_data.drop(columns=["COST"]), test_data], axis=0)

# Impute missing values
imputer = SimpleImputer(strategy='most_frequent')  # Impute with the most frequent value
combined_data_imputed = imputer.fit_transform(combined_data)

# Label encode categorical variables
label_encoders = {}
for col in combined_data.select_dtypes(include=['object']).columns:
    label_encoders[col] = LabelEncoder()
    combined_data[col] = label_encoders[col].fit_transform(combined_data[col])

# Split combined data back into train and test
train_data_encoded = combined_data[:len(train_data)]
test_data_encoded = combined_data[len(train_data):]


# Random Forrest Regressor Model Training

In [21]:
# Define and train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(train_data_encoded, train_data["COST"])

# Make predictions on the test data
test_predictions = model.predict(test_data_encoded)

# Save predictions to a file
predictions_df = pd.DataFrame({"RESTAURANT_ID": test_data["RESTAURANT_ID"], "COST": test_predictions})
predictions_df.to_csv("predicted_food_cost.csv", index=False)


# Cost Prediction

In [23]:
# Make predictions on the test data
test_predictions = model.predict(test_data_encoded)

# Print the predictions
print("Predictions for test data:")
for idx, prediction in enumerate(test_predictions):
    print(f"Restaurant ID: {test_data['RESTAURANT_ID'].iloc[idx]}, Predicted Cost: {prediction}")

Predictions for test data:
Restaurant ID: 4085, Predicted Cost: 1279.5
Restaurant ID: 12680, Predicted Cost: 278.7
Restaurant ID: 1411, Predicted Cost: 696.0
Restaurant ID: 204, Predicted Cost: 378.0
Restaurant ID: 13453, Predicted Cost: 408.2
Restaurant ID: 4518, Predicted Cost: 251.8
Restaurant ID: 1643, Predicted Cost: 817.5
Restaurant ID: 5109, Predicted Cost: 757.5
Restaurant ID: 5606, Predicted Cost: 365.0
Restaurant ID: 14319, Predicted Cost: 923.6
Restaurant ID: 4079, Predicted Cost: 349.0
Restaurant ID: 8873, Predicted Cost: 419.0
Restaurant ID: 12322, Predicted Cost: 1773.5
Restaurant ID: 7910, Predicted Cost: 2710.5
Restaurant ID: 309, Predicted Cost: 876.0
Restaurant ID: 1224, Predicted Cost: 714.5
Restaurant ID: 6376, Predicted Cost: 434.0
Restaurant ID: 3931, Predicted Cost: 332.5
Restaurant ID: 2274, Predicted Cost: 187.4
Restaurant ID: 4974, Predicted Cost: 297.0
Restaurant ID: 12078, Predicted Cost: 640.5
Restaurant ID: 3852, Predicted Cost: 722.5
Restaurant ID: 380, P