In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
file_path = 'Dataset.csv'  # Adjust this path to where your dataset is stored
data = pd.read_csv(file_path)

# Handling missing values for 'Cuisines', crucial for cuisine classification
data['Cuisines'].fillna('Various', inplace=True)

# Encode categorical variables
label_encoder = LabelEncoder()
data['Cuisines'] = label_encoder.fit_transform(data['Cuisines'])

# Preparing features and target variable
X = data[['City', 'Rating text', 'Price range']]  # Consider adding or removing features as necessary
y = data['Cuisines']  # Target variable

# Encoding other categorical features
X = pd.get_dummies(X, columns=['City', 'Rating text'])

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
logistic_model = LogisticRegression(max_iter=500)
random_forest_model = RandomForestClassifier(n_estimators=100, random_state=42)

logistic_model.fit(X_train, y_train)
random_forest_model.fit(X_train, y_train)

# Predictions
y_pred_logistic = logistic_model.predict(X_test)
y_pred_forest = random_forest_model.predict(X_test)

# Model evaluation
accuracy_log = accuracy_score(y_test, y_pred_logistic)
precision_log = precision_score(y_test, y_pred_logistic, average='macro')
recall_log = recall_score(y_test, y_pred_logistic, average='macro')
f1_log = f1_score(y_test, y_pred_logistic, average='macro')

accuracy_forest = accuracy_score(y_test, y_pred_forest)
precision_forest = precision_score(y_test, y_pred_forest, average='macro')
recall_forest = recall_score(y_test, y_pred_forest, average='macro')
f1_forest = f1_score(y_test, y_pred_forest, average='macro')

print("Logistic Regression Metrics:")
print(f"Accuracy: {accuracy_log}, Precision: {precision_log}, Recall: {recall_log}, F1 Score: {f1_log}")

print("Random Forest Metrics:")
print(f"Accuracy: {accuracy_forest}, Precision: {precision_forest}, Recall: {recall_forest}, F1 Score: {f1_forest}")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Cuisines'].fillna('Various', inplace=True)


Logistic Regression Metrics:
Accuracy: 0.12244897959183673, Precision: 0.0011384227061958171, Recall: 0.0036707855034735642, F1 Score: 0.0017013540687987438
Random Forest Metrics:
Accuracy: 0.11564625850340136, Precision: 0.0038132726640384036, Recall: 0.005063026305360399, F1 Score: 0.0037928707891029907


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
