In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import pandas as pd
import random

# Predefined lists of data
countries = ['Pakistan', 'India', 'Bangladesh', 'Sri Lanka', 'Nepal', 'China', 'Afghanistan', 'Bhutan']
weathers = ['Hot', 'Cold', 'Rainy', 'Humid']
food_types = ['Breakfast', 'Lunch', 'Dinner']

# Expanded Food recommendations with common foods across countries
food_dict = {
    # Pakistan
    ('Pakistan', 'Hot', 'Lunch'): ['Biryani', 'Chicken Karahi', 'Naan'],
    ('Pakistan', 'Cold', 'Dinner'): ['Nihari', 'Paya', 'Roti'],
    ('Pakistan', 'Rainy', 'Breakfast'): ['Paratha', 'Omelette', 'Chai'],
    
    # India
    ('India', 'Hot', 'Lunch'): ['Biryani (Shared with Pakistan)', 'Rajma Chawal', 'Naan (Shared with Pakistan)'],
    ('India', 'Cold', 'Dinner'): ['Tandoori Chicken', 'Dal Makhani', 'Roti (Common in South Asia)'],
    
    # Bangladesh
    ('Bangladesh', 'Hot', 'Lunch'): ['Bhuna Khichuri', 'Fish Curry', 'Biryani (Shared with Pakistan and India)'],
    ('Bangladesh', 'Cold', 'Dinner'): ['Kacchi Biryani', 'Beef Curry', 'Naan (Common in South Asia)'],
    
    # Sri Lanka
    ('Sri Lanka', 'Hot', 'Lunch'): ['Kottu Roti', 'Fish Curry', 'Rice'],
    ('Sri Lanka', 'Cold', 'Dinner'): ['Devilled Chicken', 'Eggplant Curry', 'Pittu'],
    
    # Nepal
    ('Nepal', 'Hot', 'Lunch'): ['Momo (Shared with Bhutan)', 'Dal Bhat', 'Achar'],
    ('Nepal', 'Cold', 'Dinner'): ['Thukpa', 'Sel Roti', 'Gundruk'],
    
    # China
    ('China', 'Hot', 'Lunch'): ['Kung Pao Chicken', 'Fried Rice', 'Dumplings'],
    ('China', 'Cold', 'Dinner'): ['Hot Pot', 'Noodles', 'Baozi'],
    
    # Afghanistan
    ('Afghanistan', 'Hot', 'Lunch'): ['Kabuli Pulao', 'Mantu', 'Naan (Common in South Asia)'],
    
    # Bhutan
    ('Bhutan', 'Hot', 'Lunch'): ['Momo (Shared with Nepal)', 'Ema Datshi', 'Red Rice']
}

# Random fallback list of foods if no specific combination is found
fallback_foods = [
    ['Mixed Vegetable Curry', 'Chapati', 'Salad'],
    ['Fried Rice', 'Samosa', 'Raita'],
    ['Pasta', 'Garlic Bread', 'Soup'],
    ['Grilled Chicken', 'Fries', 'Steamed Veggies']
]


In [3]:
# Function to get food recommendations based on country, weather, and food type
def get_food_recommendation(country, weather, food_type):
    return food_dict.get((country, weather, food_type), random.choice(fallback_foods))

# Generate 500 rows of data
data = []
for _ in range(500):
    country = random.choice(countries)
    weather = random.choice(weathers)
    food_type = random.choice(food_types)
    
    recommended_foods = get_food_recommendation(country, weather, food_type)
    data.append([country, weather, food_type, recommended_foods])

In [4]:
df = pd.DataFrame(data, columns=['Country', 'Weather', 'Food Type', 'Recommended Foods'])

In [5]:
df.head()

Unnamed: 0,Country,Weather,Food Type,Recommended Foods
0,Pakistan,Rainy,Breakfast,"[Paratha, Omelette, Chai]"
1,Bhutan,Cold,Lunch,"[Grilled Chicken, Fries, Steamed Veggies]"
2,India,Cold,Breakfast,"[Grilled Chicken, Fries, Steamed Veggies]"
3,India,Rainy,Breakfast,"[Pasta, Garlic Bread, Soup]"
4,Afghanistan,Rainy,Dinner,"[Pasta, Garlic Bread, Soup]"


In [6]:
df.shape

(500, 4)

In [7]:
df

Unnamed: 0,Country,Weather,Food Type,Recommended Foods
0,Pakistan,Rainy,Breakfast,"[Paratha, Omelette, Chai]"
1,Bhutan,Cold,Lunch,"[Grilled Chicken, Fries, Steamed Veggies]"
2,India,Cold,Breakfast,"[Grilled Chicken, Fries, Steamed Veggies]"
3,India,Rainy,Breakfast,"[Pasta, Garlic Bread, Soup]"
4,Afghanistan,Rainy,Dinner,"[Pasta, Garlic Bread, Soup]"
...,...,...,...,...
495,Pakistan,Rainy,Dinner,"[Fried Rice, Samosa, Raita]"
496,Bangladesh,Cold,Breakfast,"[Pasta, Garlic Bread, Soup]"
497,Sri Lanka,Humid,Lunch,"[Pasta, Garlic Bread, Soup]"
498,Nepal,Humid,Dinner,"[Fried Rice, Samosa, Raita]"


In [8]:
df.describe()

Unnamed: 0,Country,Weather,Food Type,Recommended Foods
count,500,500,500,500
unique,8,4,3,19
top,Afghanistan,Cold,Breakfast,"[Fried Rice, Samosa, Raita]"
freq,75,128,182,113


In [9]:
df.corr

<bound method DataFrame.corr of          Country Weather  Food Type                          Recommended Foods
0       Pakistan   Rainy  Breakfast                  [Paratha, Omelette, Chai]
1         Bhutan    Cold      Lunch  [Grilled Chicken, Fries, Steamed Veggies]
2          India    Cold  Breakfast  [Grilled Chicken, Fries, Steamed Veggies]
3          India   Rainy  Breakfast                [Pasta, Garlic Bread, Soup]
4    Afghanistan   Rainy     Dinner                [Pasta, Garlic Bread, Soup]
..           ...     ...        ...                                        ...
495     Pakistan   Rainy     Dinner                [Fried Rice, Samosa, Raita]
496   Bangladesh    Cold  Breakfast                [Pasta, Garlic Bread, Soup]
497    Sri Lanka   Humid      Lunch                [Pasta, Garlic Bread, Soup]
498        Nepal   Humid     Dinner                [Fried Rice, Samosa, Raita]
499        Nepal    Cold      Lunch    [Mixed Vegetable Curry, Chapati, Salad]

[500 rows x 4 colum

In [10]:
df['Weather'].value_counts()

Weather
Cold     128
Humid    127
Hot      123
Rainy    122
Name: count, dtype: int64

# Preprocessing 

In [11]:
from sklearn.preprocessing import MultiLabelBinarizer
import pandas as pd
import ast
df['Recommended Foods'] = df['Recommended Foods'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
y = df['Recommended Foods'].apply(lambda x: x if isinstance(x, list) else [])

mlb = MultiLabelBinarizer()
y_encoded = mlb.fit_transform(y)

food_labels = mlb.classes_
X = df[['Country', 'Weather', 'Food Type']]

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline


X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=1)

In [13]:
X_train.shape

(400, 3)

In [14]:
X_test.shape

(100, 3)

In [15]:
y_train

array([[0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [16]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), ['Country', 'Weather', 'Food Type'])
    ]
)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

In [17]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

0.31

In [18]:
def recommend_food(country, weather, food_type):
    input_data = pd.DataFrame({'Country': [country], 'Weather': [weather], 'Food Type': [food_type]})
    recommended_foods_encoded = pipeline.predict(input_data)
    
    recommended_foods = mlb.inverse_transform(recommended_foods_encoded)
    
    # Return the first 3 food items if available
    return recommended_foods[0][:3] if recommended_foods else ['No food recommended']

# Example usage
print(recommend_food('China', 'Hot', 'Lunch'))

('Dumplings', 'Fried Rice', 'Kung Pao Chicken')


In [19]:
df['Country'].value_counts()

Country
Afghanistan    75
Bangladesh     73
Nepal          65
Sri Lanka      63
China          62
Bhutan         57
Pakistan       56
India          49
Name: count, dtype: int64

In [20]:
df['Weather'].value_counts()

Weather
Cold     128
Humid    127
Hot      123
Rainy    122
Name: count, dtype: int64

In [21]:
df['Food Type'].value_counts()

Food Type
Breakfast    182
Lunch        175
Dinner       143
Name: count, dtype: int64