In [1]:
# Built-in library
import itertools
import re
from typing import Any, Optional, Sequence, Union

# Third-party and remaining standard-library imports
import numpy as np
import pandas as pd
import random
import string
from datetime import datetime, timedelta
from faker import Faker
from bs4 import BeautifulSoup
import requests
import warnings
warnings.filterwarnings("ignore")

# pandas settings
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 2_000

## Meal Data

In [2]:
# Candidate food names that meal_data() samples from with random.sample.
# The list order matters: with a fixed random seed, reordering or editing the
# entries changes which meals are drawn.
# NOTE(review): 'AbachariAbula' looks like two entries run together (possibly a
# missing comma) -- confirm. 'Garri' also appears in the `drinks` list, and both
# 'Egusi soup' and 'Egwusi soup' are present -- confirm these are intended.
food = ['Afang soup', 'Pounded yam and egusi soup', 'Egusi soup and eba', 'Ekwele', 'Plantain', 
        'Vegetable soup', 'Banga soup and semovita', 'Amala', 'Kokoro', 'Jollof spaghetti',
        'Vegetable soup and eba', 'Groundnut soup', 'Iyan', 'Atama dindin', 'Pounded yam', 'Jollof rice',
        'Fufu', 'Edikan ikong', 'Asaro and gbegiri', 'Egusi fufu', 'Efo shoko', 'Kilishi', 'Egwusi soup', 'Asaro', 
        'Okra soup', 'Rice and stew', 'Eba', 'AbachariAbula', 'Groundnut soup and eba', 'Fried rice',
        'Egwusi soup and fufu', 'White rice and stew', 'Pounded yam and vegetable soup', 'Efo elegusi', 'Akara',
        'Yam pottage', 'Ekuru', 'Eja nla', 'Akara and moin moin', 'White rice', 'Egusi soup', 'Epa', 'Semovita',
        'Bitter leaf soup and fufu', 'Ofe onugbu', 'Okpa', 'Pepper soup', 'Garri', 'Ofe nsala', 'Gbegiri and eba',
        'Egusi amala', 'Rice and beans', 'Ofe akwu and semovita', 'Efo riro and amala', 'Abacha', 'Suya', 'Puff puff',
        'Iyan and egusi soup', 'Efo riro', 'Ofe nsala and fufu', 'Plantain pottage', 'Banga soup', 'Semovita and egusi soup', 
        'Plantain porridge', 'Okra soup and fufu', 'Ofe akwu']

In [3]:
# Candidate drink names that meal_data() samples from with random.sample.
# As with `food`, the list order affects what a seeded run produces.
# NOTE(review): 'Garri' is also listed in `food` -- confirm it belongs here.
drinks = ['Chapman', 'Pepsi', 'Red wine', 'Milk', 'fura and nono', 'Sprite', 'Pineapple juice', 
          'Coconut water', 'Orange juice', 'Tamarind juice', 'Kunun aya', 'Malt', 'Agbo', 'Tea', 'Lacasera',
          'Garri', 'Kunun dawa', 'Akamu', 'fura', 'White wine', 'Zobo', 'Beer', 'Kunun gyada', 'Kunun koko', 'Water',
          'Fruit juice']

In [4]:
# Define function to generate mock data
random.seed(123)

def meal_data(*, food: list, drinks: list, len_restuarant_id: int) -> pd.DataFrame:
    """Generate a mock menu table with one row per (restaurant, item) pair.

    For every restaurant id in ``range(len_restuarant_id)`` a random subset of
    ``food`` is drawn; then, in a second pass over all restaurants, a random
    subset of ``drinks`` is drawn.  Every selected item gets a random price.
    All randomness comes from the module-level ``random`` generator, so seed
    it before calling for reproducible output.

    Parameters
    ----------
    food : list
        Candidate food names.  Between 10 and 20 are sampled without
        replacement per restaurant (fewer if the list is shorter).
    drinks : list
        Candidate drink names.  Between 5 and 10 are sampled without
        replacement per restaurant (fewer if the list is shorter).
    len_restuarant_id : int
        Number of restaurants; ids run from 0 to ``len_restuarant_id - 1``.

    Returns
    -------
    pd.DataFrame
        Columns ``meal``, ``restaurant_id`` and ``price``.  All food rows
        come first, followed by all drink rows.
    """
    rows = {"meal": [], "restaurant_id": [], "price": []}

    def _add_items(items: list, min_items: int, max_items: int, max_price: int) -> None:
        """Append a random selection of ``items`` for every restaurant."""
        for restaurant_id in range(len_restuarant_id):
            n_items = random.randint(min_items, max_items)
            # random.sample raises ValueError when asked for more elements
            # than the population holds, so cap the request at len(items).
            offered = random.sample(items, min(n_items, len(items)))
            for item in offered:
                rows["meal"].append(item)
                rows["restaurant_id"].append(restaurant_id)
                rows["price"].append(round(random.uniform(500, max_price), 2))

    # Food is priced between 500 and 10000, drinks between 500 and 1000.
    _add_items(food, 10, 20, 10000)
    _add_items(drinks, 5, 10, 1000)

    return pd.DataFrame(rows)

In [5]:
# applying the function
# 69 restaurants are simulated here; the same count is used later as
# len_place_data when generating orders.
meal_df = meal_data(food=food, drinks=drinks, len_restuarant_id=69)
meal_df

Unnamed: 0,meal,restaurant_id,price
0,Akara,0,5593.92
1,Groundnut soup,0,3655.88
2,Ofe akwu and semovita,0,8594.82
3,Edikan ikong,0,2016.79
4,Banga soup and semovita,0,3703.56
...,...,...,...
1532,Lacasera,68,684.49
1533,Kunun dawa,68,834.76
1534,Agbo,68,781.27
1535,Milk,68,840.99


In [6]:
# Write the meal table to meal.csv. The row index is saved as a "product_id"
# column, which is the key used later when merging orders with meals.
meal_df.to_csv('meal.csv', index_label='product_id')

In [7]:
# (rows, columns) of the generated meal table
meal_df.shape

(1537, 3)

## Customer data

In [8]:
# Seed Faker's shared generator so the generated customer names are the same
# on every run (random.seed alone does not control Faker's output).
Faker.seed(123)
fake = Faker()

# Street names used by customers_data() to build customer addresses; one is
# picked at random per customer, so the list order affects seeded output.
streets = ["Allen Avenue","Adeniyi Jones Avenue","Awolowo Road","Broad Street","Herbert Macaulay Way","Victoria Island",
          "Marina Street","Bourdillon Road","Ajose Adeogun Street","Oba Akran Avenue","Isaac John Street","Toyin Street",
          "Adeola Odeku Street","Adeyemo Alakija Street","Opebi Road","Adekunle Fajuyi Way","Adegbola Street",
          "Mobolaji Bank Anthony Way","Ligali Ayorinde Street","Ahmadu Bello Way","Idowu Taylor Street",
          "Akin Adesola Street","Awolowo Way","Ikorodu Road","Oshodi-Apapa Expressway","Creek Road","Joel Ogunnaike Street",
          "Ijaye Road","Ajao Road","Adetokunbo Ademola Street","Bode Thomas Street","Obafemi Awolowo Way",
          "Adeniran Ogunsanya Street","Alhaji Masha Road","Oduduwa Crescent","Adelabu Street","Maryland Mall Road",
          "Yaba Road","Akerele Street","Murtala Muhammed Way","Oregun Road","College Road","Lekki-Epe Expressway",
          "Alausa Secretariat Road","Olusegun Obasanjo Way","Karimu Kotun Street","Ago Palace Way","Akinwunmi Street",
          "Eko Bridge","Western Avenue","Glover Road","Gbagada Expressway","Iganmu Road","Babs Animashaun Road","Oniru Road",
          "Surulere Way","Okota Road","Falomo Bridge","New Market Road","Agege Motor Road","Oniru Beach Road","Aromire Avenue",
          "Ogunlana Drive","Epe Road","Oshodi Market Road","Oyin Jolayemi Street","Ijora Causeway","Apongbon Road",
          "Ojuelegba Road","Oba Akinjobi Street","Ojota Interchange","Simbiat Abiola Road","Eleganza Plaza Road",
          "Obafemi Awolowo Road","Eric Moore Road","Idumagbo Avenue","Bishop Oluwole Street","G.R.A. Ikeja",
          "Ikeja Along Road","Apapa Road","Freedom Park Road"]

# Every generated address ends with this state name.
state = "Lagos"
# Number of customer rows to generate; order generation later uses the same
# value (956) as len_customer_data.
num_customers = 956

In [9]:
def customers_data(
    *, streets: list, state: str, num_customers: int
) -> pd.DataFrame:
    """Build a table of ``num_customers`` synthetic customers.

    Names come from the module-level ``fake`` object (its ``name()`` method);
    every other field is drawn from the module-level ``random`` generator.

    Parameters
    ----------
    streets : list
        Street names; one is chosen at random for each address.
    state : str
        Text appended to the end of every address.
    num_customers : int
        Number of rows to generate; ids run from 0 to ``num_customers - 1``.

    Returns
    -------
    pd.DataFrame
        Columns ``customer_id``, ``customer_name``, ``customer_address``,
        ``customer_phone_num`` and ``customer_email_address``.
    """
    ids, names, addresses, phones, emails = [], [], [], [], []

    for customer_id in range(num_customers):
        ids.append(customer_id)

        full_name = fake.name()
        names.append(full_name)

        # e-mail: the name without spaces at a randomly chosen provider
        provider = random.choice(["gmail", "yahoo"])
        emails.append(f"{full_name.replace(' ', '')}@{provider}.com")

        # address: two capital letters + house number, then street and state
        prefix = "".join(random.choices(string.ascii_uppercase, k=2))
        street = random.choice(streets)
        house_no = random.randint(1, 100)
        addresses.append(f"{prefix}{house_no} {street} {state}")

        # phone number in NNN-NNN-NNNN form (drawn left to right)
        phone_parts = (
            random.randint(100, 999),
            random.randint(100, 999),
            random.randint(1000, 9999),
        )
        phones.append("{}-{}-{}".format(*phone_parts))

    return pd.DataFrame(
        {
            "customer_id": ids,
            "customer_name": names,
            "customer_address": addresses,
            "customer_phone_num": phones,
            "customer_email_address": emails,
        }
    )

In [10]:
# Generate the customer table from the settings defined above and preview
# the first rows.
user_df = customers_data(streets=streets, state=state, num_customers=num_customers)
user_df.head()

Unnamed: 0,customer_id,customer_name,customer_address,customer_phone_num,customer_email_address
0,0,Nicholas Browning,TQ57 Ikorodu Road Lagos,318-286-3743,NicholasBrowning@gmail.com
1,1,Kristen Castillo,JO1 Creek Road Lagos,253-399-1399,KristenCastillo@gmail.com
2,2,Emily Williamson,GZ3 Simbiat Abiola Road Lagos,557-451-2064,EmilyWilliamson@yahoo.com
3,3,Katherine Bauer,VT84 Adeniyi Jones Avenue Lagos,604-934-9744,KatherineBauer@gmail.com
4,4,Robert Pace,RC43 Adeyemo Alakija Street Lagos,653-398-4149,RobertPace@yahoo.com


In [11]:
# Save the customer table to customers.csv. customer_id is already a column,
# so the row index is not written.
user_df.to_csv("customers.csv", index=False)

In [12]:
# (rows, columns) of the generated customer table
user_df.shape

(956, 5)

## Order Data

In [13]:
def order_data(
    *,
    len_meal_data: int,
    len_customer_data: int,
    len_place_data: int,
    len_order_data: int,
    start_date: datetime = datetime(2022, 1, 1),
    end_date: datetime = datetime(2022, 12, 31),
) -> pd.DataFrame:
    """Generate a table of random orders.

    Each order gets a customer id, a place id and a product id, each drawn
    uniformly from ``range(len_customer_data)``, ``range(len_place_data)``
    and ``range(len_meal_data)``.  The three ids are drawn independently of
    one another, so nothing ties the chosen product to the chosen place.
    All randomness comes from the module-level ``random`` generator.

    Parameters
    ----------
    len_meal_data : int
        Number of product ids to draw from.
    len_customer_data : int
        Number of customer ids to draw from.
    len_place_data : int
        Number of place ids to draw from.
    len_order_data : int
        Number of orders (rows) to generate.
    start_date, end_date : datetime, optional
        Inclusive range from which each order date is drawn, in whole-day
        steps from ``start_date``.  Defaults cover the year 2022.

    Returns
    -------
    pd.DataFrame
        Columns ``order_id``, ``customer_id``, ``place_id``, ``product_id``,
        ``product_rating`` (1-5), ``date`` (string formatted as
        ``"%m-%d-%y %H:%M"``) and ``review`` (1 for ratings >= 4, 0 for a
        rating of 3, -1 for ratings below 3).

    Raises
    ------
    ValueError
        If ``end_date`` is earlier than ``start_date``.
    IndexError
        If orders are requested while one of the id ranges is empty.
    """
    if end_date < start_date:
        raise ValueError("end_date must not be earlier than start_date")

    product_ids = range(len_meal_data)
    customer_ids = range(len_customer_data)
    place_ids = range(len_place_data)

    # Loop-invariant settings, computed once instead of once per order.
    rating_values = [1, 2, 3, 4, 5]
    rating_weights = [0.1, 0.2, 0.2, 0.1, 0.4]
    span_days = (end_date - start_date).days

    records = {
        "order_id": [],
        "customer_id": [],
        "place_id": [],
        "product_id": [],
        "product_rating": [],
        "date": [],
    }

    for order_id in range(len_order_data):
        records["order_id"].append(order_id)
        records["customer_id"].append(random.choice(customer_ids))
        records["place_id"].append(random.choice(place_ids))
        records["product_id"].append(random.choice(product_ids))

        rating = random.choices(rating_values, weights=rating_weights)[0]
        records["product_rating"].append(rating)

        # Only whole days are added, so the time-of-day part of the formatted
        # string is always that of start_date (midnight for the defaults).
        offset = timedelta(days=random.randint(0, span_days))
        records["date"].append((start_date + offset).strftime("%m-%d-%y %H:%M"))

    df = pd.DataFrame(records)

    # Derive a coarse sentiment label from the rating.
    conditions = [
        (df["product_rating"] >= 4),
        (df["product_rating"] < 3),
        (df["product_rating"] == 3),
    ]
    labels = [1, -1, 0]
    df["review"] = np.select(condlist=conditions, choicelist=labels)

    return df

In [14]:
# Sizes of the id ranges that orders are drawn from.
# NOTE(review): product ids are drawn from range(60), while the meal table
# written to meal.csv had 1537 rows in the recorded run -- confirm that
# restricting orders to the first 60 product_ids is intended.
len_meal_data = 60
# Same value as num_customers used to build the customer table.
len_customer_data = 956
# Same value as the restaurant count passed to meal_data().
len_place_data = 69
len_order_data = 9024

# generate order data
order_df = order_data(
    len_customer_data=len_customer_data,
    len_meal_data=len_meal_data,
    len_place_data=len_place_data,
    len_order_data=len_order_data,
)
order_df

Unnamed: 0,order_id,customer_id,place_id,product_id,product_rating,date,review
0,0,481,29,15,4,12-23-22 00:00,1
1,1,498,67,21,5,06-26-22 00:00,1
2,2,45,56,43,5,12-19-22 00:00,1
3,3,858,35,49,2,05-27-22 00:00,-1
4,4,746,7,17,5,11-27-22 00:00,1
...,...,...,...,...,...,...,...
9019,9019,709,49,17,5,11-23-22 00:00,1
9020,9020,666,35,35,5,08-17-22 00:00,1
9021,9021,171,40,33,4,02-01-22 00:00,1
9022,9022,44,26,22,2,04-17-22 00:00,-1


In [15]:
# Save the order table to order.csv; order_id is already a column, so the
# row index is not written.
order_df.to_csv("order.csv", index=False)

## Restaurant Data

In [16]:
# Listing pages that restaurant_data() downloads and parses, in this order.
url_list = ["https://www.finelib.com/cities/lagos/business/food/restaurants/page-1",
    "https://www.finelib.com/cities/lagos/business/food/restaurants/page-2"]

def restaurant_data(*, URL: list, timeout: float = 30) -> pd.DataFrame:
    """Scrape restaurant names, addresses and phone numbers from listing pages.

    Each page is downloaded with ``requests`` and parsed with BeautifulSoup
    (``lxml`` parser).  Listings are the ``div.box-682`` elements inside
    ``div.left-column``; the first such element on every page is skipped.

    Parameters
    ----------
    URL : list
        Page URLs to scrape, processed in order.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up, so a stalled
        server cannot hang the notebook.  Defaults to 30.

    Returns
    -------
    pd.DataFrame
        Columns ``restaurant_name``, ``restaurant_address`` and
        ``restaurant_phone_no``, one row per listing.  A field whose markup
        is missing from a listing is recorded as ``None`` so that the three
        columns stay aligned.

    Raises
    ------
    requests.HTTPError
        If a page responds with an error status.
    ValueError
        If a page has no ``div.left-column`` container.
    """
    records = {"restaurant_name": [], "restaurant_address": [], "restaurant_phone_no": []}

    def _text(node):
        """Return the text of a tag, or None when the tag is missing."""
        return node.text if node is not None else None

    for url in URL:
        response = requests.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "lxml")

        left_column = soup.find(name="div", class_="left-column")
        if left_column is None:
            raise ValueError(f"no 'left-column' container found at {url}")

        listings = left_column.find_all(name="div", class_="box-682")
        # The first box is skipped -- presumably it is not a restaurant
        # listing; confirm against the page markup.
        for listing in listings[1:]:
            heading = listing.find(name="div", class_="box-headings")
            name_link = heading.a if heading is not None else None

            info = listing.find(name="div", class_="listing-info-img")
            address_div = phone_div = None
            if info is not None:
                address_div = info.find(name="div", class_="cmpny-lstng-1")
                phone_box = info.find(name="div", class_="tel-no-div")
                if phone_box is not None:
                    phone_div = phone_box.find(name="div", class_="cmpny-lstng-1")

            # Append all three fields together so the columns never get out
            # of step, even when part of a listing is missing.
            records["restaurant_name"].append(_text(name_link))
            records["restaurant_address"].append(_text(address_div))
            records["restaurant_phone_no"].append(_text(phone_div))

    return pd.DataFrame(records)

In [17]:
# get restaurant data
# Requires network access: the listing pages in url_list are downloaded here.
restaurant_df = restaurant_data(URL=url_list)
restaurant_df.head()

Unnamed: 0,restaurant_name,restaurant_address,restaurant_phone_no
0,Bernadines Cloud Kitchen,"35, Sholanke Akoka, Yaba, Lagos",0903 651 7676
1,Abibiz Restaurant,"Murtala Mohammed International Airport, Ikeja, Lagos, Nigeria","0803 302 4280, 01 773 1431"
2,Afi's Restaurant,"5 Olufemi Street, Surulere, Lagos, Nigeria",01 470 3044
3,Aldente,"S11 Lagos City Mall, Onikan, Lagos, Nigeria","01 444 3944, 01 791 2942"
4,All Seasons Restaurants,"Plot 867A, Bishop Aboyade Cole Street, Victoria Island, Lagos, Nigeria",01 262 3135


In [18]:

# save restaurant data
# The row index is written as a "place_id" column, which is the key used
# when merging orders with restaurants.
restaurant_df.to_csv("restaurant.csv", index_label="place_id")

## Merge Data Together

In [19]:
# Reload the four generated tables from the CSV files written above.
# order data
order = pd.read_csv("order.csv")

# customers data
# NOTE(review): `customers` is loaded here but is not used in the merge in
# the next cell -- confirm whether customer details should be joined as well.
customers = pd.read_csv("customers.csv")

# restaurants data
restaurant = pd.read_csv("restaurant.csv")

# meal data
meal = pd.read_csv("meal.csv")

In [20]:
# Attach restaurant details (via place_id) and meal details (via product_id)
# to every order. validate="many_to_one" makes pandas raise a MergeError if
# either lookup table contains duplicate keys, which would otherwise silently
# duplicate order rows.
df = order.merge(
    restaurant, on="place_id", how="left", validate="many_to_one"
).merge(meal, on="product_id", how="left", validate="many_to_one")
df.head()

Unnamed: 0,order_id,customer_id,place_id,product_id,product_rating,date,review,restaurant_name,restaurant_address,restaurant_phone_no,meal,restaurant_id,price
0,0,481,29,15,4,12-23-22 00:00,1,Demmies Delight,"254, Agege Motor Road, Mushin, Lagos, Nigeria",0802 208 3653,Groundnut soup and eba,1,2465.19
1,1,498,67,21,5,06-26-22 00:00,1,Topever Restaurant,"39, Market Street, Shomolu, Shomolu, Lagos, Nigeria","0806 209 8455, 0803 488 6706",Jollof spaghetti,1,7808.15
2,2,45,56,43,5,12-19-22 00:00,1,Mia Delizia Italian Restaurante,"Plot 3, Block 44 ,Samuel Edosa Street, Lagos Mainland, Lagos, Nigeria","0816 237 4907, 0818 231 9255",Rice and stew,2,3233.17
3,3,858,35,49,2,05-27-22 00:00,-1,Fazoc Restaurant,"26 Norman Williams Street, South West, Ikoyi, Lagos, Nigeria",0803 347 4840,Groundnut soup and eba,3,5348.04
4,4,746,7,17,5,11-27-22 00:00,1,Atlantic Bar & Restaurant,"14B Adeola Hopewell Street, Victoria Island, Lagos, Nigeria","0802 327 1564, 01 261 0584",Iyan and egusi soup,1,5507.21


In [21]:
# (rows, columns) of the merged table; with left joins on unique keys the
# row count equals the number of orders.
df.shape

(9024, 13)