In [102]:
import pandas as pd
import numpy as np 
from faker import Faker
from enum import Enum
from typing import List
import string
import uuid

In [141]:
# Attribute catalogue for the synthetic profiles.
# Every dict maps an attribute name either to the Python type of a free-form
# value (``str``) or to a list of candidate values; an empty list means that no
# candidate values are enumerated here.

# Identity and demographic attributes (also the column order of the personal-data frame).
person_entity = {
    "userId": str, 
    "username": str, 
    "password": str, 
    "email": str, 
    "name": str, 
    "surname": str, 
    "clinical_gender": ["M", "F"], 
    "current_location": [], 
    "age_range": ["20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89", "90-100"],
    "living_country": [], 
    "country_of_origin": []
    }

# Socio-economic and anthropometric attributes.
user_entity = {
    "current_working_status": ["Half-time-worker", "Full-time-worker", "Self-employee", "Unemployed"],
    "marital_status": ["Single", "Married"], 
    "life_style": ["Sedentary", "Lightly active", "Moderately active", "Very active"], 
    "weight": [],
    "ethnicity": ["White", "Black", "Latino", "Asian"], 
    "height": []
}

# Dietary observances.
# NOTE: this cell used to assign ``cultural_factors`` a second time further down
# (as ``{"cultural_factors": []}``), which silently threw away every key below.
# Both definitions are merged here so neither set of keys is lost.
cultural_factors = {
    "vegan_observant": [True, False],
    "vegetarian_observant": [True, False], 
    "halal_observant": [True, False], 
    "kosher_observant": [True, False],
    "religion_observant": [True, False], 
    "drink_limitation": [True, False], 
    "pescatarian_observant": [True, False],
    "religion": [],
    "food_limitation": [],
    "cultural_factors": []
}

sustainability = {
    "environmental_harm": [],
    "eco_score": [],
    "co2_food_print": [],
    "recyclable_packaging": []
}

actions = {
    "action_type": [],
    "location": [], 
    "action_date": []
}

preferences = {
    "breakfast_time": [],
    "lunch_time": [],
    "dinner_time": []
}

health_conditions = {
    "food_allergies": []
}

# NOTE(review): these labels differ from the NutritionGoals values used when the
# goals table is generated - confirm which vocabulary is the intended one.
user_goals = {
    "user_goals": ["loss_weight", "fit", "food_restrictions"]
}

diet = {
    "diet_daily_calories": [],
    "calorie_deficit": []
}

    

In [8]:
# Empty frame with one column per person_entity attribute, in declaration order
df_personal_data = pd.DataFrame(data=[], columns=list(person_entity))

In [13]:
# Seed every random source that can be seeded so a fresh run draws the same data.
# NumPy drives the categorical/numeric draws; Faker drives names, countries and locales.
# (The uuid4-based id suffixes generated later are still different on every run.)
np.random.seed(0)
Faker.seed(0)

In [14]:
# Draw 500 gender codes with equal probability; this assignment also fixes the
# number of rows of the (so far empty) personal-data frame.
df_personal_data["clinical_gender"] = np.random.choice(
    np.asarray(person_entity["clinical_gender"]),
    p=[0.5, 0.5],
    size=500,
)

In [19]:
# Single Faker instance shared by the name, country and locale helpers below
fake = Faker()

In [18]:
class Gender(str, Enum):
    """Clinical gender codes; the values equal the entries of person_entity["clinical_gender"].

    The ``str`` mixin lets members compare equal to the plain code strings.
    """
    male = "M"
    female = "F"

In [22]:
def create_name_surname(gender: str) -> List[str]:
    """Generate a fake first name and surname for the given gender code.

    The previous implementation split ``fake.name_male()`` / ``fake.name_female()``
    on spaces. Faker occasionally adds a prefix or suffix to a full name (for
    example "Mr." or "MD"), in which case the first two tokens were no longer
    the first name and the surname. Drawing both parts separately avoids that.

    Parameters
    ----------
    gender : str
        ``Gender.male`` / ``"M"`` selects a male first name; any other value
        selects a female first name (same fallback as before).

    Returns
    -------
    List[str]
        ``[first_name, surname]``.
    """
    if gender == Gender.male:
        first_name = fake.first_name_male()
    else:
        first_name = fake.first_name_female()
    return [first_name, fake.last_name()]

In [53]:
def generate_country(samples) -> List:
    """Return a list with ``samples`` country names drawn from the shared Faker instance."""
    return [fake.country() for _ in range(samples)]

In [60]:
def generate_email_from_name(name: str, surname: str, domain: str="fake.com"):
    """Build ``<name>.<surname>@<domain>`` with every part lower-cased individually."""
    local_part = ".".join(part.lower() for part in (name, surname))
    return "@".join((local_part, domain.lower()))

In [96]:
def password_generation(length): 
    """Return ``length`` characters drawn (with replacement) from ASCII letters and digits.

    Uses NumPy's global random state, so the result follows ``np.random.seed``.
    """
    alphabet = list(string.ascii_letters + string.digits)
    return ''.join(np.random.choice(alphabet, length))

In [115]:
def generate_age_range(probabilities = None, list_age_range:List = person_entity.get("age_range")):
    """Draw one age-range label.

    ``probabilities`` is forwarded as the ``p`` argument of ``np.random.choice``
    (``None`` means uniform); ``list_age_range`` defaults to the labels declared
    in ``person_entity`` at definition time.
    """
    drawn = np.random.choice(list_age_range, size=1, replace=True, p=probabilities)
    return drawn[0]

In [128]:
def generate_localization(samples): 
    """Return a list with ``samples`` locale codes drawn from the shared Faker instance."""
    return [fake.locale() for _ in range(samples)]

In [30]:
# One list of name parts per row, generated to match that row's gender code
names = df_personal_data["clinical_gender"].apply(create_name_surname)

In [37]:
# Transpose: names_list[0] holds every row's first element, names_list[1] every row's second
names_list = list(zip(*names))

In [38]:
# Position 0 of the transposed names feeds "name", position 1 feeds "surname"
for position, column in enumerate(("name", "surname")):
    df_personal_data[column] = names_list[position]

In [129]:
# The sample count follows the frame instead of repeating the literal 500.
df_personal_data["country_of_origin"] = generate_country(len(df_personal_data))
df_personal_data["living_country"] = generate_country(len(df_personal_data))
# current_location used to be filled with country names first and was then
# overwritten on the very next line; only the effective assignment is kept.
# NOTE(review): the stored value is a locale code such as "pt_BR" - confirm that
# this is what current_location is meant to hold.
df_personal_data["current_location"] = generate_localization(len(df_personal_data))

In [76]:
# Split the two columns into parallel tuples: list_names[0] = names, list_names[1] = surnames
list_names = list(zip(*df_personal_data[["name", "surname"]].values.tolist()))

In [80]:
# One address per row, built from the row's name and surname with the default domain
df_personal_data["email"] = [
    generate_email_from_name(first, last)
    for first, last in zip(list_names[0], list_names[1])
]

In [98]:
# 500 independent 8-character passwords
df_personal_data["password"] = [password_generation(8) for _ in range(500)]

In [110]:
# Both identifiers are the lower-cased first name followed by the fourth group
# (4 hex digits) of a fresh UUID4; username and userId get independent suffixes.
df_personal_data["username"] = df_personal_data["name"].apply(lambda x: x.lower()+str(uuid.uuid4()).split("-")[-2])
df_personal_data["userId"] = df_personal_data["name"].apply(lambda x: x.lower()+str(uuid.uuid4()).split("-")[-2])
# userId is copied into every table generated afterwards and acts as their key.
# A 4-hex-digit suffix can collide for users sharing a first name, so fail
# loudly here instead of producing ambiguous keys downstream.
assert df_personal_data["userId"].is_unique, "duplicate userId generated - re-run this cell"

In [122]:
# One uniformly drawn age-range label per user
df_personal_data["age_range"] = [generate_age_range() for _ in range(500)]

In [130]:
df_personal_data.head(7)

Unnamed: 0,userId,username,password,email,name,surname,clinical_gender,current_location,age_range,living_country,country_of_origin
0,cynthiaa85a,cynthiabf65,uR2aKTPc,cynthia.franklin@fake.com,Cynthia,Franklin,F,ka_GE,30-39,Guadeloupe,Canada
1,gloria81ec,gloriaa8e7,Hmp2Hxmf,gloria.flores@fake.com,Gloria,Flores,F,el_GR,20-29,Guinea,Guadeloupe
2,karena0a0,karen9701,Mbkg9gMM,karen.mason@fake.com,Karen,Mason,F,lo_LA,90-100,Peru,Germany
3,autumnbd78,autumn9165,lYvYYZcm,autumn.peterson@fake.com,Autumn,Peterson,F,gv_GB,20-29,Monaco,Sao Tome and Principe
4,robert9174,robert8465,DjZif01R,robert.moore@fake.com,Robert,Moore,M,de_LU,20-29,Antigua and Barbuda,Niger
5,anna9399,anna924c,bRpmVmv4,anna.roberts@fake.com,Anna,Roberts,F,pt_BR,40-49,French Guiana,Russian Federation
6,anthonyb087,anthony9d52,sznECSPv,anthony.house@fake.com,Anthony,House,M,my_MM,50-59,Cambodia,Turkey


In [131]:
# Persist the personal profiles as CSV; index=False keeps the row index out of the file
df_personal_data.to_csv("personal_profiles.csv", index=False)

# 2. Generate User data 

In [200]:
# userId first, then every user_entity attribute in declaration order
user_columns = ["userId", *user_entity]
df_user_entity = pd.DataFrame(columns=user_columns, data=[])

In [201]:
# Reuse the ids of the personal-data frame so both tables share the same key and row order
df_user_entity["userId"] = df_personal_data["userId"]

In [202]:
def choose_one_from_list(list_values: List, 
                         samples : int,  
                         probabilities: List = None, 
                         size: int =1,
                         replace: bool = True):
    """Run ``np.random.choice`` ``samples`` times and collect the results.

    Each call draws ``size`` items from ``list_values`` (weights ``probabilities``,
    with or without replacement as requested), so the returned list contains
    ``samples`` NumPy arrays of length ``size`` - not bare values.
    """
    draws = []
    for _ in range(samples):
        draw = np.random.choice(list_values, size=size, replace=replace, p=probabilities)
        draws.append(draw)
    return draws

In [203]:
# Fill each categorical column with uniform draws from its candidate list in
# user_entity. Every cell holds a length-1 array at this point.
for column in ("current_working_status", "marital_status", "life_style", "ethnicity"):
    df_user_entity[column] = choose_one_from_list(user_entity.get(column), samples=500)

In [204]:
# BMI categories and the share of users to draw into each of them
BMI_values = ["underweight", "healthy", "overweight", "obesity"]
BMI_prob = [0.1, 0.3, 0.3, 0.3]
# one category per user, sampled with the weights above
bmis = np.random.choice(BMI_values, 500, p=BMI_prob)

In [205]:
# Pools of normally distributed heights (standard deviation 10): mean 170 for
# men, mean 160 for women. The per-user heights are sampled from these pools.
male_height = np.random.normal(loc=170, scale=10, size=500)
female_height = np.random.normal(loc=160, scale=10, size=500)

In [206]:
# store the sampled BMI category of each user (used below to derive the weight)
df_user_entity["BMI"] = bmis

In [207]:
# Sample one height per user from the pool matching the user's gender.
# The number of draws is taken from the mask itself: the former literals
# (246 and 254) were only valid for one particular random draw of the gender
# column and raised a length mismatch for any other.
female_mask = df_personal_data["clinical_gender"] == 'F'
male_mask = df_personal_data["clinical_gender"] == 'M'
df_user_entity.loc[female_mask, "height"] = np.random.choice(female_height, size=int(female_mask.sum()))
df_user_entity.loc[male_mask, "height"] = np.random.choice(male_height, size=int(male_mask.sum()))

In [208]:
# Cast the sampled heights to integers (the fractional part is dropped)
df_user_entity["height"] = df_user_entity["height"].astype(int)

In [235]:
class BMI_constants(str, Enum):
    """BMI category labels; the values equal the entries of ``BMI_values``.

    The ``str`` mixin lets members compare equal to the plain label strings.
    """
    underweight = "underweight"
    healthy = "healthy"
    overweight = "overweight"
    obesity = "obesity"

In [210]:
# set the weight 
def calculate_weight_from_height(height: float, bmi: str):
    """Return ``height ** 2`` multiplied by a representative BMI for the category.

    With ``height`` in metres this is the weight in kilograms that yields the
    representative BMI (BMI = weight / height**2); the caller in this notebook
    divides the stored height by 100 before passing it in.

    Parameters
    ----------
    height : float
        Body height.
    bmi : str
        BMI category label (a ``BMI_constants`` member or its string value).

    Returns
    -------
    float
        ``height ** 2 * target`` where target is 18.0 (underweight),
        21.0 (healthy), 28.0 (overweight) or 32.0 for any other label,
        which includes "obesity".
    """
    representative_bmi = (
        (BMI_constants.underweight, 18.0),
        (BMI_constants.healthy, 21.0),
        (BMI_constants.overweight, 28.0),
    )
    for category, bmi_numeric in representative_bmi:
        if bmi == category:
            return (height**2)*bmi_numeric
    # Same fallback as before: everything that is not one of the three
    # categories above is treated as obesity.
    return (height**2)*32.0

In [211]:
# Derive each user's weight from height and BMI category, rounded to 2 decimals.
# The height is divided by 100 before the call (presumably cm -> m; confirm the unit).
df_user_entity["weight"] = np.round(df_user_entity.apply(lambda row: calculate_weight_from_height(row["height"]/100.0, row["BMI"]), axis=1), 2)

In [212]:
# The categorical columns were filled with length-1 arrays; replace each array
# by its single element. Values that are already strings are left untouched so
# that re-running this cell does not cut every label down to its first character.
for column in ("current_working_status", "marital_status", "life_style", "ethnicity"):
    df_user_entity[column] = df_user_entity[column].apply(
        lambda x: x if isinstance(x, str) else x[0]
    )

In [213]:
df_user_entity.head(6)

Unnamed: 0,userId,current_working_status,marital_status,life_style,weight,ethnicity,height,BMI
0,cynthiaa85a,Half-time-worker,Married,Very active,47.88,Asian,151,healthy
1,gloria81ec,Half-time-worker,Married,Lightly active,65.55,White,153,overweight
2,karena0a0,Unemployed,Single,Moderately active,41.04,Latino,151,underweight
3,autumnbd78,Full-time-worker,Married,Moderately active,50.45,Latino,155,healthy
4,robert9174,Half-time-worker,Married,Sedentary,67.29,Black,179,healthy
5,anna9399,Full-time-worker,Single,Moderately active,40.5,Asian,150,underweight


In [214]:
# Persist the user table (including the helper BMI column) without the row index
df_user_entity.to_csv("user_entity.csv", index=False)

# 3. Generate health conditions 

In [217]:
# Empty table: one allergy entry per user
df_health_conditions = pd.DataFrame(data=[], columns=["userId", "allergy"])

In [219]:
# Same ids and row order as the user table
df_health_conditions["userId"] = df_user_entity["userId"]

In [223]:
# Candidate allergies; the last entry, the string "None", stands for "no allergy".
# NOTE(review): pandas' read_csv treats the text "None" as a missing value by
# default in recent versions - confirm that consumers of the CSV expect that.
allergies = ["cow's milk", "eggs", "peanut", "soy", "fish", "tree nuts", "shellfish", "wheat", "None"]
# 0.075 for each real allergy, 0.4 for "None"
allergies_prob = [0.075] * (len(allergies) - 1) + [0.4]

In [224]:
# Draw one allergy label per user with the weights defined above
user_allergies = np.random.choice(allergies, size=500, replace=True, p = allergies_prob)

In [226]:
# Attach the sampled labels to the health-conditions table (positional, row by row)
df_health_conditions["allergy"] = user_allergies

In [231]:
df_health_conditions.head(6)

Unnamed: 0,userId,allergy
0,cynthiaa85a,fish
1,gloria81ec,
2,karena0a0,
3,autumnbd78,
4,robert9174,
5,anna9399,shellfish


In [232]:
# Persist the health-conditions table without the row index
df_health_conditions.to_csv("health_conditions.csv", index=False)

# 4. Generate user goals

In [233]:
# Empty table for the nutrition goal of each user
# (for example: lose weight, keep fit, gain weight)
df_user_goals = pd.DataFrame(columns=["userId", "nutrition_goal"], data=[])

In [234]:
# Same ids and row order as the user table
df_user_goals["userId"] = df_user_entity["userId"]

In [246]:
class NutritionGoals(str, Enum):
    """Nutrition goal labels assigned to users according to their BMI category."""
    lose_weight = "lose_weight"
    maintain_fit = "maintain_fit"
    gain_weight = "gain_weight"

In [241]:
def define_user_goal_according_BMI(bmi: str):
    """Map a BMI category to the nutrition goal label stored for the user.

    The plain string value of the goal is returned via ``.value``. Formatting
    the member inside an f-string is not portable: since the ``Enum.__format__``
    change in newer Python releases (3.11+), a ``str``-mixin member formats as
    "NutritionGoals.gain_weight" instead of "gain_weight".

    Parameters
    ----------
    bmi : str
        BMI category label (a ``BMI_constants`` member or its string value).

    Returns
    -------
    str
        "gain_weight" for underweight, "maintain_fit" for healthy and
        "lose_weight" for every other label.
    """
    if bmi == BMI_constants.underweight:
        # underweight users should gain weight
        return NutritionGoals.gain_weight.value
    if bmi == BMI_constants.healthy:
        # healthy users keep their current shape
        return NutritionGoals.maintain_fit.value
    # overweight, obesity and any unknown label: lose weight
    return NutritionGoals.lose_weight.value

In [247]:
# Translate every user's BMI category into a nutrition goal
df_user_goals["nutrition_goal"] = df_user_entity["BMI"].apply(define_user_goal_according_BMI)

In [248]:
df_user_goals.head(6)

Unnamed: 0,userId,nutrition_goal
0,cynthiaa85a,maintain_fit
1,gloria81ec,lose_weight
2,karena0a0,gain_weight
3,autumnbd78,maintain_fit
4,robert9174,maintain_fit
5,anna9399,gain_weight


# 4. Generate cultural factors 

In [250]:
# Empty table: one cultural factor (dietary observance) per user
df_cultural_factors = pd.DataFrame(data=[], columns=["userId", "cultural_factor"])

In [260]:
# Same ids and row order as the personal-data table
df_cultural_factors["userId"] = df_personal_data["userId"]

In [255]:
# Each dietary observance is listed next to its sampling probability; the pairs
# are then split into the label list and the parallel probability list.
# The string "None" stands for "no restriction".
food_restrictions, food_restriction_probs = (
    list(column)
    for column in zip(
        ("vegan_observant", 0.2),
        ("vegetarian_observant", 0.3),
        ("halal_observant", 0.05),
        ("kosher_observant", 0.05),
        ("None", 0.4),
    )
)

In [256]:
# Draw one observance label per user with the weights defined above
food_restrictions_user = np.random.choice(food_restrictions, size=500, replace=True, p=food_restriction_probs)

In [258]:
# Attach the sampled labels to the cultural-factors table (positional, row by row)
df_cultural_factors["cultural_factor"] = food_restrictions_user

In [261]:
df_cultural_factors.head(6)

Unnamed: 0,userId,cultural_factor
0,cynthiaa85a,vegan_observant
1,gloria81ec,
2,karena0a0,
3,autumnbd78,vegetarian_observant
4,robert9174,
5,anna9399,


In [263]:
# Persist the cultural-factors table without the row index
df_cultural_factors.to_csv("cultural_factors.csv", index=False)

# 5. Generate preferences

In [264]:
# Empty table: preferred meal times per user
df_preferences = pd.DataFrame(data=[], columns=["userId", "breakfast_time", "lunch_time", "dinner_time"])

In [266]:
# Same ids and row order as the personal-data table
df_preferences["userId"] = df_personal_data["userId"]

In [268]:
# Meal times drawn from normal distributions with standard deviation 1,
# centred on 7 (breakfast), 13 (lunch) and 20 (dinner).
# NOTE(review): the values look like decimal hours (6.5 would be 06:30), not HH.MM - confirm.
breakfast_time = np.random.normal(loc=7, scale=1, size=500)
lunch_time = np.random.normal(loc=13, scale=1, size=500)
dinner_time = np.random.normal(loc=20, scale=1, size=500)

In [271]:
# Store the sampled meal times with two decimals
df_preferences["breakfast_time"] = breakfast_time.round(2)
df_preferences["lunch_time"] = lunch_time.round(2)
df_preferences["dinner_time"] = dinner_time.round(2)

In [272]:
df_preferences.head(6)

Unnamed: 0,userId,breakfast_time,lunch_time,dinner_time
0,cynthiaa85a,7.15,11.98,22.33
1,gloria81ec,6.96,13.32,20.13
2,karena0a0,6.14,13.31,19.69
3,autumnbd78,4.91,13.9,18.16
4,robert9174,6.8,13.72,20.36
5,anna9399,6.33,13.36,21.48


In [273]:
# Persist the preferences table without the row index
df_preferences.to_csv("preferences.csv", index=False)

# 6. Generate Diet Therapy 

In [274]:
def calculate_basal_metabolic_rate(weight: float, height: float, age: int, clinical_gender: str = "M"):
    """Estimate the basal metabolic rate with the revised Harris-Benedict equation.

    The coefficients are those of the revised Harris-Benedict equation, which
    expects weight in kilograms, height in centimetres and age in years and
    yields kilocalories per day.

    Parameters
    ----------
    weight : float
        Body weight.
    height : float
        Body height.
    age : int
        Age in years.
    clinical_gender : str, optional
        "M" (default, the formula this function always used) or "F".

    Returns
    -------
    float
        The estimated basal metabolic rate.

    Raises
    ------
    ValueError
        If ``clinical_gender`` is neither "M" nor "F".
    """
    if clinical_gender == "M":
        return 88.362 + (13.397 * weight) + (4.799 * height) - (5.677 * age)
    if clinical_gender == "F":
        return 447.593 + (9.247 * weight) + (3.098 * height) - (4.330 * age)
    raise ValueError(f"clinical_gender must be 'M' or 'F', got {clinical_gender!r}")

In [None]:
def calculate_daily_calory_needs(BMR: float, activity_level: str, ):
    """Placeholder - not implemented yet, currently returns ``None``.

    Presumably meant to scale ``BMR`` by ``activity_level`` to obtain the daily
    calorie needs; confirm the intended contract before implementing.
    """
    # TODO: add calorie deficit, calorie gain and calorie maintenance
    pass