In [1]:
import pandas as pd
import numpy as np
import random
from faker import Faker
from entity_generation import generate_business_entities
from datetime import datetime, timedelta

### Load the core data

In [33]:
ftl_df = pd.read_excel('ftl_items.xlsx', sheet_name='Sheet1')
ftl_df

Unnamed: 0,Food,Category,Supply_Chain
0,cilantro,Herbs (fresh),Farmed
1,parsley,Herbs (fresh),Farmed
2,basil,Herbs (fresh),Farmed
3,arugula,Leafy greens (fresh),Farmed
4,baby leaf,Leafy greens (fresh),Farmed
...,...,...,...
292,fuji apples,Fruit,Farmed
293,granny smith apples,Fruit,Farmed
294,blackberries,Fruit,Farmed
295,blueberries,Fruit,Farmed


In [34]:
ftl_df.Category.unique()

array(['Herbs (fresh)', 'Leafy greens (fresh)', 'Cheese', 'Nut butters',
       'Melons', 'Peppers', 'Sprouts', 'Tropical Tree Fruits', 'Seafood',
       'Ready-to-eat deli salads\xa0', 'Tomatoes', 'Shell eggs',
       'Cucumbers (fresh)', 'Fruit'], dtype=object)

In [3]:
entities_df = generate_business_entities()
entities_df

Unnamed: 0,businessType,businessName,primaryPhone,streetAddress,city,state,zip
0,grocery_no_food_bar,Fine Corner,0384859547,37094 David Rue,Donaldland,CA,19847
1,farm,Golden Pastures Farm,2904409227,0801 Eric Hill,Troyville,WA,09171
2,processor,Quality Cuisine,796-113-8650,20350 Brett Hills Apt. 764,South Marcia,ID,22342
3,packaging,Quality Seal,8818815815,252 Jorge Mount Suite 961,Port Lisa,MO,12982
4,processor,Global Provisions,(949)276-9502,092 Michael Isle,North Katelyn,AR,17770
...,...,...,...,...,...,...,...
9995,grocery,Gourmet Shop,516-501-6035,7683 Jordan Creek,Henrymouth,FL,49721
9996,distributor,Gourmet Services,1391229305,3797 Thomas Points Suite 725,Quinnton,MD,64544
9997,grocery_no_food_bar,Delicious Mart,(777)838-0334,9569 Nelson Canyon,New Christina,ID,55198
9998,land_based_receiver,Tidal Maritime,4374832796,29605 Schneider Fork Suite 515,Dennisport,NM,57722


In [4]:
entities_df.businessType.unique()

array(['grocery_no_food_bar', 'farm', 'processor', 'packaging',
       'distributor', 'land_based_receiver', 'fish_farm', 'grocery',
       'wholesaler', 'restaurant'], dtype=object)

# Data Generation Functions

### Generate Supply Chain

In [35]:
def generate_supply_chain(ftl_item):
    #Initialize the supply chain variables
    chain = []

    farm = 'farm'
    field_packed = 'fieldPacked'
    packaging_processor = 'packagingProcessor'
    food_processor = 'foodProcessor'
    kill = 'stop'
    lbr = 'landBasedReceiver'
    direct_sale = 'directToConsumer'
    seaFarm = 'seafoodFarm'
    agg = 'aggregator'
    rest = 'restaurant'
    grocNoFood = 'groceryNoFood'
    grocFood = 'grocery'
    dist = 'distributor'
    whole = 'wholesaler'


    #Determine if the food will be an empty node item - do this after Zac sends the rest of the supply chain info

    #Farmed Product Supply Chain Route
    if ftl_item.Supply_Chain.values[0] == 'Farmed':
        chain.append(farm)
        #Determine if field packed or processed
        pack_int = random.randint(0,100)
        if pack_int >= 50:
            chain.append(field_packed)
        else:
            chain.append(packaging_processor)

    #Created Product Supply Chain Route
    elif ftl_item.Supply_Chain.values[0] == 'Created':
        chain.append(food_processor)
        if ftl_item.Category.values[0] == 'Cheese' or ftl_item.Category.values[0] == 'Nut Butters':
            rand = random.randint(0,100)
            if rand > 50:
                chain.append(food_processor)
            #Determine Cheese Continued Manufacturing Route
            if ftl_item.Category.values[0] == 'Nut Butters':
                if rand > 75:
                    chain.append(food_processor)
                else:
                    chain.append(kill)
            else:
                chain.append(kill)

    #Fish Supply Chain Route
    elif ftl_item.Category.values[0] == 'Seafood':

        #Caught fish Supply Chain Route
        if ftl_item.Supply_Chain.values[0] == 'Caught':
            chain.append(lbr)
            #Fill in the rest here

        #Aquaculture Supply Chain Route
        if ftl_item.Supply_Chain.values[0] == 'Aquaculture':
            chain.append(seaFarm)

        #Determine if the food will go to an aggregator
        rand = random.randint(0,100)
        if rand > 50:
            chain.append(agg)

        #Determine if the food goes through a kill step or continues to get processed
        chain.append(food_processor)

        rand = random.randint(0,100)
        if rand > 50:
            chain.append(food_processor)

            if rand > 75:
                chain.append(food_processor)

            else:
                chain.append(kill)

        else:
            chain.append(kill)

            
    if chain[-1] != kill:
        #Initialize the retail options
        retail_options = [rest, grocFood, grocNoFood]
        
        #Sale Route - this will be the same for every food
        sale_int = random.randint(0,100)
        #Determine if it is sold direct-to-consumer
        if sale_int <= 7:
            sale_int = random.randint(0,100)
            if sale_int <= 27:
                chain.append(direct_sale)
            else:
                chain.append(random.choice(retail_options))
        else:
        
            #If not, do the indirect sales route
            route_1 = random.randint(0,100)
            if route_1 <= 50:
                chain.append(dist)
                route_2 = random.randint(0,100)
                if route_2 <= 20:
                    chain.append(whole)
                else:
                    chain.append(random.choice(retail_options))
                
            else:
                chain.append(food_processor)
                route_2 = random.randint(0,100)
                if route_2 <= 50:
                    chain.append(dist)
                    route_2 = random.randint(0,100)
                    if route_2 <= 20:
                        chain.append(whole)
                    else:
                        chain.append(random.choice(retail_options))
                else:
                    route_2 = random.randint(0,100)
                    if route_2 <= 20:
                        chain.append(whole)
                    else:
                        chain.append(random.choice(retail_options))
            if chain[-1] == whole:
                route_int = random.randint(0,100)
                if route_int < 50:
                    chain.append(random.choice(retail_options))

    return chain

### CTE Generation Functions

In [18]:
field_name_list = ['Field A-08',
              'Field A-09',
              'Field B-09',
              'Field A-10',
              'Field C-10',
              'Field G-20',
              'Field V-09',
              'Bed 34',
              'Acre P30',
              'Acre P23',
              'Walker Field',
              'Garden Bed',
              'Rose Garden',
              'Hillock Field',
              'Grasslands',
              'Ranch P20',
              'Barn F 30'
              ]

container_name_list = ['Pond',
                       'Pool',
                       'Tank',
                       'Cage']

def harvesting_cte(fake, ftl_item, farm, next_entity, field_name_list = field_name_list, container_name_list=container_name_list):
    #Determine date
    start_date = datetime.strptime('2022-01-01', '%Y-%m-%d')
    end_date = datetime.strptime('2023-06-01','%Y-%m-%d')
    date_harvested = str(fake.date_between_dates(date_start=start_date, date_end=end_date))

    data_submitter = farm.businessName.values[0]
    food_name = ftl_item.Food.values[0]
    quantity = fake.random_int(min=1, max=1000)
    recipient = next_entity.businessName.values[0]
    unit_of_measure = fake.random_element(elements=('kg', 'g', 'lbs', 'Dozen'))
    farm_name = farm.businessName.values[0]
    phone_number = farm.primaryPhone.values[0]

    #Determine what field or container was used
    if ftl_item.Supply_Chain.values[0] == 'Farmed':
        field_name = random.choice(field_name_list)
        container = 'n/a'
    elif ftl_item.Supply_Chain.values[0] == 'Aquaculture':
        field_name = 'n/a'
        container = random.choice(container_name_list) + ' ' + str(random.randint(1,10))
    
    #Need to add location description of farm where it was harvested
    harvesting_info = {
        'dataSubmitter': data_submitter,
        'recipient' : recipient,
        'commodity': food_name,
        'quantity' : quantity,
        'unitOfMeasure' : unit_of_measure,
        'farmName' : farm_name,
        'fieldName' : field_name,
        'containerName' : container,
        'cteDate' : date_harvested,
        'phoneNumber' : phone_number    
    }

    return harvesting_info

In [19]:
def cooling_cte(harvesting_info, ftl_item, facility, next_entity):

    data_submitter = facility.businessName.values[0]
    food_name = ftl_item.Food.values[0]
    quantity = harvesting_info['quantity']
    recipient = next_entity.businessName.values[0]
    unit_of_measure = harvesting_info['unitOfMeasure']
    farm_name = harvesting_info['dataSubmitter']
    cooler_location = facility.businessName.values[0]
    date_cooled = harvesting_info['cteDate']
    phone_number = facility.primaryPhone.values[0]
    
    #Need to add location description of farm where it was harvested
    cooling_info = {
        'dataSubmitter': data_submitter,
        'recipient' : recipient,
        'commodity': food_name,
        'quantity' : quantity,
        'unitOfMeasure' : unit_of_measure,
        'coolerLocation' : cooler_location,
        'cteDate' : date_cooled,
        'harvesterName' : farm_name,
        'phoneNumber' : phone_number    
    }

    return cooling_info

In [20]:
def packaging_cte(fake, harvesting_info, cooling_info, ftl_item, facility):
    # List of packaging types
    packaging_type = ['Box', 'Bag', 'Crate', 'Can', 'Bottle', 'Jar', 'Pouch', 'Carton']

    data_submitter = facility.businessName.values[0]
    package_type = random.choice(packaging_type)
    quantity = fake.random_int(min=1, max=1000)
    unit_of_measure = fake.random_element(elements=('kg', 'g', 'lbs', 'Dozen'))
    packaging_date = str(datetime.strptime(cooling_info['cteDate'], '%Y-%m-%d') + timedelta(days=random.randint(0,3)))
    phone_number = fake.basic_phone_number()
    traceability_lot_code = fake.bothify(text='??-####', letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    product_description = harvesting_info['dataSubmitter'] + ' ' + harvesting_info['commodity'] + ', ' + str(fake.random_int(min=1, max= 50)) + unit_of_measure + ' case'


    packaging_info = {
        'dataSubmitter': data_submitter,
        'commodity':ftl_item.Food.values[0],
        'dateFoodReceived' : packaging_date,
        'quantityReceived':harvesting_info['quantity'],
        'harvestingLocation':harvesting_info['dataSubmitter'],
        'harvestedField':harvesting_info['fieldName'], #For produce
        'harvestedContainer':harvesting_info['containerName'], #For Aquaculture
        'harvestedPhoneNumber':harvesting_info['phoneNumber'],
        'dateHarvested':harvesting_info['cteDate'],
        'coolingLocation':cooling_info['dataSubmitter'],
        'dateOfCooling':cooling_info['cteDate'],
        'traceabilityLotCode': traceability_lot_code,
        'productDescription':product_description,
        'quantity' : quantity,
        'packageType': package_type,
        'traceabilityLotCodeSourceLocation':facility.businessName.values[0],
        'cteDate' : packaging_date,
        'referenceDocumentTypeNumber': 'IP WO ' + str(random.randint(10000,50000)) 
    }

    return packaging_info

In [21]:
def shipping_cte(previous_cte, next_entity, facility):

    shippedDate = str(datetime.strptime(previous_cte['cteDate'], '%Y-%m-%d') + timedelta(days=random.randint(0,3)))

    shipping_info = {
        'dataSubmitter': facility.businessName.values[0],
        'traceabilityLotCode': previous_cte['traceabilityLotCode'],
        'quantity': previous_cte['quantity'],
        'unitOfMeasure':previous_cte['unitOfMeasure'],
        'productDescription': previous_cte['productDescription'],
        'subsequentLocation': next_entity.businessName.values[0],
        'previousSourceLocation': previous_cte['dataSubmitter'],
        'cteDate': shippedDate,
        'traceabilityLotCodeSourceLocation': previous_cte['traceabilityLotCodeSourceLocation'],
        'referenceDocumentTypeNumber': 'BOL ' + str(random.randint(10000,50000)) 
    }

    return shipping_info

In [22]:
def receiving_cte(previous_cte, facility):
    receivingDate = str(datetime.strptime(previous_cte['cteDate'], '%Y-%m-%d') + timedelta(days=random.randint(0,3)))

    receiving_info = {
        'dataSubmitter': facility.businessName.values[0],
        'traceabilityLotCode': previous_cte['traceabilityLotCode'],
        'quantity': previous_cte['quantity'],
        'unitOfMeasure':previous_cte['unitOfMeasure'],
        'productDescription': previous_cte['productDescription'],
        'previousSourceLocation': previous_cte['dataSubmitter'],
        'receivingLocation': facility.businessName.values[0],
        'cteDate': receivingDate,
        'traceabilityLotCodeSourceLocation': previous_cte['traceabilityLotCodeSourceLocation'],
        'referenceDocumentTypeNumber': 'BOL ' + str(random.randint(10000,50000))
    }

    return receiving_info

In [23]:
def first_land_based_receiver_cte(fake, ftl_item, facility):
    #Determine the dates of harvest and landing
    start_date = datetime.strptime('2022-01-01', '%Y-%m-%d')
    end_date = datetime.strptime('2023-06-01','%Y-%m-%d')
    firstHarvestDate = fake.date_between_dates(date_start=start_date, date_end=end_date)
    secondHarvestDate = firstHarvestDate + timedelta(days=random.randint(2,10))

    dateLanded = secondHarvestDate + timedelta(days=random.randint(1,3))

    #Determine Harvest Location
    secondLine = 'Major Fishing Area ' + str(random.randint(1,10))
    thirdLine = random.choice(['Northern', 'Southern', 'Central']) + ' ' + random.choice(['Pacific','Atlantic']) 

    harvestDateAndLocation = str(firstHarvestDate) + ' - ' + str(secondHarvestDate) + '\n' + secondLine + '\n' + thirdLine

    #Determine the quantity and unit of measure
    quantity = random.randint(20,1000)
    unitOfMeasure = random.choice(['kg', 'lb'])

    #Determine the traceability lot code
    traceability_lot_code = fake.bothify(text='??-####', letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ')

    first_land_based_receiver_info = {
        'dataSubmitter':facility.businessName.values[0],
        'traceabilityLotCode':traceability_lot_code,
        'fishType':ftl_item.Food.values[0],
        'quantity':quantity,
        'unitOfMeasure':unitOfMeasure,
        'harvestDateAndLocation':harvestDateAndLocation,
        'traceabilityLotCodeSourceLocation':facility.businessName.values[0],
        'cteDate':str(dateLanded),
        'referenceDocumentTypeNumber': 'Landing Record: ' + str(random.randint(10000,50000))
    }

    return first_land_based_receiver_info

### Supply Chain Functions

In [24]:
def farm_function(fake, ftl_item, supply_chain, entities_df):
    farm = entities_df[entities_df.businessType == 'farm'].sample()
    packaged_type = supply_chain[supply_chain.index('farm') + 1]

    #Initialize the CTEs for the farm
    ctes = {}

    #Determine what the next entity is for the KDEs that happen on the farm
    if packaged_type == 'fieldPacked':
        next_entity = farm
        ctes['harvesting'] = harvesting_cte(fake, ftl_item, farm, next_entity)
        ctes['cooling'] = cooling_cte(ctes['harvesting'], ftl_item, farm, next_entity)
        ctes['initialPackaging'] = packaging_cte(fake,ctes['harvesting'],ctes['cooling'],ftl_item,farm)
    elif packaged_type == 'packagingProcessor':
        next_entity = entities_df[entities_df.businessType == 'packaging'].sample()
        ctes['harvesting'] = harvesting_cte(fake, ftl_item, farm, next_entity)

    return ctes


In [25]:
def initial_fish_function(fake, ftl_item, entities_df):
    category = ftl_item.Supply_Chain.values[0]

    ctes={}
    #Aquaculture route
    if category == 'Aquaculture':
        farm = entities_df[entities_df.businessType == 'fish_farm'].sample()
        next_entity=farm
        ctes['harvesting'] = harvesting_cte(fake, ftl_item, farm, next_entity)
        ctes['cooling'] = cooling_cte(ctes['harvesting'], ftl_item, farm, next_entity)
        ctes['initialPackaging'] = packaging_cte(fake,ctes['harvesting'],ctes['cooling'],ftl_item,farm)

    #Wild Caught Route
    elif category =='Caught':
        facility = entities_df[entities_df.businessType == 'land_based_receiver'].sample()
        ctes['firstLandBasedReceiving'] = first_land_based_receiver_cte(fake, ftl_item, facility)

    return ctes 

# Generate the Data

In [26]:
fake = Faker()

In [39]:
food_item = ftl_df[ftl_df.Supply_Chain == 'Farmed'].sample()
sc = generate_supply_chain(food_item)
ctes = farm_function(fake, food_item, sc, entities_df)
ctes

{'harvesting': {'dataSubmitter': 'Golden Grove Farm',
  'recipient': 'Golden Grove Farm',
  'commodity': 'blueberries',
  'quantity': 17,
  'unitOfMeasure': 'g',
  'farmName': 'Golden Grove Farm',
  'fieldName': 'Field A-10',
  'containerName': 'n/a',
  'cteDate': '2022-09-14',
  'phoneNumber': '7455948320'},
 'cooling': {'dataSubmitter': 'Golden Grove Farm',
  'recipient': 'Golden Grove Farm',
  'commodity': 'blueberries',
  'quantity': 17,
  'unitOfMeasure': 'g',
  'coolerLocation': 'Golden Grove Farm',
  'cteDate': '2022-09-14',
  'harvesterName': 'Golden Grove Farm',
  'phoneNumber': '7455948320'},
 'initialPackaging': {'dataSubmitter': 'Golden Grove Farm',
  'commodity': 'blueberries',
  'dateFoodReceived': '2022-09-14 00:00:00',
  'quantityReceived': 17,
  'harvestingLocation': 'Golden Grove Farm',
  'harvestedField': 'Field A-10',
  'harvestedContainer': 'n/a',
  'harvestedPhoneNumber': '7455948320',
  'dateHarvested': '2022-09-14',
  'coolingLocation': 'Golden Grove Farm',
  'd

In [43]:
food_item = ftl_df[ftl_df.Supply_Chain == 'Caught'].sample()
sc = generate_supply_chain(food_item)
ctes = initial_fish_function(fake, food_item, entities_df)
ctes

{'firstLandBasedReceiving': {'dataSubmitter': 'Marine Catchers',
  'traceabilityLotCode': 'NT-1245',
  'fishType': 'crab',
  'quantity': 416,
  'unitOfMeasure': 'kg',
  'harvestDateAndLocation': '2023-01-26 - 2023-01-29\nMajor Fishing Area 1\nSouthern Pacific',
  'traceabilityLotCodeSourceLocation': 'Marine Catchers',
  'cteDate': '2023-02-01',
  'referenceDocumentTypeNumber': 'Landing Record: 37518'}}

In [28]:
%%time
simmed_food = []
for _ in range(10000):
    food_item = ftl_df[ftl_df.Supply_Chain == 'Caught'].sample()
    sc = generate_supply_chain(food_item)
    ctes = initial_fish_function(fake, food_item, sc, entities_df)
    simmed_food.append(ctes)

CPU times: user 15.3 s, sys: 159 ms, total: 15.5 s
Wall time: 15.6 s


In [30]:
len(simmed_food)

10000

In [183]:
str(date)

'2023-02-02'

In [23]:
print('2023-04-06 - 2023-04-16\nMajor Fishing Area 1\nSouthernAtlantic')

2023-04-06 - 2023-04-16
Major Fishing Area 1
SouthernAtlantic
