In [1]:
import pandas as pd
import numpy as np
import random
from faker import Faker
from entity_generation import generate_business_entities
from datetime import datetime, timedelta

### Load the core data

In [2]:
# Read the food item list from the 'Sheet1' worksheet of ftl_items.xlsx.
# The generator functions below read the Food, Category and Supply_Chain columns of this frame.
ftl_df = pd.read_excel('ftl_items.xlsx', sheet_name='Sheet1')
ftl_df

Unnamed: 0,Food,Category,Supply_Chain
0,cilantro,Herbs (fresh),Farmed
1,parsley,Herbs (fresh),Farmed
2,basil,Herbs (fresh),Farmed
3,arugula,Leafy greens (fresh),Farmed
4,baby leaf,Leafy greens (fresh),Farmed
...,...,...,...
292,fuji apples,Fruit,Farmed
293,granny smith apples,Fruit,Farmed
294,blackberries,Fruit,Farmed
295,blueberries,Fruit,Farmed


In [3]:
# Build the table of business entities with the helper imported from entity_generation.
# The functions below filter this frame on businessType and read businessName / primaryPhone.
entities_df = generate_business_entities()
entities_df

Unnamed: 0,businessType,businessName,primaryPhone,streetAddress,city,state,zip
0,fish_farm,Marine Aquafarm,(920)742-8059,121 Murphy Crossroad,East Bradfort,TX,54220
1,restaurant,Fusion Kitchen,(759)280-1638,17989 Sherri Squares,Lake Chris,NV,27818
2,land_based_receiver,Harbor Catches,967-123-3055,12269 Jose Mountains Apt. 444,New Ryanville,KS,46876
3,grocery_no_food_bar,Natural Shop,7164067039,42524 Cooper Creek Suite 092,Royside,CO,14322
4,distributor,Trusted Supply,862-633-3671,909 April Vista Apt. 840,Howardport,NV,94946
...,...,...,...,...,...,...,...
9995,land_based_receiver,Fresh Catches,(381)608-1263,928 Lindsay Shoals Suite 398,Lake Stevenmouth,IN,54420
9996,farm,Green Fields Farm,535-698-4993,55363 John Wall,East Jacob,WV,55908
9997,packaging,Premium Pack,0134639745,672 Michael Junctions,Curtisview,TX,25653
9998,packaging,Premium Encase,2307084044,898 Andre Cove,North John,AS,33897


In [4]:
# List the distinct business types; the supply chain functions select entities by these values.
entities_df.businessType.unique()

array(['fish_farm', 'restaurant', 'land_based_receiver',
       'grocery_no_food_bar', 'distributor', 'processor', 'grocery',
       'farm', 'wholesaler', 'packaging'], dtype=object)

# Data Generation Functions

### Generate Supply Chain

In [5]:
def generate_supply_chain(ftl_item):
    """Build a randomized list of supply chain node names for one food item.

    ftl_item is a single-row DataFrame; Supply_Chain is always read, and
    Category is read for every item that is not 'Farmed'.

    The returned list starts with the origin nodes (farm, manufacturing or
    fish route, depending on the item) and ends with the sale route, which is
    drawn the same way for every food. Every decision is a draw from
    random.randint(0, 100).
    """
    retail_options = ['restaurant', 'grocery_no_food_bar', 'grocery']
    supply_chain_kind = ftl_item.Supply_Chain.values[0]
    nodes = []

    def roll():
        # All branching decisions use the same inclusive 0-100 draw.
        return random.randint(0, 100)

    def add_optional_manufacturing_steps():
        # Up to two extra manufacturing hops; the second is only drawn when the first was added.
        if roll() > 50:
            nodes.append('food_manufacturing')
            if roll() > 50:
                nodes.append('food_manufacturing')

    def add_wholesaler_or_retailer():
        # Final indirect hop: a wholesaler on a draw of 20 or less, otherwise a random retailer.
        if roll() <= 20:
            nodes.append('wholesaler')
        else:
            nodes.append(random.choice(retail_options))

    # TODO (from the original author): decide whether the food is an "empty node"
    # item once the remaining supply chain information is available.

    # ---- Origin of the product ----
    if supply_chain_kind == 'Farmed':
        nodes.append('farm')
        # Field packed vs. sent to a packaging processor
        nodes.append('field_packed' if roll() >= 50 else 'packaging_processor')

    elif supply_chain_kind == 'Created':
        nodes.append('food_manufacturing')
        category = ftl_item.Category.values[0]
        if category == 'Cheese' or category == 'Nut Butters':
            nodes.append('food_manufacturing')
            # Only cheese may continue through further manufacturing steps
            if category == 'Cheese':
                add_optional_manufacturing_steps()

    elif ftl_item.Category.values[0] == 'Fish':
        # Caught fish start at a land based receiver, aquaculture at a fish farm.
        # The rest of both fish routes is still to be filled in.
        fish_origin = {
            'Caught': 'land_based_receiver',
            'Aquaculture': 'fish_farm',
        }.get(supply_chain_kind)
        if fish_origin is not None:
            nodes.append(fish_origin)
        add_optional_manufacturing_steps()

    # ---- Sale route (identical for every food) ----
    if roll() <= 7:
        # Direct sales route
        if roll() <= 27:
            nodes.append('direct-to-consumer')
        else:
            nodes.append(random.choice(retail_options))
        return nodes

    # Indirect sales route
    if roll() <= 50:
        nodes.append('distributor')
    else:
        nodes.append('processor')
        if roll() <= 50:
            nodes.append('distributor')
    add_wholesaler_or_retailer()

    # A wholesaler may still pass the product on to a retailer
    if nodes[-1] == 'wholesaler' and roll() < 50:
        nodes.append(random.choice(retail_options))

    return nodes

### CTE Generation Functions

In [6]:
# Candidate field names used for farmed products
field_name_list = ['Field A-08',
              'Field A-09',
              'Field B-09',
              'Field A-10',
              'Field C-10',
              'Field G-20',
              'Field V-09',
              'Bed 34',
              'Acre P30',
              'Acre P23',
              'Walker Field',
              'Garden Bed',
              'Rose Garden',
              'Hillock Field',
              'Grasslands',
              'Ranch P20',
              'Barn F 30'
              ]

# Candidate container types used for aquaculture products (a number from 1 to 10 is appended)
container_name_list = ['Pond',
                       'Pool',
                       'Tank',
                       'Cage']

def harvesting_cte(fake, ftl_item, farm, next_entity, field_name_list = field_name_list, container_name_list=container_name_list):
    """Build the harvesting CTE record for one food item.

    Parameters:
        fake: object providing date_between_dates, random_int and
            random_element (a Faker instance in this notebook).
        ftl_item: single-row DataFrame; Food and Supply_Chain are read.
        farm: single-row DataFrame of the harvesting entity; businessName
            and primaryPhone are read.
        next_entity: single-row DataFrame of the recipient; businessName is read.
        field_name_list: field names to choose from for 'Farmed' items.
        container_name_list: container types to choose from for 'Aquaculture' items.

    Returns:
        dict with the harvesting key data elements. 'cteDate' is a
        'YYYY-MM-DD' string between 2022-01-01 and 2023-06-01.
    """
    #Determine date
    start_date = datetime.strptime('2022-01-01', '%Y-%m-%d')
    end_date = datetime.strptime('2023-06-01','%Y-%m-%d')
    date_harvested = str(fake.date_between_dates(date_start=start_date, date_end=end_date))

    data_submitter = farm.businessName.values[0]
    food_name = ftl_item.Food.values[0]
    quantity = fake.random_int(min=1, max=1000)
    recipient = next_entity.businessName.values[0]
    unit_of_measure = fake.random_element(elements=('kg', 'g', 'lbs', 'Dozen'))
    farm_name = farm.businessName.values[0]
    phone_number = farm.primaryPhone.values[0]

    #Determine what field or container was used
    supply_chain_kind = ftl_item.Supply_Chain.values[0]
    if supply_chain_kind == 'Farmed':
        field_name = random.choice(field_name_list)
        container = 'n/a'
    elif supply_chain_kind == 'Aquaculture':
        field_name = 'n/a'
        container = random.choice(container_name_list) + ' ' + str(random.randint(1,10))
    else:
        # Neither a field nor a container applies to other supply chain types.
        # Without this fallback both names would be unbound below.
        field_name = 'n/a'
        container = 'n/a'

    #Need to add location description of farm where it was harvested
    harvesting_info = {
        'dataSubmitter': data_submitter,
        'recipient' : recipient,
        'commodity': food_name,
        'quantity' : quantity,
        'unitOfMeasure' : unit_of_measure,
        'farmName' : farm_name,
        'fieldName' : field_name,
        'containerName' : container,
        'cteDate' : date_harvested,
        'phoneNumber' : phone_number
    }

    return harvesting_info

In [7]:
def cooling_cte(harvesting_info, ftl_item, facility, next_entity):
    """Build the cooling CTE record from an existing harvesting record.

    Quantity, unit of measure, harvester name and date are carried over from
    harvesting_info; the cooling facility supplies the submitter, cooler
    location and phone number; next_entity supplies the recipient.
    """
    cooler_name = facility.businessName.values[0]

    #Need to add location description of farm where it was harvested
    return {
        'dataSubmitter': cooler_name,
        'recipient': next_entity.businessName.values[0],
        'commodity': ftl_item.Food.values[0],
        'quantity': harvesting_info['quantity'],
        'unitOfMeasure': harvesting_info['unitOfMeasure'],
        'coolerLocation': cooler_name,
        # The cooling date is the harvest date itself
        'cteDate': harvesting_info['cteDate'],
        'harvesterName': harvesting_info['dataSubmitter'],
        'phoneNumber': facility.primaryPhone.values[0],
    }

In [8]:
def packaging_cte(fake, harvesting_info, cooling_info, ftl_item, facility):
    """Build the initial packaging CTE record.

    Parameters:
        fake: object providing random_int, random_element and bothify
            (a Faker instance in this notebook).
        harvesting_info: dict returned by harvesting_cte.
        cooling_info: dict returned by cooling_cte; its 'cteDate' must be a
            'YYYY-MM-DD' string.
        ftl_item: single-row DataFrame; Food is read.
        facility: single-row DataFrame of the packing entity; businessName is read.

    Returns:
        dict with the packaging key data elements. 'cteDate' is a
        'YYYY-MM-DD' string 0-3 days after the cooling date, and
        'unitOfMeasure' is included so that shipping_cte / receiving_cte can
        consume this record as their previous CTE.
    """
    # List of packaging types
    packaging_type = ['Box', 'Bag', 'Crate', 'Can', 'Bottle', 'Jar', 'Pouch', 'Carton']

    data_submitter = facility.businessName.values[0]
    package_type = random.choice(packaging_type)
    quantity = fake.random_int(min=1, max=1000)
    unit_of_measure = fake.random_element(elements=('kg', 'g', 'lbs', 'Dozen'))

    # Format as a plain date: str() of a datetime would append ' 00:00:00',
    # which the downstream '%Y-%m-%d' parsing cannot handle.
    packaged_on = datetime.strptime(cooling_info['cteDate'], '%Y-%m-%d') + timedelta(days=random.randint(0,3))
    packaging_date = packaged_on.strftime('%Y-%m-%d')

    traceability_lot_code = fake.bothify(text='??-####', letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    product_description = harvesting_info['dataSubmitter'] + ' ' + harvesting_info['commodity'] + ', ' + str(fake.random_int(min=1, max= 50)) + unit_of_measure + ' case'


    packaging_info = {
        'dataSubmitter': data_submitter,
        'commodity':ftl_item.Food.values[0],
        'dateFoodReceived' : packaging_date,
        'quantityReceived':harvesting_info['quantity'],
        'harvestingLocation':harvesting_info['dataSubmitter'],
        'harvestedField':harvesting_info['fieldName'], #For produce
        'harvestedContainer':harvesting_info['containerName'], #For Aquaculture
        'harvestedPhoneNumber':harvesting_info['phoneNumber'],
        'dateHarvested':harvesting_info['cteDate'],
        'coolingLocation':cooling_info['dataSubmitter'],
        'dateOfCooling':cooling_info['cteDate'],
        'traceabilityLotCode': traceability_lot_code,
        'productDescription':product_description,
        'quantity' : quantity,
        'unitOfMeasure' : unit_of_measure,
        'packageType': package_type,
        'traceabilityLotCodeSourceLocation':facility.businessName.values[0],
        'cteDate' : packaging_date,
        'referenceDocumentTypeNumber': 'IP WO ' + str(random.randint(10000,50000))
    }

    return packaging_info

In [9]:
def shipping_cte(previous_cte, next_entity, facility):
    """Build the shipping CTE record that follows previous_cte.

    Parameters:
        previous_cte: dict of the preceding CTE; reads traceabilityLotCode,
            quantity, unitOfMeasure, productDescription, dataSubmitter,
            cteDate and traceabilityLotCodeSourceLocation.
        next_entity: single-row DataFrame of the destination; businessName is read.
        facility: single-row DataFrame of the shipper; businessName is read.

    Returns:
        dict with the shipping key data elements. 'cteDate' is a
        'YYYY-MM-DD' string 0-3 days after the previous CTE date.
    """
    # Parse only the leading YYYY-MM-DD part so a value that carries a time
    # suffix (e.g. '2022-05-01 00:00:00') is still accepted.
    previous_date = datetime.strptime(previous_cte['cteDate'][:10], '%Y-%m-%d')
    # Emit a plain date string; str() of a datetime would add ' 00:00:00' and
    # break the '%Y-%m-%d' parsing done by the next CTE.
    shippedDate = (previous_date + timedelta(days=random.randint(0,3))).strftime('%Y-%m-%d')

    shipping_info = {
        'dataSubmitter': facility.businessName.values[0],
        'traceabilityLotCode': previous_cte['traceabilityLotCode'],
        'quantity': previous_cte['quantity'],
        'unitOfMeasure':previous_cte['unitOfMeasure'],
        'productDescription': previous_cte['productDescription'],
        'subsequentLocation': next_entity.businessName.values[0],
        'previousSourceLocation': previous_cte['dataSubmitter'],
        'cteDate': shippedDate,
        'traceabilityLotCodeSourceLocation': previous_cte['traceabilityLotCodeSourceLocation'],
        'referenceDocumentTypeNumber': 'BOL ' + str(random.randint(10000,50000))
    }

    return shipping_info

In [10]:
def receiving_cte(previous_cte, facility):
    """Build the receiving CTE record that follows previous_cte.

    Parameters:
        previous_cte: dict of the preceding CTE; reads traceabilityLotCode,
            quantity, unitOfMeasure, productDescription, dataSubmitter,
            cteDate and traceabilityLotCodeSourceLocation.
        facility: single-row DataFrame of the receiver; businessName is read.

    Returns:
        dict with the receiving key data elements. 'cteDate' is a
        'YYYY-MM-DD' string 0-3 days after the previous CTE date.
    """
    # Parse only the leading YYYY-MM-DD part so a value that carries a time
    # suffix (e.g. '2022-05-01 00:00:00') is still accepted.
    previous_date = datetime.strptime(previous_cte['cteDate'][:10], '%Y-%m-%d')
    # Emit a plain date string; str() of a datetime would add ' 00:00:00' and
    # break any later '%Y-%m-%d' parsing of this record's date.
    receivingDate = (previous_date + timedelta(days=random.randint(0,3))).strftime('%Y-%m-%d')

    receiving_info = {
        'dataSubmitter': facility.businessName.values[0],
        'traceabilityLotCode': previous_cte['traceabilityLotCode'],
        'quantity': previous_cte['quantity'],
        'unitOfMeasure':previous_cte['unitOfMeasure'],
        'productDescription': previous_cte['productDescription'],
        'previousSourceLocation': previous_cte['dataSubmitter'],
        'receivingLocation': facility.businessName.values[0],
        'cteDate': receivingDate,
        'traceabilityLotCodeSourceLocation': previous_cte['traceabilityLotCodeSourceLocation'],
        'referenceDocumentTypeNumber': 'BOL ' + str(random.randint(10000,50000))
    }

    return receiving_info

In [24]:
def first_land_based_receiver_cte(fake, ftl_item, facility):
    """Build the first land-based receiver CTE record for a caught fish item.

    A harvest window is drawn starting between 2022-01-01 and 2023-06-01 and
    lasting 2-10 days; the landing date (the record's 'cteDate') is 1-3 days
    after the window ends. The harvest location is a randomly numbered
    fishing area plus a random region/ocean pair.
    """
    #Harvest window and landing date
    window_start = datetime.strptime('2022-01-01', '%Y-%m-%d')
    window_end = datetime.strptime('2023-06-01', '%Y-%m-%d')
    harvest_began = fake.date_between_dates(date_start=window_start, date_end=window_end)
    harvest_ended = harvest_began + timedelta(days=random.randint(2, 10))
    landed_on = harvest_ended + timedelta(days=random.randint(1, 3))

    #Harvest location, rendered as three lines together with the harvest window
    fishing_area = 'Major Fishing Area ' + str(random.randint(1, 10))
    region = random.choice(['Northern', 'Southern', 'Central'])
    ocean = random.choice(['Pacific', 'Atlantic'])
    harvest_summary = '\n'.join([
        '{} - {}'.format(harvest_began, harvest_ended),
        fishing_area,
        region + ' ' + ocean,
    ])

    #Amount landed
    landed_quantity = random.randint(20, 1000)
    landed_unit = random.choice(['kg', 'lb'])

    #Traceability lot code: two capital letters, a dash and four digits
    lot_code = fake.bothify(text='??-####', letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ')

    receiver_name = facility.businessName.values[0]

    return {
        'dataSubmitter': receiver_name,
        'traceabilityLotCode': lot_code,
        'fishType': ftl_item.Food.values[0],
        'quantity': landed_quantity,
        'unitOfMeasure': landed_unit,
        'harvestDateAndLocation': harvest_summary,
        'traceabilityLotCodeSourceLocation': receiver_name,
        'cteDate': str(landed_on),
        'referenceDocumentTypeNumber': 'Landing Record: ' + str(random.randint(10000, 50000)),
    }

### Supply Chain Functions

In [25]:
def farm_function(fake, ftl_item, supply_chain, entities_df):
    """Generate the CTE records that originate on the farm.

    A random 'farm' entity is sampled from entities_df. The node that follows
    'farm' in supply_chain decides what is produced:

    * 'field_packed': harvesting, cooling and initial packaging all happen on
      the farm, so the farm is also the recipient.
    * 'packaging_processor': only harvesting happens on the farm, and a
      randomly sampled 'packaging' entity is the recipient.

    Returns a dict keyed by CTE name (empty for any other following node).
    """
    farm = entities_df.loc[entities_df['businessType'] == 'farm'].sample()
    step_after_farm = supply_chain[supply_chain.index('farm') + 1]

    if step_after_farm == 'field_packed':
        harvest_record = harvesting_cte(fake, ftl_item, farm, farm)
        cooling_record = cooling_cte(harvest_record, ftl_item, farm, farm)
        packing_record = packaging_cte(fake, harvest_record, cooling_record, ftl_item, farm)
        return {
            'harvesting': harvest_record,
            'cooling': cooling_record,
            'initialPackaging': packing_record,
        }

    if step_after_farm == 'packaging_processor':
        packer = entities_df.loc[entities_df['businessType'] == 'packaging'].sample()
        return {'harvesting': harvesting_cte(fake, ftl_item, farm, packer)}

    return {}


In [26]:
def initial_fish_function(fake, ftl_item, supply_chain, entities_df):
    """Generate the first CTE records for a fish item.

    Parameters:
        fake: Faker-like object forwarded to the CTE builders.
        ftl_item: single-row DataFrame; Supply_Chain selects the route.
        supply_chain: list of node names (currently not used here).
        entities_df: DataFrame of business entities with a businessType column.

    Returns:
        dict keyed by CTE name: harvesting / cooling / initialPackaging for
        'Aquaculture', firstLandBasedReceiving for 'Caught', and an empty
        dict for any other supply chain type.
    """
    category = ftl_item.Supply_Chain.values[0]

    ctes={}
    #Aquaculture route
    if category == 'Aquaculture':
        # Sample one fish farm, as farm_function does for farms. Without
        # .sample() the CTE builders always read the first matching row.
        farm = entities_df[entities_df.businessType == 'fish_farm'].sample()
        next_entity=farm
        ctes['harvesting'] = harvesting_cte(fake, ftl_item, farm, next_entity)
        ctes['cooling'] = cooling_cte(ctes['harvesting'], ftl_item, farm, next_entity)
        ctes['initialPackaging'] = packaging_cte(fake,ctes['harvesting'],ctes['cooling'],ftl_item,farm)

    #Wild Caught Route
    elif category =='Caught':
        # Sample one receiver instead of always using the first one in the table.
        facility = entities_df[entities_df.businessType == 'land_based_receiver'].sample()
        ctes['firstLandBasedReceiving'] = first_land_based_receiver_cte(fake, ftl_item, facility)

    return ctes

# Generate the Data

In [14]:
# Shared Faker instance passed as `fake` to the CTE generation functions.
# NOTE(review): no seed is set for Faker or `random` in the visible cells, so results differ between runs.
fake = Faker()

In [15]:
# Demo of the farmed route: sample one 'Farmed' item, draw its supply chain,
# then generate the CTEs that take place on the farm.
food_item = ftl_df[ftl_df.Supply_Chain == 'Farmed'].sample()
sc = generate_supply_chain(food_item)
ctes = farm_function(fake, food_item, sc, entities_df)
ctes

{'harvesting': {'dataSubmitter': 'Misty Hills Farm',
  'recipient': 'Fresh Box',
  'commodity': 'clover sprouts',
  'quantity': 427,
  'unitOfMeasure': 'g',
  'farmName': 'Misty Hills Farm',
  'fieldName': 'Bed 34',
  'containerName': 'n/a',
  'cteDate': '2023-02-28',
  'phoneNumber': '765-190-3015'}}

In [27]:
# Demo of the wild-caught route: sample one 'Caught' item, draw its supply chain,
# then generate the first land-based receiver CTE.
food_item = ftl_df[ftl_df.Supply_Chain == 'Caught'].sample()
sc = generate_supply_chain(food_item)
ctes = initial_fish_function(fake, food_item, sc, entities_df)
ctes

{'firstLandBasedReceiving': {'dataSubmitter': 'Harbor Catches',
  'traceabilityLotCode': 'ZM-1992',
  'fishType': 'amberjack',
  'quantity': 557,
  'unitOfMeasure': 'kg',
  'harvestDateAndLocation': '2022-10-11 - 2022-10-17\nMajor Fishing Area 9\nSouthern Pacific',
  'traceabilityLotCodeSourceLocation': 'Harbor Catches',
  'cteDate': '2022-10-19',
  'referenceDocumentTypeNumber': 'Landing Record: 17108'}}

In [177]:
# Scratch check: adding a timedelta to a date yields another date.
# Define the sample date here so the cell runs on a fresh kernel
# (value taken from the recorded outputs of these scratch cells).
date = datetime(2023, 2, 2).date()
date + timedelta(days=5)

datetime.date(2023, 2, 7)

In [183]:
# Scratch check: str() of a date gives the 'YYYY-MM-DD' form used for cteDate values.
# Define the sample date here so the cell runs on its own.
date = datetime(2023, 2, 2).date()
str(date)

'2023-02-02'

In [23]:
# Scratch check: print() is used so the '\n' escapes render as three separate lines.
# NOTE(review): this sample has no space in 'SouthernAtlantic' — presumably an earlier draft of the harvest location text.
print('2023-04-06 - 2023-04-16\nMajor Fishing Area 1\nSouthernAtlantic')

2023-04-06 - 2023-04-16
Major Fishing Area 1
SouthernAtlantic
