In [31]:
import numpy as np
import pandas as pd
import random
import ast

In [2]:
data = pd.read_csv('../scrapers/pfaf/all_plants.csv')

In [11]:
# Split pollinators into list. Should be unnecessary because I changed pfaf.py 
data['Pollinators'] = data['Pollinators'].apply(lambda x: str(x).split(','))

In [30]:
data['HardinessZones'][3]

'[3, 4, 5, 6, 7, 8, 9]'

In [32]:
def filter_plants(data, family=None, genus=None, species=None, common_name=None, growth_rate=None,
              hardiness_zone=None, height=None, width=None, plant_type=None, pollinators=None,
              leaf=None, flower=None, ripen=None, reproduction=None, soils=None, pH=None,
              preferences=None, tolerances=None, habitat=None, habitat_range=None,
              edibility=None, medicinal=None, other_uses=None, pfaf=None):

    # Define the list of valid pollinators
    valid_pollinators = ['bees', 'insects', 'wind', 'flies', 'lepidoptera', 'beetles']

    filtered_df = data
    # Filter by each column if a value is provided
    # Starting with full dataframe self.data
    if family is not None:
        filtered_df = filtered_df[filtered_df['Family'] == family]
        
    if genus is not None:
        filtered_df = filtered_df[filtered_df['Genus'] == genus]
        
    if species is not None:
        filtered_df = filtered_df[filtered_df['Species'] == species]
        
    if common_name is not None:
        filtered_df = filtered_df[filtered_df['CommonName'] == common_name]
        
    if growth_rate is not None:
        filtered_df = filtered_df[filtered_df['GrowthRate'].isin(growth_rate) | filtered_df['GrowthRate'].isna()]
        
    if hardiness_zone is not None:
        filtered_df = filtered_df[filtered_df['HardinessZones'].apply(lambda zones: hardiness_zone in ast.literal_eval(zones))]
            
    if height is not None:
        filtered_df = filtered_df[filtered_df['Height'] == height]
        
    if width is not None:
        filtered_df = filtered_df[filtered_df['Width'] == width]
        
    if plant_type is not None:
        filtered_df = filtered_df[filtered_df['Type'] == plant_type]
        
    if pollinators is not None:
        if isinstance(pollinators, list) and all(p in valid_pollinators for p in pollinators):
            filtered_df = filtered_df[filtered_df['Pollinators'].apply(lambda x: all(p in x for p in pollinators))]
        else:
            raise ValueError(f'Pollinators must be a list of any combination of {valid_pollinators}')
    
    if leaf is not None:
        filtered_df = filtered_df[filtered_df['Leaf'] == leaf]
        
    if flower is not None:
        filtered_df = filtered_df[filtered_df['Flower'] == flower]
        
    if ripen is not None:
        filtered_df = filtered_df[filtered_df['Ripen'] == ripen]
        
    if reproduction is not None:
        filtered_df = filtered_df[filtered_df['Reproduction'] == reproduction]
        
    if soils is not None:
        filtered_df = filtered_df[filtered_df['Soils'] == soils]
        
    if pH is not None:
        filtered_df = filtered_df[filtered_df['pH'] == pH]
        
    if preferences is not None:
        filtered_df = filtered_df[filtered_df['Preferences'] == preferences]
        
    if tolerances is not None:
        filtered_df = filtered_df[filtered_df['Tolerances'] == tolerances]
        
    if habitat is not None:
        filtered_df = filtered_df[filtered_df['Habitat'] == habitat]
        
    if habitat_range is not None:
        filtered_df = filtered_df[filtered_df['HabitatRange'] == habitat_range]
        
    if edibility is not None:
        filtered_df = filtered_df[filtered_df['Edibility'] == edibility]
        
    if medicinal is not None:
        filtered_df = filtered_df[filtered_df['Medicinal'] == medicinal]
        
    if other_uses is not None:
        filtered_df = filtered_df[filtered_df['OtherUses'] == other_uses]
        
    if pfaf is not None:
        filtered_df = filtered_df[filtered_df['PFAF'] == pfaf]

    return filtered_df

In [33]:
df_filtered = filter_plants(data, 
                            hardiness_zone=5, 
                            pollinators=['flies'], 
                            growth_rate=['fast'])

In [34]:
df_filtered

Unnamed: 0,Family,Genus,Species,CommonName,GrowthRate,HardinessZones,Height,Width,Type,Pollinators,...,Soils,pH,Preferences,Tolerances,Habitat,HabitatRange,Edibility,Medicinal,OtherUses,PFAF
50,Aristolochiaceae,Asarum,shuttleworthii,"Asarabacca, Mottled Wild Ginger",fast,"[5, 6, 7, 8, 9]",0.1,0.3,evergreen,[flies],...,"['light', 'medium', 'heavy']",mildly acid and neutral,[' well-drained soil. Suitable pH: mildly acid'],[],"Rich mountain woods[235]. Acidic soils, often ...",South-eastern N. America - West Virginia to Al...,(2 of 5),(0 of 5),(2 of 5),https://pfaf.org/user/Plant.aspx?LatinName=Asa...
159,Liliaceae,Fritillaria,camschatcensis,"Kamchatka Lily, \tKamchatka fritillary",,"[4, 5, 6, 7, 8]",0.5,0.1,,[flies],...,"['light', 'medium']","mildly acid, neutral and basic (mildly alkalin...",[' well-drained soil. Suitable pH: mildly acid...,[],Moist areas from sea level to 600 metres[60] i...,Northern N. America - Washington to Alaska and...,(4 of 5),(0 of 5),(0 of 5),https://pfaf.org/user/Plant.aspx?LatinName=Fri...
414,Scrophulariaceae,Veronica,officinalis,Common Speedwell,,"[3, 4, 5, 6, 7]",0.1,0.3,,"[flies, bees]",...,"['light', 'medium', 'heavy']","mildly acid, neutral and basic (mildly alkaline)",[' well-drained soil. Suitable pH: mildly acid...,[],"Heaths, moors, grassland, dry hedgebanks and c...","Europe, including Britain, from Iceland south ...",(1 of 5),(2 of 5),(0 of 5),https://pfaf.org/user/Plant.aspx?LatinName=Ver...


In [4]:
working_data = data[['CommonName', 'GrowthRate', 'Height', 'Width', 'pH', 'Habitat']]

In [5]:
def call_plant(data, search_name):
    # dataframe with all results
    results = data[data['CommonName'].str.lower().str.contains(search_name.lower().replace(" ", "|"),na=False)]
    all_results = []
    # return first result for simplicity here, later allow for user to select from results
#     for res in results:
#         # get attributes, so as to not return a dataframe
#         common_name = res['CommonName']
#         height = res['Height']
#         width = res['Width']
#         ph = res['pH']
#         habitat = res['Habitat']
#         all_results.append({'name': common_name,
#                            'height': height,
#                            'width': width,
#                            'ph': ph,
#                            'habitat': habitat})
#     return all_results
    return results

In [6]:
search_result = call_plant(data=working_data, search_name='walnut')

In [7]:
search_result

Unnamed: 0,CommonName,GrowthRate,Height,Width,pH,Habitat
188,Black Walnut,fast,30.0,20.0,"mildly acid, neutral and basic (mildly alkaline)","Rich fertile woods and hillsides[43, 82] in de..."
189,"Walnut, English walnut, Persian Walnut,",medium,20.0,20.0,"mildly acid, neutral and basic (mildly alkaline)","Forests in the Himalayas, preferring a norther..."


In [8]:
print([res for res in search_result.iterrows()][1][1])

CommonName              Walnut, English walnut, Persian Walnut,
GrowthRate                                               medium
Height                                                     20.0
Width                                                      20.0
pH             mildly acid, neutral and basic (mildly alkaline)
Habitat       Forests in the Himalayas, preferring a norther...
Name: 189, dtype: object


In [9]:
for name in search_result['CommonName']:
    print(name.split(',')[0])
# Need to remove pfaf references in square brackets []

Black Walnut
Walnut


In [10]:
class MyDesign():
    def __init__(self, length, width, units):
        self.length = length
        self.width = width
        self.units = units
        self.area = length * width
        self.overlap = 0
        # I'll use a list to store data, create dataframe when needed
        self.data = []
        
    def add_plant(self, common_name, x_pos, y_pos, height, width):
        self.data.append({
            'common_name': common_name, # should change to latin name as unique id
            'x, y': (x_pos, y_pos),
            'height': height,
            'width': width
        })
    
    def to_dataframe(self):
        df = pd.DataFrame(self.data)
        # Use groupby latin_name to sum plants 
        ### *** THIS DOESNT WORK. NEED TO MAINTAIN INDIVIDUAL X- Y- POS AND COUNT TOTALS *** ### 
        return df.groupby('common_name').agg({
            'common_name': 'count', 'x, y': 'unique', 'height': 'mean', 'width': 'mean'
        })

In [11]:
design1 = MyDesign(10, 20, 'meters')

In [12]:
plant_list = ['walnut', 'onion', 'wild garlic',
              'Rosemary', 'garlic', 'Grape', 
              'English walnut', 'english walnut', 'wild garlic']

In [13]:
for plant in plant_list:
    try:
        name, height, width = call_plant(working_data, plant)
        x = round(random.random() * design1.width, 2)
        y = round(random.random() * design1.length, 2)
        design1.add_plant(common_name=name, x_pos=x, y_pos=y, height=height, width=width)
    except TypeError:
        pass

ValueError: too many values to unpack (expected 3)

In [None]:
design1.data

In [None]:
design1_df = design1.to_dataframe()

In [None]:
design1_df

In [14]:
test_dict = {'a': ['var1', 'var4', 9], 'b': 3, 'c': [6, 7]}

In [15]:
len(test_dict)

3

In [17]:
test_dict.keys()

dict_keys(['a', 'b', 'c'])