In [27]:
import pandas as pd
from pathlib import Path
import requests
import json
import time
import numpy as np

from globi import formatInteractions


In [2]:
def log_df(df, nrows=5):
    """Print the shape of `df` and return its first `nrows` rows.

    Returning the preview (rather than printing it) lets the notebook render
    it as the cell output when the call is the cell's last expression.
    """
    preview = df.head(nrows)
    print(df.shape)
    return preview

In [3]:
def print_json(obj):
    """Print `obj` serialized as JSON with a 4-space indent."""
    rendered = json.dumps(obj, indent=4)
    print(rendered)

In [4]:
# Collect every taxa_list.csv found recursively under ../data, as plain strings.
raw_data_paths = Path('../data').rglob('taxa_list.csv')
data_paths = list(map(str, raw_data_paths))
print(data_paths)

# CSV locations read and written by the cells below (relative paths).
globi_taxa_path = 'outputs/interactions_taxa_list.csv'
de_taxa_path = 'outputs/de_taxa_list.csv'
interaction_path = 'outputs/interactions.csv'

[]


# interactions

## create interactions taxa csv

In [32]:
# Load only the three identifying columns of the taxa list; dtype=str keeps
# every value (including taxon_id) as text.
cols = ['scientific_name', 'common_name', 'taxon_id']
taxa_df = pd.read_csv(
    de_taxa_path,
    usecols=cols,
    dtype=str,
)
log_df(taxa_df)


(7315, 3)


Unnamed: 0,taxon_id,common_name,scientific_name
0,1,Animals,Animalia
1,47534,Cnidarians,Cnidaria
2,48921,Hydrozoans,Hydrozoa
3,152823,Siphonophores,Siphonophorae
4,117304,Man O' Wars,Physaliidae


In [33]:
# Distinct subject taxa from the interactions file, relabelled to the column
# names used by the taxa list and flagged with has_globi=True.
cols = ['subject_taxon_id', 'subject_common_name', 'subject_scientific_name']
interactions_df = (
    pd.read_csv(interaction_path, dtype=str, usecols=cols)
    .drop_duplicates()
    .rename(columns={
        'subject_taxon_id': 'taxon_id',
        'subject_common_name': 'common_name',
        'subject_scientific_name': 'scientific_name',
    })
    .assign(has_globi=True)
)

log_df(interactions_df)

(4486, 4)


Unnamed: 0,taxon_id,common_name,scientific_name,has_globi
0,143452,Lettered Sphinx,Deidamia inscriptum,True
10,47727,maples,Acer,True
18,53178,ribwort plantain,Plantago lanceolata,True
26,60307,Kentucky bluegrass,Poa pratensis,True
47,47124,dicots,Magnoliopsida,True


In [34]:
# Left join: every taxa_df row is kept, and has_globi is NaN where no row of
# interactions_df matches. The keys listed are the columns the two frames share,
# which is what merge() uses when `on` is omitted.
updated_globi_taxa = pd.merge(
    taxa_df,
    interactions_df,
    how="left",
    on=['taxon_id', 'common_name', 'scientific_name'],
)
log_df(updated_globi_taxa)

(7315, 4)


Unnamed: 0,taxon_id,common_name,scientific_name,has_globi
0,1,Animals,Animalia,True
1,47534,Cnidarians,Cnidaria,True
2,48921,Hydrozoans,Hydrozoa,True
3,152823,Siphonophores,Siphonophorae,True
4,117304,Man O' Wars,Physaliidae,True


In [35]:
# Save the flagged taxa list without the DataFrame index column.
updated_globi_taxa.to_csv(
    globi_taxa_path,
    index=False,
)

## create interactions

In [9]:
# Read the taxa list again (same file and columns as the read in the section
# above), with every value kept as a string.
cols = ['scientific_name', 'common_name', 'taxon_id']
taxa_df = pd.read_csv(
    de_taxa_path,
    usecols=cols,
    dtype=str,
)
log_df(taxa_df)

(7315, 3)


Unnamed: 0,taxon_id,common_name,scientific_name
0,1,Animals,Animalia
1,47534,Cnidarians,Cnidaria
2,48921,Hydrozoans,Hydrozoa
3,152823,Siphonophores,Siphonophorae
4,117304,Man O' Wars,Physaliidae


In [10]:
# Keep only the taxa whose has_globi value is missing in the saved CSV.
globi_taxa_df = (
    pd.read_csv(globi_taxa_path, dtype=str)
    .loc[lambda frame: frame['has_globi'].isna()]
)
log_df(globi_taxa_df)

(3970, 4)


Unnamed: 0,taxon_id,common_name,scientific_name,has_globi
6,117302,Portuguese Man o' War,Physalia physalis,
10,48329,Moon Jellies,Aurelia,
11,986245,Southern Moon Jelly,Aurelia marginalis,
12,48482,Pelagiid Jellies,Pelagiidae,
13,48483,Sea Nettles,Chrysaora,


In [11]:
# Load the full interactions table (all columns) with every value as a string.
interaction_path = 'outputs/interactions.csv'
interaction_df = pd.read_csv(
    interaction_path,
    dtype=str,
)
log_df(interaction_df)

(54119, 7)


Unnamed: 0,subject_taxon_id,subject_common_name,subject_scientific_name,target_scientific_name,target_common_name,target_taxon_id,interaction
0,143452,Lettered Sphinx,Deidamia inscriptum,Parthenocissus,,50280,eats
1,143452,Lettered Sphinx,Deidamia inscriptum,Vitis,grapevines,60773,eats
2,143452,Lettered Sphinx,Deidamia inscriptum,Ampelopsis brevipedunculata,porcelain berry,457553,eats
3,143452,Lettered Sphinx,Deidamia inscriptum,Vitis vinifera,wine grape,79519,eats
4,143452,Lettered Sphinx,Deidamia inscriptum,Parthenocissus quinquefolia,Virginia creeper,50278,eats


In [24]:
# Interaction types requested for every taxon; hoisted out of the loop because
# the list never changes between iterations.
interaction_types = ('eats', 'eatenBy', 'pollinates', 'pollinatedBy', 'preysOn', 'preyedUponBy')

interaction_data = []
count = 0
for _, row in globi_taxa_df.iterrows():
    # Progress marker: print the running row count on every tenth row.
    if not count % 10:
        print(count, end=' ')

    for interaction in interaction_types:
        results = formatInteractions(taxa_df, row, interaction, 100)
        # Skip falsy results (e.g. None or an empty list).
        if results:
            interaction_data.extend(results)
    count += 1
            


0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 

KeyboardInterrupt: 

In [25]:
# Combine the newly fetched rows with the interactions loaded from disk.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement and, like append, keeps the original row labels and the union
# of columns.
# NOTE: this cell reassigns interaction_df from itself, so running it twice
# adds the same new rows twice.
new_interactions = pd.DataFrame(interaction_data)
if not new_interactions.empty:
    interaction_df = pd.concat([interaction_df, new_interactions])
log_df(interaction_df)

(61213, 7)


Unnamed: 0,subject_taxon_id,subject_common_name,subject_scientific_name,target_scientific_name,target_common_name,target_taxon_id,interaction
0,143452,Lettered Sphinx,Deidamia inscriptum,Parthenocissus,,50280,eats
1,143452,Lettered Sphinx,Deidamia inscriptum,Vitis,grapevines,60773,eats
2,143452,Lettered Sphinx,Deidamia inscriptum,Ampelopsis brevipedunculata,porcelain berry,457553,eats
3,143452,Lettered Sphinx,Deidamia inscriptum,Vitis vinifera,wine grape,79519,eats
4,143452,Lettered Sphinx,Deidamia inscriptum,Parthenocissus quinquefolia,Virginia creeper,50278,eats


create interactions csv

In [26]:
# Overwrite the interactions CSV with the combined table, omitting the index.
interaction_df.to_csv(
    interaction_path,
    index=False,
)


create interactions csv for the app

In [10]:

# Write a copy of the interactions table into the app's data directory,
# omitting the DataFrame index.
path = '../app/src/lib/data/interactions.csv'
interaction_df.to_csv(
    path,
    index=False,
)