### Functions

In [23]:
#load libraries
import pandas as pd
import numpy as np
import datetime
import os

In [24]:
# Extract unique categories from the brackets
def taxonomic_sorting(extension, database):
    categories = set()
    for entry in database['Taxonomic lineage']:
        if '(' in entry and ')' in entry:
            parts = [part.strip() for part in entry.split(',')]
            for part in parts:
                if '(' in part and ')' in part:
                    category = part.split('(')[-1].split(')')[0]
                    categories.add(category)

    # Create a dictionary to hold the data for each category
    category_data = {category: [] for category in categories}

    # Add the 'Org' column to the dictionary
    category_data['Org'] = list(database['Org'])

    # Iterate over each row and populate the dictionary
    for index, row in database.iterrows():
        for category in categories:
            category_data[category].append(next((entry.split('(')[0].strip() for entry in row['Taxonomic lineage'].split(', ') if category in entry), None))

    # Create a DataFrame from the dictionary
    df = pd.DataFrame(category_data)

    # Fill missing values with NaN
    df.fillna(value=np.nan, inplace=True)

    if extension == 'all':
        database = df[['Org','kingdom','clade1','clade2','phylum','subphylum','class','family']]
    elif extension == 'amoeba':
        if database.get('clade2') is not None:
            if database.get('class') is not None:
                database = df[['Org','clade1', 'phylum', 'class','clade2','family']]
            else:
                database = df[['Org','clade1', 'phylum','clade2','family']]
        else:
            database = df[['Org','clade1', 'phylum','family']]
    else:
        if database.get('kingdom') is not None:
            if database.get('phylum') is not None:
                database = df[['Org','kingdom','phylum','family']]
            else:
                database = df[['Org','kingdom','family']]
        else:
            database = df[['Org']]

    return database

def taxonomic_rename(extension, database):
    """
    Original names: 
    Bilateria (no rank), Deuterostomia (no rank), Vertebrata (no rank)
    Bilateria (no rank), Protostomia (no rank), Ecdysozoa (no rank)
    
    Bilateria (clade1), Deuterostomia (clade2), Vertebrata (Vertebrata)
    Bilateria (clade1), Protostomia (clade2), Ecdysozoa (Superphylum)

    """
    if macro == 'metazoa':
        if extension == 'all':
            #Taxonomic lineage rename
            database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), Opisthokonta (no rank), ",""))
            database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("Bilateria (no rank)","Bilateria (clade1)"))
        
            ##delimiting some ranks for vertebrata
            database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("Deuterostomia (no rank)","Deuterostomia (clade2)"))
            database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("Vertebrata (no rank)","Vertebrata (superphylum)"))
        
            ##delimiting some ranks for invertebrata
            database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("Protostomia (no rank)","Protostomia (clade2)"))
            database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("Ecdysozoa (no rank)","Ecdysozoa (superphylum)"))
        else:
             #Taxonomic lineage rename
            database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), Opisthokonta (no rank), ",""))
    
    elif macro == 'amoeba':
        #Taxonomic lineage rename
        database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
        database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("Amoebozoa (no rank)","Amoebozoa (clade1)"))
    
        ##delimiting ranks
        database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("Dictyostelia (no rank)","Dictyostelia (clade2)"))

    else:
        #Taxonomic lineage rename
        database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
    return database

def gene_summary(name, database, database_org):
    grouped = database.groupby(['Org', 'Gene']).count()

    database_pivoted = grouped.pivot_table(index='Org', columns='Gene', values='Entry', aggfunc='count')

    # Calculate row sums and column sums while ignoring NaN
    database_pivoted['Row_Sum'] = database_pivoted.sum(axis=1, skipna=True)
    database_pivoted.loc['Column_Sum'] = database_pivoted.sum(skipna=True)

    database_pivoted.reset_index(inplace=True)

    database_pivoted_merge = pd.merge(database_org, database_pivoted,  left_on='Org', right_on= 'Org', how='outer')

    database_summary = database_pivoted_merge[['Org', 'Row_Sum']]
    database_summary = database_summary[~(database_summary['Org'].str.contains('Column_Sum'))]
    database_name = name + '_gTotal'
    database_summary = database_summary.rename(columns={'Row_Sum': database_name})

    return database_pivoted_merge, database_summary

In [25]:
def get_organism(database):
    database_org = database[['Organism','Org']]
    database_org = database_org.drop_duplicates()
    return database_org

def get_lineage(extension,database):
    database_lng = database[['Org','Taxonomic lineage']]
    database_lng = database_lng.drop_duplicates()
    database_lng = taxonomic_sorting(extension,database_lng)
    return database_lng

def full_lineage(database_org, database_lng):
    database_org_lng = pd.merge(database_org, database_lng,  left_on='Org', right_on= 'Org', how='outer')
   # Reorder columns
    cols = database_org_lng.columns.tolist()
    cols = cols[2:] + cols[:2]  # Move the first two columns to the end
    database_org_lng = database_org_lng[cols]
    return database_org_lng

In [26]:
def process_dataset(name, type, extension, selected_data, gene_count):
    
    gpcr_org = get_organism(selected_data)
    gpcr_lng = get_lineage(extension, selected_data)
    gpcr_org_lng = full_lineage(gpcr_org, gpcr_lng)
    gpcr_genes, gpcr_summary = gene_summary(name,selected_data,gpcr_org)
    
    if gene_count == False:
        gpcr_genes = "Select gene_count = True to show the genes involved"

    taxonomy = gpcr_org_lng
    
    return gpcr_genes, gpcr_summary, taxonomy

In [27]:
def species_specific(item, species_list_custom, selected_data):
    species_room = []
    # loop to capture more than one specie
    new_specie = True
    for item in species_list_custom:
        # generate a specific entry list
        db_species = selected_data[selected_data['Organism'] == item]
        if db_species.empty:
            print('DataFrame is empty for ', item, '!')
        else:
            print('Entries found for ', item, '!')
            
        species_room.append(db_species)
    if species_room == []:
        species_dataframe = pd.DataFrame()
    else:
        species_dataframe = pd.concat(species_room)
        
    return species_dataframe

In [28]:
def uniprot_summary(gpcr, name, type, extension, nonredundant, gene_count, custom_organisms, alphafold, folder_name, tree_mapping):
    '''
    Will give error is datasets are empty. 
    '''
    if gpcr.empty == True:
        print("no info")
        gpcr_genes = "no info"
        gpcr_summary = "no info"
        gpcr_subset = "no info"
        taxonomy = "no info"
        tree_list = "no info"
        return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list
    else:  
        #drop empty values in AlphaFoldDB column 
        gpcr = gpcr.dropna(subset=['AlphaFoldDB'])
        gpcr = taxonomic_rename(extension, gpcr)
    
        gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
        
        #selecting data
        if type == 'reviewed':
            selected_data = gpcr[gpcr['Reviewed']=='reviewed']
            if selected_data.empty == True:
                return
            
        elif type == 'unreviewed':
            selected_data = gpcr[gpcr['Reviewed']=='unreviewed']
            if selected_data.empty == True:
                return
            
        elif type == 'all':
            selected_data = gpcr
            if selected_data.empty == True:
                return
        else:
            print("Please, input 'reviewed' or 'unreviewed'")
            
        #customize model 
        if custom_organisms == True:    
    
            #user_input = input("Uniprot organism to search (e.g. 'HUMAN', 'ORNAN'). 9CRUS and 9EUKA organisms need to specify a species:")
            user_input = 'HUMAN,DANRE,CHICK,ANOCA,XENLA,LATCH,CALMI,PETMA,BRAFL,CIOIN,SACKO,DROME,9CRUS,CAEEL,SCHMA,LIMPO,CAPTE,HELRO,STRPU,LOTGI,NEMVE,ACRDI,HYDVU,ANTEL,ACRMI,APLCA,TRIAD,MNELE,OSCCA,AMPQE,MONBE,SALR5,CAPO3,MINVI,9EUKA,CREFR,9EUKA,AMOPA,SCHPM,TUBME,NEUCS,YEAST,MICPC,CRYNE,COPCI,USTMD,9FUNG,MUCCI,RHIOR,ALLMA,ALLM3,BATDE,SPIPN,ENCCN,9MICR,THETB,DICDI,DICPU,HETPA,ENTHI,ACACA,ACACF,ARATH,ERYGU,AQUCA,BRADI,SORBI,SELML,PHYPA,CHLRE,CHLVA,OSTTA,MICPS,9FLOR,9RHOD,CYAME,CYAPA,ECTSI,NANGC,AURAN,PHATR,PHYIN,TOXGO,PLAFA,PARTE,TETTH,TETTS,PERM5,BIGNA,EMIHU,GUITH,NAEGR,TRIVA,GIAIN,TRYCR,LEIMA,PHATC,DIALT,CHRCT,PORPP,CHOCR,CYAM1,GALSU,NEOHI,PORUM'
            customised_flag = "customised"

            #species list custom - 9euka extended to haptophyta
            species_list_9euka = ['Sphaeroforma arctica','Abeoforma whisleri','Pirum gemmata','Corallochytrium limacisporum',
                                 'Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri)','Chrysochromulina tobinii',
                                  'Phaeocystis antarctica', #haptophyta
                                 'Calcidiscus leptoporus','Coccolithus braarudii','Haptolina brevifila','Haptolina ericina',
                                  'Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi)',
                                 'Chrysotila carterae (Marine alga) (Syracosphaera carterae)','Prymnesium polylepis', 
                                 'Palpitomonas bilix'] # palpitomonas
            species_list_9crus = ['Daphnia magna']
            species_list_9fung = ['Podila verticillata NRRL 6337']
            species_list_9stra = ['Nannochloropsis gaditana']
            species_list_9alve = ['Perkinsus marinus']
            species_list_9micr = ['Nematocida parisii']
            #new added - rhodophyta
            species_list_9flor = ['Gracilariopsis chorda']
            species_list_9rhod = ['Compsopogon caeruleus','Rhodosorus marinus',
                                  'Timspurckia oligopyrenoides','Erythrolobus australicus']
            
            # Split the user input by comma and remove any extra whitespace
            items_org = user_input.split(',')
            items_org_else = user_input.split(',') #this is for the second list

            # Extract 9CRUS or 9EUKA 
            item1 = items_org.pop(items_org.index('9EUKA')) if '9EUKA' in items_org else None
            item2 = items_org.pop(items_org.index('9CRUS')) if '9CRUS' in items_org else None
            item3 = items_org.pop(items_org.index('9FUNG')) if '9FUNG' in items_org else None
            item4 = items_org.pop(items_org.index('9STRA')) if '9STRA' in items_org else None
            item5 = items_org.pop(items_org.index('9ALVE')) if '9ALVE' in items_org else None
            item6 = items_org.pop(items_org.index('9MICR')) if '9MICR' in items_org else None
            item7 = items_org.pop(items_org.index('9FLOR')) if '9FLOR' in items_org else None
            item8 = items_org.pop(items_org.index('9RHOD')) if '9RHOD' in items_org else None

            if items_org == []:
                if item1 == '9EUKA':
                    species_list_custom = species_list_9euka
                    
                    # calling function for picking the species specific
                    temp_dataset = species_specific(item1, species_list_custom, selected_data)

                    if temp_dataset.empty == True:
                        print("no info")
                        gpcr_genes = "no info"
                        gpcr_summary = "no info"
                        gpcr_subset = "no info"
                        taxonomy = "no info"
                        tree_list = "no info"
                        return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list
                    else:
                        selected_data = temp_dataset
                    
                elif item2 == '9CRUS':
                    species_list_custom = species_list_9crus
                    
                    temp_dataset = species_specific(item2,species_list_custom, selected_data)
                    if temp_dataset.empty == True:
                        print("no info")
                        gpcr_genes = "no info"
                        gpcr_summary = "no info"
                        gpcr_subset = "no info"
                        taxonomy = "no info"
                        tree_list = "no info"
                        return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list
                    else:
                        selected_data = temp_dataset
                    
                elif item3 == '9FUNG':
                    species_list_custom = species_list_9fung
                    
                    temp_dataset = species_specific(item3,species_list_custom, selected_data)
                    if temp_dataset.empty == True:
                        print("no info")
                        gpcr_genes = "no info"
                        gpcr_summary = "no info"
                        gpcr_subset = "no info"
                        taxonomy = "no info"
                        tree_list = "no info"
                        return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list
                        
                    else:
                        selected_data = temp_dataset
                    
                elif item4 == '9STRA':
                    species_list_custom = species_list_9stra
                    
                    temp_dataset = species_specific(item4,species_list_custom, selected_data)
                    
                    if temp_dataset.empty == True:
                        print("no info")
                        gpcr_genes = "no info"
                        gpcr_summary = "no info"
                        gpcr_subset = "no info"
                        taxonomy = "no info"
                        tree_list = "no info"
                        return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list
                        
                    else:
                        selected_data = temp_dataset
                    
                elif item5 == '9ALVE':
                    species_list_custom = species_list_9alve
                    
                    temp_dataset = species_specific(item5, species_list_custom, selected_data)
                    
                    if temp_dataset.empty == True:
                        print("no info")
                        gpcr_genes = "no info"
                        gpcr_summary = "no info"
                        gpcr_subset = "no info"
                        taxonomy = "no info"
                        tree_list = "no info"
                        return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list
                        
                    else:
                        selected_data = temp_dataset
                    
                elif item6 == '9MICR':
                    species_list_custom = species_list_9micr
                    
                    temp_dataset = species_specific(item6, species_list_custom, selected_data)
                    
                    if temp_dataset.empty == True:
                        print("no info")
                        gpcr_genes = "no info"
                        gpcr_summary = "no info"
                        gpcr_subset = "no info"
                        taxonomy = "no info"
                        tree_list = "no info"
                        return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list
                        
                    else:
                        selected_data = temp_dataset

                elif item7 == '9FLOR':
                    species_list_custom = species_list_9flor
                    
                    temp_dataset = species_specific(item7, species_list_custom, selected_data)
                    
                    if temp_dataset.empty == True:
                        print("no info")
                        gpcr_genes = "no info"
                        gpcr_summary = "no info"
                        gpcr_subset = "no info"
                        taxonomy = "no info"
                        tree_list = "no info"
                        return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list
                    else:
                        selected_data = temp_dataset

                elif item8 == '9RHOD':
                    species_list_custom = species_list_9rhod
                    
                    temp_dataset = species_specific(item8, species_list_custom, selected_data)
                    
                    if temp_dataset.empty == True:
                        print("no info")
                        gpcr_genes = "no info"
                        gpcr_summary = "no info"
                        gpcr_subset = "no info"
                        taxonomy = "no info"
                        tree_list = "no info"
                        return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list
                        
                    else:
                        selected_data = temp_dataset
                    
                else:
                    return 'No species has been included. Run this again'
                    
            # when the list is not empty, and there is more information 
            else:
                #general dataset to concat - avoided the 9CRUS, 9EUKA, ...
                gpcr_subset = selected_data[selected_data['Org'].str.contains('|'.join(items_org))]
                #selected_data_limited = gpcr_subset

                #the full dataset where we will extract the 9CRUS or 9EUKA
                gpcr_subset_full = selected_data[selected_data['Org'].str.contains('|'.join(items_org_else))]
                
                if item1 == '9EUKA':
                    species_list_custom = species_list_9euka
                    db_species1 = species_specific(item1, species_list_custom, gpcr_subset_full)
                else:
                    db_species1 = None
                    
                if item2 == '9CRUS':
                    species_list_custom = species_list_9crus
                    db_species2 = species_specific(item2, species_list_custom, gpcr_subset_full)
                else:
                    db_species2 = None

                if item3 == '9FUNG':
                    species_list_custom = species_list_9fung
                    db_species3 = species_specific(item3, species_list_custom, gpcr_subset_full)
                else:
                    db_species3 = None
                    
                if item4 == '9STRA':
                    species_list_custom = species_list_9stra
                    db_species4 = species_specific(item4, species_list_custom, gpcr_subset_full)
                else:
                    db_species4 = None
                    
                if item5 == '9ALVE':
                    species_list_custom = species_list_9alve
                    db_species5 = species_specific(item5, species_list_custom, gpcr_subset_full)
                else:
                    db_species5 = None
                    
                if item6 == '9MICR':
                    species_list_custom = species_list_9micr
                    db_species6 = species_specific(item6, species_list_custom, gpcr_subset_full)
                else: 
                    db_species6 = None

                if item7 == '9FLOR':
                    species_list_custom = species_list_9flor
                    db_species7 = species_specific(item7, species_list_custom, gpcr_subset_full)
                else: 
                    db_species7 = None

                if item8 == '9RHOD':
                    species_list_custom = species_list_9rhod
                    db_species8 = species_specific(item8, species_list_custom, gpcr_subset_full)
                else: 
                    db_species8 = None

            # List of tree DataFrame variables and their respective colors
            db_species_list = [db_species1, db_species2, db_species3, db_species4, db_species5, db_species6, db_species7, db_species8]
            valid_df = []
            # Loop through each tree, check if it is a valid DataFrame, then set color and add to list
            for db_spec in db_species_list:
                if isinstance(db_spec, pd.DataFrame):  # Check if the variable is a valid DataFrame
                    valid_df.append(db_spec)
                    
            # Concatenate the valid dataframes, if there is 
            df_species_specific_full = pd.concat(valid_df, ignore_index=True)
            print(df_species_specific_full)
            if df_species_specific_full.empty: 
                selected_data = gpcr_subset
                print('No species specific to include!')
            else:
                selected_data = pd.concat([gpcr_subset, df_species_specific_full])
                print('At least one species specific to include!')

            #save unique names per each organism (need for heatmap)
            selected_data['Scientific_Name'] = selected_data['Organism'].apply(lambda x: ' '.join(x.split()[:2]))
                
            if nonredundant == True:
                
                # Sort by 'Protein names', 'Org', and 'Length' in descending order
                gpcr_subset = selected_data.sort_values(by=['Protein names', 'Scientific_Name', 'Length'], ascending=[True, True, False])

                # Group by 'Protein names' and calculate the average length
                gpcr_avg_length = gpcr_subset.groupby('Protein names', as_index=False)['Length'].mean()

                ## rename column name 
                gpcr_avg_length = gpcr_avg_length.rename(columns={'Length': 'Average Length'})
                
                # merge the average length
                gpcr_subset = gpcr_subset.merge(gpcr_avg_length, on='Protein names')
                
                # How far is from the average - absolute value
                gpcr_subset['Length Distance'] = (gpcr_subset['Average Length'] - gpcr_subset['Length']).abs()
                
                # sort ascending
                gpcr_subset = gpcr_subset.sort_values(by=['Length Distance','Length'], ascending=[True,True])
                
                # Check individual protein names
                #pf01_subset_filtered[pf01_subset_filtered['Protein names'] == 'G-protein coupled receptor 55-like protein']
                
                # Drop duplicates, keeping the first occurrence (the closest to the average, and has the lower length)
                gpcr_subset = gpcr_subset.drop_duplicates(subset=['Protein names','Scientific_Name'])
                
                selected_data = gpcr_subset
                
            if selected_data.empty == True:
                print("no info")
                gpcr_genes = "no info"
                gpcr_summary = "no info"
                gpcr_subset = "no info"
                taxonomy = "no info"
                tree_list = "no info"
                return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list
            
        else:
            gpcr_subset = "Select 'True' for customised organisms, summary tables would reflect all the dataset"
            customised_flag = ""
            
        if alphafold == True:
    
            gpcr_list = selected_data['Entry']
            name_list = folder_name + '/' + name + '_' + type + '_' + customised_flag + '.txt'
            gpcr_list.to_csv(name_list, header=False, index=False)
    
        if tree_mapping == True:
    
            tree_list = selected_data[['Entry Name','Entry','Organism','Org','Scientific_Name','Gene','Protein names','Length']]
            tree_list['gpcr'] = name
    
        #this is one of the main function to summaryze the info
    
        gpcr_genes, gpcr_summary, taxonomy = process_dataset(name, type, extension, selected_data, gene_count)
    
        print('Summarizes:', name)
    
        return gpcr_genes, gpcr_summary, gpcr_subset, taxonomy, tree_list

## working directory

In [29]:
os.chdir('/mnt/c/Users/ek23810/OneDrive - University of Bristol/term2_project/foldtree_tidy/outputs/mining/')  # Provide the new path here

In [30]:
out_dir = input('Set the output folder:')
out_dir = out_dir

Set the output folder: redo9-trash


## Load databases - Any GPCR

In [9]:
gpcr_input = input('Uniprot GPCR identifier:')
taxa_input = input('Taxa to retrieve:')

Uniprot GPCR identifier: PF00001,PF00002,PF00003,PF01534,PF02076,PF02101,PF02116,PF02117,PF02118,PF02175,PF03006,PF03125,PF03383,PF03402,PF03619,PF04080,PF05296,PF05462,PF05875,PF06454,PF06814,PF10192,PF10292,PF10316,PF10317,PF10318,PF10319,PF10320,PF10321,PF10322,PF10323,PF10324,PF10325,PF10326,PF10327,PF10328,PF11710,PF11970,PF13853,PF13965,PF15100,PF12430
Taxa to retrieve: 2759


In [31]:
gpcr_list = gpcr_input.split(',')
taxa_list = taxa_input.split(',')

In [33]:
#count the number of files 
c = 0
valid_trees = []
for taxa in taxa_list:
    for gpcr in gpcr_list:
        # file name
        PFAMID_taxa = gpcr + '-' + taxa
        print(PFAMID_taxa)
        file_download = '../../datasets/full/uniprot-' + PFAMID_taxa + '.tsv'
        
        # validation is a dataframe
        try:
            with open(file_download, 'r') as f:
                lines = f.readlines()
        except FileNotFoundError:
            c += 1
            print('FileNotFound')
            continue

        # continue, as the daframe contains data
        
        dataset = pd.read_csv(file_download, sep='\t')
        
        # parameters
        macro = ''
        
        type = 'all'
        extension = ''   
        gene_count = False
        # <------------------------------------------ select this one for custom organisms -------------------------------------------->
        custom_organisms = True
        # <------------------------------------------ select this one for custom organisms -------------------------------------------->
        nonredundant = False
        tree_mapping = True
        
        alphafold = True
         
        #generate a folder to store outputs
        alphafold_folder = out_dir + '/' + 'entries'
        # Specify the name of the new folder~
        folder_name = alphafold_folder + '/' + PFAMID_taxa + '_alphafold' + '_' + type + '_' + str(datetime.datetime.now())
        
        # Create the folder in the current working directory
        os.mkdir(folder_name)
    
        ## Any GPCR
        db_genes, db_summary, db_subset, db_taxonomy, db_tree = uniprot_summary(dataset, gpcr, type, extension, nonredundant, gene_count, 
                                                                           custom_organisms, alphafold, folder_name, tree_mapping)
        
        ## checking the tree
        if isinstance(db_tree, pd.DataFrame):  # Check if the variable is a valid DataFrame
            db_tree['gpcr'] = gpcr
            db_tree['taxa'] = taxa
            valid_trees.append(db_tree)
        
# Concatenate the valid DataFrames
if valid_trees == []:
    print('Nothing to generate. Entries is an empty folder')
else:
    tree_map_list = pd.concat(valid_trees, ignore_index=True)

    ## add pfam label
    label = pd.read_csv('../../pfam_labels.csv', sep=',')
    tree_map_list_labelled = pd.merge(tree_map_list, label,  left_on='gpcr', right_on= 'Accession', how='outer')

    ## add tree directory
    ## merge with taxonomy
    taxonomy_import = pd.read_csv('../../taxonomy_full_list_revised.csv')
    taxonomy_import = taxonomy_import.drop('Org', axis=1)
    
    tree_map_list_labelled = pd.merge(tree_map_list_labelled, taxonomy_import,  left_on='Scientific_Name', right_on= 'Scientific_Name', how='left')
    
    ## this merge was used for the file exported for the full dataset redo6 (17600 rows)
    tree_map_list_labelled['RL_Entry_Name'] = tree_map_list_labelled['Entry'] + '_' + tree_map_list_labelled['Org_Uq']

    tree_dir = out_dir + '/' + 'tree_metadata'
    comment = 'full'
    name_to_list =  tree_dir + '/' + 'compl' + comment + 'tree_map' + '_' + type + '.txt'
    tree_map_list_labelled.to_csv(name_to_list, header=True, index=False)

PF00001-2759


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
Entries found for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
Entries found for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
Entries found for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
Entries found for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracilariopsis chorda !
Da

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] =

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
Entries found for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracilariop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] =

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
Entries found for  Phaeocystis antarctica !
Entries found for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
Entries found for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
Entries found for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
Entries found for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
Entries found for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
Entries found for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracilariopsis chorda !
DataFrame is empt

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] =

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
Entries found for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
Entries found for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
Entries found for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracilariopsis chorda

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

PF02101-2759
DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
Entries found for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracilariopsis chorda !
DataFrame is empty for  Compsopogon caeruleus !
DataFrame is empty for  Rhodosorus marinus !
DataFrame is empty for  Timspurckia oligopyrenoides !
DataFrame is empty for  Erythrolobus australicus !
Empty DataFrame
Columns: [Entry, Reviewed, Entry Name, Protein names, Gene Names, Organism, Length, AlphaFoldDB, Ta

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['gpcr'] = gpcr
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['taxa'] = taxa
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A 

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracil

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
Entries found for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
Entries found for  Chrysochromulina tobinii !
Entries found for  Phaeocystis antarctica !
Entries found for  Calcidiscus leptoporus !
Entries found for  Coccolithus braarudii !
Entries found for  Haptolina brevifila !
Entries found for  Haptolina ericina !
Entries found for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
Entries found for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
Entries found for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
Entries found for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
Entries found for  Gracilariopsis chorda !
Entries found for  Compsopogon caeruleus !

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] =

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracil

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['gpcr'] = gpcr
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['taxa'] = taxa
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the ca

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracil

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
Entries found for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
Entries found for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
Entries found for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
Entries found for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
Entries found for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
Entries found for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
Entries found for  Gracilariopsis chorda !
Entries found for

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] =

Summarizes: PF04080
PF05296-2759
DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['gpcr'] = gpcr
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['taxa'] = taxa
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the ca

Summarizes: PF05296
PF05462-2759
DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
Entries found for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
Entries found for  Coccolithus braarudii !
Entries found for  Haptolina brevifila !
Entries found for  Haptolina ericina !
Entries found for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
Entries found for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
Entries found for  Prymnesium polylepis !
Entries found for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracilariopsi

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
Entries found for  Chrysochromulina tobinii !
Entries found for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
Entries found for  Coccolithus braarudii !
Entries found for  Haptolina brevifila !
Entries found for  Haptolina ericina !
Entries found for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
Entries found for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
Entries found for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
Entries found for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracilariopsis chorda !
DataFrame is empty for  Com

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
Entries found for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
Entries found for  Calcidiscus leptoporus !
Entries found for  Coccolithus braarudii !
Entries found for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
Entries found for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
Entries found for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
Entries found for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracilariopsis chorda !
Entries found for

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] =

Summarizes: PF10192
PF10292-2759
DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracil

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data['Scientific_Name'] = selected_data['Organism'].apply(lambda x: ' '.join(x.split()[:2]))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['gpcr'] = gpcr
A value is trying to be set on a copy of a slice from a

Empty DataFrame
Columns: [Entry, Reviewed, Entry Name, Protein names, Gene Names, Organism, Length, AlphaFoldDB, Taxonomic lineage, Organism (ID), Gene, Org]
Index: []
No species specific to include!
Summarizes: PF10320
PF10321-2759
DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymne

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['gpcr'] = gpcr
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['taxa'] = taxa
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the ca

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracil

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['gpcr'] = gpcr
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['taxa'] = taxa
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the ca

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
Entries found for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracilariop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] =

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracil

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['gpcr'] = gpcr
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['taxa'] = taxa
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the ca

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracil

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii !
DataFrame is empty for  Gracil

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data['Scientific_Name'] = selected_data['Organism'].apply(lambda x: ' '.join(x.split()[:2]))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tree_list['gpcr'] = name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  db_tree['gpcr'] = gpcr
A value is trying to be set on a copy of a slice from a

Summarizes: PF13965
PF15100-2759
DataFrame is empty for  Sphaeroforma arctica !
DataFrame is empty for  Abeoforma whisleri !
DataFrame is empty for  Pirum gemmata !
DataFrame is empty for  Corallochytrium limacisporum !
DataFrame is empty for  Diacronema lutheri (Unicellular marine alga) (Monochrysis lutheri) !
DataFrame is empty for  Chrysochromulina tobinii !
DataFrame is empty for  Phaeocystis antarctica !
DataFrame is empty for  Calcidiscus leptoporus !
DataFrame is empty for  Coccolithus braarudii !
DataFrame is empty for  Haptolina brevifila !
DataFrame is empty for  Haptolina ericina !
DataFrame is empty for  Emiliania huxleyi (Coccolithophore) (Pontosphaera huxleyi) !
DataFrame is empty for  Chrysotila carterae (Marine alga) (Syracosphaera carterae) !
DataFrame is empty for  Prymnesium polylepis !
DataFrame is empty for  Palpitomonas bilix !
DataFrame is empty for  Daphnia magna !
DataFrame is empty for  Podila verticillata NRRL 6337 !
DataFrame is empty for  Nematocida parisii

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  database['Taxonomic lineage'] = database['Taxonomic lineage'].apply(lambda x: str(x).replace("cellular organisms (no rank), Eukaryota (superkingdom), ",""))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpcr[['Gene', 'Org']] = gpcr['Entry Name'].str.split('_', n=2, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retur

Summarizes: PF12430


In [106]:
pfam = pd.read_csv('../../datasets/full/uniprot-PF10323-35493.tsv', sep='\t')

In [107]:
pfam[['Gene', 'Org']] = pfam['Entry Name'].str.split('_', n=2, expand=True)

In [108]:
pfam

Unnamed: 0,Entry,Reviewed,Entry Name,Protein names,Gene Names,Organism,Length,AlphaFoldDB,Taxonomic lineage,Organism (ID),Gene,Org
0,A0AAE0DSG8,unreviewed,A0AAE0DSG8_9ROSI,non-specific serine/threonine protein kinase (...,Dsin_031565,Dipteronia sinensis,427,,"cellular organisms (no rank), Eukaryota (super...",43782,A0AAE0DSG8,9ROSI
