# Weevil manuscript
Community assembly and diversification in a species‐rich radiation of island weevils (Coleoptera: Cratopini)
https://onlinelibrary.wiley.com/doi/abs/10.1111/jbi.13393

The manuscript provides OTU-like groupings and also includes a phylogeny, which could be useful.

In [9]:
import pandas as pd
import numpy as np
import os
from scipy.stats import entropy

# Raise the row display limit so long tables are not truncated.
pd.set_option('display.max_rows', 1100)

# Sample metadata, indexed by the first CSV column (the sample ID).
metadata_df = pd.read_csv("Metadata/Supplementary_names.csv", index_col=0)
# Fill missing values in the Island column only. A blanket fillna() would also
# write "Mauritius" into unrelated columns (Site, clade, colour, ...) wherever
# they happen to be empty.
# NOTE(review): presumably a blank Island means the sample is from Mauritius -- confirm.
metadata_df["Island"] = metadata_df["Island"].fillna("Mauritius")
metadata_df

Unnamed: 0_level_0,Exclude,Organism,Site,Country,Island,name,colour,clade
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
T-Reu_2367,No,Cratopopsis_alluaudi,Plaine_des_Gregues,France,Reunion Island,T-Reu_2367_Cratopopsis_alluaudi_(Plaine_des_Gr...,Blue,Reunion Cratopopsis group
T-Reu_3153,No,Cratopopsis_alluaudi,Les_Makes,France,Reunion Island,T-Reu_3153_Cratopopsis_alluaudi_(Les_Makes-Reu...,Blue,Reunion Cratopopsis group
T-Reu_3812,No,Cratopopsis_alluaudi,Sentier_Trophee_Mondial,France,Reunion Island,T-Reu_3812_Cratopopsis_alluaudi_(Sentier_Troph...,Blue,Reunion Cratopopsis group
T-Reu_4047,No,Cratopopsis_alluaudi,Les_Makes,France,Reunion Island,T-Reu_4047_Cratopopsis_alluaudi_(Les_Makes-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4056,No,Cratopopsis_alluaudi,Les_Makes,France,Reunion Island,T-Reu_4056_Cratopopsis_alluaudi_(Les_Makes-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4057,No,Cratopopsis_alluaudi,Les_Makes,France,Reunion Island,T-Reu_4057_Cratopopsis_alluaudi_(Les_Makes-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4058,No,Cratopopsis_alluaudi,Les_Makes,France,Reunion Island,T-Reu_4058_Cratopopsis_alluaudi_(Les_Makes-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4343,No,Cratopopsis_alluaudi,Cap_Blanc,France,Reunion Island,T-Reu_4343_Cratopopsis_alluaudi_(Cap_Blanc-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4374,No,Cratopopsis_alluaudi,Cap_Blanc,France,Reunion Island,T-Reu_4374_Cratopopsis_alluaudi_(Cap_Blanc-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4396,No,Cratopopsis_alluaudi,Cap_Blanc,France,Reunion Island,T-Reu_4396_Cratopopsis_alluaudi_(Cap_Blanc-Reu...,Blue,Reunion Cratopopsis group


In [2]:
# Distinct island labels present in the metadata.
set(metadata_df["Island"].unique())

{'GenBank_(GU176345.1)', 'Mauritius', 'Reunion Island', 'Rodrigues Island'}

In [10]:
## Nested lookup: island -> species (the "Organism" column) -> list of records.
## Only the two islands named here get a bin; the lists start out empty.
data_dicts = {}
for island in ("Reunion Island", "Mauritius"):
    on_island = metadata_df["Island"] == island
    species_names = set(metadata_df.loc[on_island, "Organism"])
    data_dicts[island] = {species: [] for species in species_names}

## An alternative that was considered: one separate dict per island, keyed on
## the "clade" column instead of "Organism". Kept here only as a reminder.
#reunion_clades = {x:[] for x in set(metadata_df[metadata_df["Island"] == "Reunion Island"]["clade"])}
#mauritius_clades = {x:[] for x in set(metadata_df[metadata_df["Island"] == "Mauritius"]["clade"])}
print(data_dicts)

{'Reunion Island': {'Cratopus_brunnipes': [], 'Cratopus_nigridorsis': [], 'Cratopus_frappieri': [], 'Cratopopsis_bistigma': [], 'Cratopus_ditissimus': [], 'Cratopopsis_alluaudi': [], 'Cratopus_sumptuosus': [], 'Cratopopsis_nitidifrons': [], 'Cratopus_humeralis': [], 'Cratopopsis_villosulus': [], 'Cratopopsis_coquereli': [], 'Cratopopsis_obscurus': [], 'Scaevinus_dombayae': [], 'Cratopopsis_cribatus': [], 'Cratopus_bernei': [], 'Cratopus_murinus': [], 'Cratopopsis_antiquus': [], 'Cratopopsis_fulvicornis': [], 'Cratopus_septemvittatus': [], 'Cratopus_tristis': [], 'Cratopus_punctum': [], 'Cratopus_frapieri': [], 'Cratopus_circumcinctus': [], 'Cratopus_marmoreus': [], 'Cratopus_leucophaeatus': [], 'Cratopus_nanus': [], 'Cratopus_fulvescens': []}, 'Mauritius': {'Cratopus_cariei': [], 'Cratopopsis_mauritianus': [], 'Cratopus_viridulus': [], 'Cratopus_vulgaris': [], 'Cratopus_tigratus': [], 'Cratopopsis_impressus': [], 'Cratopus_aeneoniger': [], 'Cratopus_nigrogranatus': [], 'Cratopus_psitta

## Sort sequences into species (Organism) per island

In [11]:
# Alignment file the sequence records are read from.
nexfile = "SuppData/RAxML/Input/Supplementary_COII.nex"

# Read inside a context manager so the file handle is closed right away.
with open(nexfile) as infile:
    lines = infile.readlines()
# Hard-coded slice of the file; every line in it must be "<sample> <sequence>".
# NOTE(review): presumably this is the data matrix of the NEXUS file -- the
# offsets must be re-checked if the input file ever changes.
lines = lines[921:1831]
print(len(lines))

# Empty every bin first so that re-running this cell does not append each
# record a second time (which would silently double the abundances).
for species_bins in data_dicts.values():
    for records in species_bins.values():
        records.clear()

# Island labels whose samples are left out of the analysis.
skipped_islands = ["GenBank_(GU176345.1)", "Rodrigues Island"]
for line in lines:
    # Unpacking doubles as a check that the line has exactly two fields.
    sample, sequence = line.strip().split()
    # Look the sample up once instead of once per field.
    sample_meta = metadata_df.loc[sample]
    island = sample_meta["Island"]
    if island in skipped_islands:
        continue
    data_dicts[island][sample_meta["Organism"]].append(line.strip())

# Per-island list of species abundances (number of records per species).
# NOTE(review): the saved output lists both 'Cratopus_frappieri' (99) and
# 'Cratopus_frapieri' (1) on Reunion -- possibly one species under two
# spellings in the metadata; confirm before trusting the species counts.
abunds = {}
for island in data_dicts:
    abunds[island] = []
    print(island, len(data_dicts[island]))
    for species in data_dicts[island]:
        n_records = len(data_dicts[island][species])
        print(species, n_records)
        abunds[island].append(n_records)
    print("Numinds {} {}".format(island, sum(abunds[island])))
    print("\n")

910
Reunion Island 27
Cratopus_brunnipes 22
Cratopus_nigridorsis 5
Cratopus_frappieri 99
Cratopopsis_bistigma 4
Cratopus_ditissimus 7
Cratopopsis_alluaudi 12
Cratopus_sumptuosus 88
Cratopopsis_nitidifrons 3
Cratopus_humeralis 73
Cratopopsis_villosulus 9
Cratopopsis_coquereli 11
Cratopopsis_obscurus 22
Scaevinus_dombayae 9
Cratopopsis_cribatus 6
Cratopus_bernei 4
Cratopus_murinus 15
Cratopopsis_antiquus 11
Cratopopsis_fulvicornis 19
Cratopus_septemvittatus 3
Cratopus_tristis 1
Cratopus_punctum 5
Cratopus_frapieri 1
Cratopus_circumcinctus 5
Cratopus_marmoreus 4
Cratopus_leucophaeatus 3
Cratopus_nanus 97
Cratopus_fulvescens 1
Numinds Reunion Island 539


Mauritius 26
Cratopus_cariei 5
Cratopopsis_mauritianus 5
Cratopus_viridulus 1
Cratopus_vulgaris 39
Cratopus_tigratus 6
Cratopopsis_impressus 4
Cratopus_aeneoniger 20
Cratopus_nigrogranatus 3
Cratopus_psittacus 6
Cratopus_deceptus 8
Cratopus_striga 1
Scaevinus_subtruncatus 10
Cratopus_viridilimbatus 25
Cratopus_stigmaeus 2
Cratopus_fascige

In [12]:
# Entropy of the per-species abundance list for each island
# (scipy normalises the counts to probabilities; natural log by default).
for island, counts in abunds.items():
    print(island, entropy(counts))

Reunion Island 2.5130863905892955
Mauritius 2.7441869381462656


## Write sequences to FASTA files per species per island

In [13]:
# Output layout: one directory per island under fastadir.
fastadir = "weevil_fasta/"
reunion_dir = fastadir + "reunion/"
mauritius_dir = fastadir + "mauritius/"
dirs = {"Reunion Island":reunion_dir, "Mauritius":mauritius_dir}
# Create each directory (and the parent) only if it is missing. The previous
# check looked at mauritius_dir alone and then called os.mkdir() on all three
# paths, which raised when the parent already existed and skipped creation
# entirely when only the reunion directory was missing.
for island_dir in dirs.values():
    os.makedirs(island_dir, exist_ok=True)
for i in data_dicts:
    print(i)
    for c in data_dicts[i]:
        # One FASTA file per species; spaces in the name become underscores.
        fasta_path = dirs[i] + c.replace(" ", "_") + ".fasta"
        with open(fasta_path, 'w') as outfile:
            for samp in data_dicts[i][c]:
                # Each stored record is "<sample name> <sequence>".
                name, seq = samp.split()
                outfile.write(">{}\n{}\n".format(name, seq))

Reunion Island
Mauritius


## Make observed SGD files

In [6]:
# Run MESS's make_obs.py once per island: -f is the FASTA directory written
# earlier in this notebook, -o is the output file name.
# NOTE(review): the saved output of this cell shows both runs failing inside
# make_obs.py with "TypeError: from_dict() got an unexpected keyword argument
# 'columns'". DataFrame.from_dict only accepts `columns` from pandas 0.23 on,
# so the Python that runs the script presumably has an older pandas -- confirm.
# NOTE(review): the script path is absolute and machine-specific.
# NOTE(review): -o names *.obs files, while a later cell reads reunion.pis and
# mauritius.pis; whether make_obs.py also writes the .pis files cannot be seen
# from here -- confirm.
!/home/isaac/Continuosity/tmp/MESS/make_obs.py -f weevil_fasta/mauritius/ -o mauritius.obs
!/home/isaac/Continuosity/tmp/MESS/make_obs.py -f weevil_fasta/reunion/ -o reunion.obs


Traceback (most recent call last):
  File "/home/isaac/Continuosity/tmp/MESS/make_obs.py", line 126, in <module>
    pis_df = pd.DataFrame.from_dict(pis, orient="index", columns=colname)
TypeError: from_dict() got an unexpected keyword argument 'columns'
Traceback (most recent call last):
  File "/home/isaac/Continuosity/tmp/MESS/make_obs.py", line 126, in <module>
    pis_df = pd.DataFrame.from_dict(pis, orient="index", columns=colname)
TypeError: from_dict() got an unexpected keyword argument 'columns'


In [21]:
# Island label -> name of that island's .pis file.
d = {
    'Mauritius': 'mauritius.pis',
    "Reunion Island": "reunion.pis",
}
# Sanity check: how many species bins each island has.
for island in d:
    print(len(data_dicts[island]))

26
27


In [51]:
def build_pi_abundance(pis_file, seqs_by_species):
    """Combine per-species pi values with per-species abundances.

    Parameters
    ----------
    pis_file : str
        CSV file read with its first column as the index and then transposed,
        so that species become columns. It must yield exactly one row after
        transposing, because the combined frame is relabelled with two names.
    seqs_by_species : dict
        Maps species name -> list of records; the list length is the abundance.

    Returns
    -------
    pandas.DataFrame
        Two rows, "pi" and "abundance", with one column per species. Columns
        are the sorted union of both inputs, so a species missing from one
        input gets NaN in that row.
    """
    pidf = pd.read_csv(pis_file, index_col=0).T
    abund_dict = {species: len(records) for species, records in seqs_by_species.items()}
    abdf = pd.DataFrame(abund_dict, index=[0])
    combined = pd.concat([pidf, abdf], sort=True)
    combined.index = ["pi", "abundance"]
    return combined


# Same processing for both islands; the table is written species-per-row.
# Mauritius is handled last, so `df` ends up holding the Mauritius table.
for island, stem in (("Reunion Island", "reunion"), ("Mauritius", "mauritius")):
    df = build_pi_abundance(stem + ".pis", data_dicts[island])
    df.T.to_csv(stem + ".dat")

In [52]:
# Print the Mauritius pi/abundance table (mauritius.dat) to check its contents.
!cat mauritius.dat

,pi,abundance
Cratopopsis_impressus,0.043740573152337855,4.0
Cratopopsis_mauritianus,0.002413273001508296,5.0
Cratopus_aeneoniger,0.027849699162196533,20.0
Cratopus_armatus,0.005221023320570831,14.0
Cratopus_caliginosus,0.05994149991588898,47.0
Cratopus_cariei,0.00874811463046757,5.0
Cratopus_confusus,0.02075524925643301,21.0
Cratopus_deceptus,0.027318106729871432,8.0
Cratopus_emarginatus,0.014379084967320262,6.0
Cratopus_fasciger,0.0,1.0
Cratopus_melanocephalus,0.007670760611937081,15.0
Cratopus_molitor,0.0,1.0
Cratopus_mundulus,0.0899949723479135,4.0
Cratopus_murinus,0.0746012275872092,59.0
Cratopus_nigrogranatus,0.021116138763197574,3.0
Cratopus_ovalis,0.02412796064225677,23.0
Cratopus_psittacus,0.007943690296631471,6.0
Cratopus_punctum,0.01711666915286825,13.0
Cratopus_stigmaeus,0.02564102564102564,2.0
Cratopus_striga,0.0,1.0
Cratopus_tigratus,0.028456510809451982,6.0
Cratopus_variegatus,0.021116138763197588,4.0
Cratopus_viridilimbatus,0.02632270273810184,25.