# Weevil manuscript
Source paper: *Community assembly and diversification in a species-rich radiation of island weevils (Coleoptera: Cratopini)*,
<https://onlinelibrary.wiley.com/doi/abs/10.1111/jbi.13393>

In [19]:
import pandas as pd
import numpy as np
import os
from scipy.stats import entropy

# Load the per-sample metadata table (first CSV column becomes the index) and
# replace every missing value with the string "Mauritius".
# NOTE(review): the fill applies to all columns, not only "Island" -- confirm
# that no other column contains missing values that should be kept distinct.
metadata_df = (
    pd.read_csv("Metadata/Supplementary_names.csv", index_col=0)
    .fillna("Mauritius")
)
metadata_df

Unnamed: 0_level_0,Exclude,Organism,Site,Country,Island,name,colour,clade
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
T-Reu_2367,No,Cratopopsis_alluaudi,Plaine_des_Gregues,France,Reunion Island,T-Reu_2367_Cratopopsis_alluaudi_(Plaine_des_Gr...,Blue,Reunion Cratopopsis group
T-Reu_3153,No,Cratopopsis_alluaudi,Les_Makes,France,Reunion Island,T-Reu_3153_Cratopopsis_alluaudi_(Les_Makes-Reu...,Blue,Reunion Cratopopsis group
T-Reu_3812,No,Cratopopsis_alluaudi,Sentier_Trophee_Mondial,France,Reunion Island,T-Reu_3812_Cratopopsis_alluaudi_(Sentier_Troph...,Blue,Reunion Cratopopsis group
T-Reu_4047,No,Cratopopsis_alluaudi,Les_Makes,France,Reunion Island,T-Reu_4047_Cratopopsis_alluaudi_(Les_Makes-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4056,No,Cratopopsis_alluaudi,Les_Makes,France,Reunion Island,T-Reu_4056_Cratopopsis_alluaudi_(Les_Makes-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4057,No,Cratopopsis_alluaudi,Les_Makes,France,Reunion Island,T-Reu_4057_Cratopopsis_alluaudi_(Les_Makes-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4058,No,Cratopopsis_alluaudi,Les_Makes,France,Reunion Island,T-Reu_4058_Cratopopsis_alluaudi_(Les_Makes-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4343,No,Cratopopsis_alluaudi,Cap_Blanc,France,Reunion Island,T-Reu_4343_Cratopopsis_alluaudi_(Cap_Blanc-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4374,No,Cratopopsis_alluaudi,Cap_Blanc,France,Reunion Island,T-Reu_4374_Cratopopsis_alluaudi_(Cap_Blanc-Reu...,Blue,Reunion Cratopopsis group
T-Reu_4396,No,Cratopopsis_alluaudi,Cap_Blanc,France,Reunion Island,T-Reu_4396_Cratopopsis_alluaudi_(Cap_Blanc-Reu...,Blue,Reunion Cratopopsis group


In [12]:
# Distinct values found in the "Island" column of the metadata table.
set(metadata_df["Island"].unique())

{'GenBank_(GU176345.1)', 'Mauritius', 'Reunion Island', 'Rodrigues Island'}

In [16]:
## Pack everything into one nested dictionary: island -> clade -> list.
## Every clade listed in the metadata for an island gets an empty list.
data_dicts = {
    island: {
        clade: []
        for clade in set(metadata_df[metadata_df["Island"] == island]["clade"])
    }
    for island in ("Reunion Island", "Mauritius")
}
print(data_dicts)

{'Mauritius': {'Mauritius Cratopopsis group': [], 'Cratopus melanocephalus': [], 'Cratopus carei': [], 'Cratopus aeneoniger group': [], 'Cratopus murinus': [], 'Cratopus caliginosus': [], 'Cratopus molitor': [], 'Cratopus viridipunctatus': [], 'Scaevinus subtruncatus': [], 'Cratopus variegatus': [], 'Cratopus mundulus': [], 'Cratopus striga': [], 'Cratopus psittacus': [], 'Cratopus armatus': [], 'Cratopus nigrogranatus': [], 'Cratopus viridilimbatus': [], 'Cratopus ovalis': [], 'Cratopus fasciger': [], 'Cratopus punctum': []}, 'Reunion Island': {'Mauritius Cratopopsis group': [], 'Reunion Cratopopsis group': [], 'Cratopus sumptuosus': [], 'Cratopus septemvittatus': [], 'Cratopus nigridorsis group': [], 'Cratopus aeneoniger group': [], 'Cratopus nanus': [], 'Scaevinus dombayae': [], 'Cratopus humeralis group': [], 'Cratopus bernei': [], 'Cratopus ditissimus': [], 'Cratopus frapieri group': [], 'Cratopus murinus': [], 'Cratopus punctum': []}}


## Sort sequences into clades per island

In [17]:
nexfile = "RAxML/Input/Supplementary_COII.nex"

# Read the file inside a context manager so the handle is closed right away
# instead of being left open until garbage collection.
with open(nexfile) as infile:
    lines = infile.readlines()
# Hard-coded 0-based slice of the file; each kept line is parsed below as
# "<sample> <sequence>".
# NOTE(review): presumably this is the data matrix of the nexus file -- the
# offsets must be re-checked if the input file changes.
lines = lines[921:1831]
print(len(lines))

# Empty the per-clade lists first so that re-running this cell does not append
# every record a second time and inflate the per-clade counts.
for clades in data_dicts.values():
    for seqs in clades.values():
        del seqs[:]

# Island labels that occur in the metadata but are not sorted into data_dicts.
skipped_islands = ["GenBank_(GU176345.1)", "Rodrigues Island"]

for line in lines:
    record = line.strip()
    # Exactly two whitespace-separated fields are expected; anything else raises.
    sample, sequence = record.split()
    # Look the sample's metadata row up once instead of three times per line.
    sample_meta = metadata_df.loc[sample]
    island = sample_meta["Island"]
    if island in skipped_islands:
        continue
    data_dicts[island][sample_meta["clade"]].append(record)

# For each island, report the number of clades and the number of stored
# records per clade, and keep the per-clade counts in `abunds`.
abunds = {}
for island, clades in data_dicts.items():
    print(island, len(clades))
    for clade, records in clades.items():
        print(clade, len(records))
    abunds[island] = [len(records) for records in clades.values()]
    print("Numinds {} {}".format(island, sum(abunds[island])))
    print("\n")

910
('Mauritius', 19)
('Mauritius Cratopopsis group', 9)
('Cratopus melanocephalus', 15)
('Cratopus carei', 5)
('Cratopus aeneoniger group', 96)
('Cratopus murinus', 70)
('Cratopus caliginosus', 42)
('Cratopus molitor', 1)
('Cratopus viridipunctatus', 1)
('Scaevinus subtruncatus', 10)
('Cratopus variegatus', 4)
('Cratopus mundulus', 4)
('Cratopus striga', 1)
('Cratopus psittacus', 6)
('Cratopus armatus', 14)
('Cratopus nigrogranatus', 3)
('Cratopus viridilimbatus', 25)
('Cratopus ovalis', 23)
('Cratopus fasciger', 1)
('Cratopus punctum', 13)
Numinds Mauritius 343


('Reunion Island', 14)
('Mauritius Cratopopsis group', 4)
('Reunion Cratopopsis group', 93)
('Cratopus sumptuosus', 88)
('Cratopus septemvittatus', 3)
('Cratopus nigridorsis group', 14)
('Cratopus aeneoniger group', 22)
('Cratopus nanus', 97)
('Scaevinus dombayae', 9)
('Cratopus humeralis group', 77)
('Cratopus bernei', 4)
('Cratopus ditissimus', 7)
('Cratopus frapieri group', 101)
('Cratopus murinus', 15)
('Cratopus punctum

In [20]:
# Entropy of the per-clade abundance counts on each island
# (scipy.stats.entropy with its default arguments).
for island, counts in abunds.items():
    print(island, entropy(counts))

('Mauritius', 2.245779463443025)
('Reunion Island', 2.094381971099274)


## Write sequences to fasta files per clade per island

In [75]:
fastadir = "weevil_fasta/"
reunion_dir = fastadir + "reunion/"
mauritius_dir = fastadir + "mauritius/"
# Output directory for each island label used as a key in data_dicts.
dirs = {"Reunion Island":reunion_dir, "Mauritius":mauritius_dir}

# Create each directory on its own. Guarding all three with a single check on
# mauritius_dir raised an error when weevil_fasta/ already existed, and left
# reunion/ missing when only mauritius/ was present.
for outdir in (fastadir, reunion_dir, mauritius_dir):
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

# Write one fasta file per clade per island; spaces in the clade name become
# underscores in the file name.
for i in data_dicts:
    print(i)
    for c in data_dicts[i]:
        clade_fasta = os.path.join(dirs[i], c.replace(" ", "_") + ".fasta")
        with open(clade_fasta, 'w') as outfile:
            for samp in data_dicts[i][c]:
                # Each stored record is "<name> <sequence>".
                name, seq = samp.split()
                outfile.write(">{}\n{}\n".format(name, seq))

Mauritius
Reunion Island


## Make observed SGD files

In [86]:
# Shell out to the make_obs.py script in the parent directory, once per island:
# -f is given the island's fasta directory and -o the name of the .obs file.
# NOTE(review): presumably -f is the input folder and -o the output file --
# confirm against make_obs.py.
!../make_obs.py -f weevil_fasta/mauritius/ -o mauritius.obs
!../make_obs.py -f weevil_fasta/reunion/ -o reunion.obs


In [87]:
# Print both .obs files, one after the other, to inspect their contents.
!cat mauritius.obs
!cat reunion.obs

	bin_0	bin_1	bin_2	bin_3	bin_4	bin_5	bin_6	bin_7	bin_8	bin_9
4	5	1	6	1	0	0	0	1	1	bin_0	bin_1	bin_2	bin_3	bin_4	bin_5	bin_6	bin_7	bin_8	bin_9
0	4	2	3	3	0	0	1	0	1