In [2]:
#client.shutdown()

import os
from getpass import getpass

import dask.dataframe as dd
import numpy as np
import pandas as pd
from dask.distributed import Client, SSHCluster

# SSH credentials must never be stored in the notebook: read the password from
# the DASK_SSH_PASSWORD environment variable, falling back to an interactive
# prompt. The password that was previously committed here should be considered
# compromised and rotated.
ssh_password = os.environ.get("DASK_SSH_PASSWORD") or getpass(
    "SSH password for the Dask cluster hosts: "
)

cluster = SSHCluster(
    # The first host runs the scheduler; every following host runs a worker,
    # so the "scheduler" machine also hosts one worker.
    ["scheduler", "scheduler", "worker1", "worker2"],
    # NOTE(review): known_hosts=None disables SSH host-key verification;
    # acceptable only on a trusted private network.
    connect_options={"known_hosts": None, "password": ssh_password},
    scheduler_options={"port": 8786, "dashboard_address": ":8788"}
)

# Connect this notebook session to the cluster; subsequent Dask computations
# are submitted through this client.
client = Client(cluster)

2024-07-04 12:11:21,627 - distributed.deploy.ssh - INFO - 2024-07-04 12:11:21,626 - distributed.http.proxy - INFO - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
2024-07-04 12:11:21,655 - distributed.deploy.ssh - INFO - 2024-07-04 12:11:21,655 - distributed.scheduler - INFO - State start
2024-07-04 12:11:21,659 - distributed.deploy.ssh - INFO - 2024-07-04 12:11:21,659 - distributed.scheduler - INFO -   Scheduler at:   tcp://10.67.22.174:8786
2024-07-04 12:11:22,085 - distributed.deploy.ssh - INFO - 2024-07-04 12:11:22,085 - distributed.nanny - INFO -         Start Nanny at: 'tcp://10.67.22.174:34655'
2024-07-04 12:11:22,410 - distributed.deploy.ssh - INFO - 2024-07-04 12:11:22,409 - distributed.worker - INFO -       Start worker at:   tcp://10.67.22.174:35657
2024-07-04 12:11:22,612 - distributed.deploy.ssh - INFO - 2024-07-04 12:11:22,611 - distributed.nanny - INFO -         Start Nanny at: 'tcp://10.67.22.25

We've chosen to use Dask since our dataset is too large to fit in memory. Dask is an open-source Python library designed for parallel computing, allowing Python code to scale from multi-core local machines to distributed clusters such as the one set up above. We primarily work with Dask DataFrame, a high-level collection that enables parallelization of DataFrame-based workloads. A Dask DataFrame consists of many smaller Pandas DataFrames, partitioned along the index.

Firstly, we specify the paths of the two datasets and run the following code for each of them, one at a time:

In [12]:
# Dataset selection: exactly one `path` assignment should be active at a time.
#
# NOTE(review): this entry was originally labelled "Z = 0.02", but the directory
# name says Z0.00014 -- confirm which metallicity this file belongs to.
path = '/mnt/data/Z0.00014_a3_cM70/output_0.csv'

# Alternative: every output file of the Z = 0.00014 run (glob pattern).
# path = '/mnt/data/Z0.00014_a3_cM70/output_*.csv'

# Lazily read the CSV file(s) into a Dask DataFrame; BEvent is forced to float.
df = dd.read_csv(path, dtype={'BEvent': float})

In [13]:
# Preview the first five rows to check the schema.
df.head(n=5)


Unnamed: 0,ID,name,Mass_0,Radius_0,Phase_0,PhaseBSE_0,RemnantType_0,Hsup_0,Mass_1,Radius_1,...,RemnantType_1,Hsup_1,Semimajor,Eccentricity,BWorldtime,Period,GWtime,RL0,RL1,BEvent
0,0,144668535680303,1.208462,1.6e-05,7,13.0,5,0.0,11.84389,3.936276,...,0,0.0,139.0903,0.437699,19.30851,0.143997,140246700.0,28.92884,80.19596,0.0
1,0,144668535680303,1.208462,1.6e-05,7,13.0,5,0.0,11.84389,3.936276,...,0,0.0,139.0903,0.437699,19.30851,0.143997,140246700.0,28.92884,80.19596,-1.0
2,0,144668535680303,1.208462,1.6e-05,7,13.0,5,0.0,11.84374,4.250581,...,0,0.0,139.092,0.437699,20.46043,0.144001,140256600.0,28.92928,80.19674,-1.0
3,0,144668535680303,1.208462,1.6e-05,7,13.0,5,0.0,11.84363,4.513947,...,0,0.0,139.0932,0.437699,21.23934,0.144003,140264000.0,28.92962,80.19733,-1.0
4,0,144668535680303,1.208462,1.6e-05,7,13.0,5,0.0,11.84353,4.75259,...,0,0.0,139.0942,0.437699,21.90684,0.144005,140270400.0,28.9299,80.19784,-1.0


The datasets are two outputs computed at different metallicities by the population synthesis code SEVN (Stellar EVolution for N-body). We work with binary systems whose evolution is described by SEVN including the following processes: wind mass transfer, Roche-lobe overflow (RLO), common envelope (CE), stellar tides, circularization at the RLO onset, collision at periastron, orbit decay by GW emission, and stellar mergers.

SEVN uses a prediction-correction method to adapt the time-step accounting for the large physical range of timescales typical of stellar and binary evolution. To decide the time-step, it looks at a sub-set of stellar and binary properties: if any of them changes too much during a time-step, it reduces the time-step and repeats the calculation. A special treatment is used when a star approaches a change of phase to guarantee that the stellar properties are evaluated just after and before the change of phase. 

The main goal of this section is to identify the systems that transition through a phase involving a black hole and a Wolf-Rayet star, form a binary black hole, and end up merging via emission of gravitational waves.

### Selecting systems

First, we select the IDs of the systems that, at some time step during their evolution, consist of a Wolf-Rayet star (PhaseBSE=7 or 8) and a black hole (PhaseBSE=14). Additionally, we select and save in `data_WRBH` the features we'll use in our analysis: _ID_, _Mass_0_, _Mass_1_, _Semimajor_, and _Eccentricity_.

Next, we select the IDs of the binary black holes that merge via gravitational-wave emission. To do so, we require that the sum of _GWtime_ (GW orbital decay time in Myr) and _BWorldtime_ (time elapsed in the simulation) is smaller than 14 billion years, an upper limit on the estimated age of the universe.

In [14]:
# Upper limit (in Myr) on GWtime + BWorldtime for a merger to count:
# 14 Gyr, taken as an upper bound on the age of the universe.
MAX_MERGER_TIME_MYR = 14_000

# Keep only the columns needed for the selection and the later analysis.
df_filtered = df[['ID', 'Mass_0', 'PhaseBSE_0', 'Mass_1', 'PhaseBSE_1', 'Semimajor', 'Eccentricity', 'BWorldtime', 'GWtime' ]]

# cond_1 / cond_2: one star is a Wolf-Rayet (PhaseBSE 7 or 8) and the other a
# black hole (PhaseBSE 14), in either ordering of the two components.
cond_1 = ((df.PhaseBSE_0==8) | (df.PhaseBSE_0==7)) & (df.PhaseBSE_1==14.0)
cond_2 = ((df.PhaseBSE_1==8) | (df.PhaseBSE_1==7)) & (df.PhaseBSE_0==14.0)
# cond_3: both components are black holes.
cond_3 = (df.PhaseBSE_0 == 14.0) & (df.PhaseBSE_1 == 14.0)
# cond_4: the system merges through GW emission within the time limit.
cond_4 = (df.GWtime + df.BWorldtime) < MAX_MERGER_TIME_MYR

# Drop early the rows that cannot satisfy any of the selections below.
df_filtered = df_filtered.loc[cond_1 | cond_2 | cond_3 | cond_4]

# WR-BH systems: keep a single row per ID.
data_WRBH = df_filtered.loc[cond_1 | cond_2].drop_duplicates('ID')

# BH-BH systems that merge via GW emission: unique IDs.
id_GW = df_filtered.loc[cond_3 & cond_4]['ID'].drop_duplicates()

# `.compute()` returns a new object and leaves the lazy collection untouched,
# so calling it without keeping the result only wastes a full pass over the
# CSV files, and every later use (np.intersect1d, len, ...) re-runs the graph.
# Persist both selections in one call instead: the shared upstream work is
# executed once on the cluster and the names stay Dask collections.
data_WRBH, id_GW = client.persist([data_WRBH, id_GW])
id_WRBH = data_WRBH['ID']  # taking only the IDs

# IDs of WR-BH systems that evolve into BH-BH systems merging via GW emission.
id_both = np.intersect1d(id_WRBH.compute(), id_GW.compute())

In [15]:
# Report how many systems fall in each selection (label, collection of IDs).
selection_summary = (
    ('Binary systems of a Wolf-Rayet and a Black Hole: ', id_WRBH),
    ('Binary systems that merge via GW: ', id_GW),
    ('Binary systems that evolve from a WR-BH and merge via GW: ', id_both),
)
for label, ids in selection_summary:
    print(label, len(ids))

Binary systems of a Wolf-Rayet and a Black Hole:  4528
Binary systems that merge via GW:  857
Binary systems that evolve from a WR-BH and merge via GW:  718
