## Step 1: Imports

In [1]:
import pandas as pd
import torch
import matplotlib.pyplot as plt
import numpy as np
import torch
import random
import pickle
import os
import sys

## Step 2: Define configs

In [2]:
config = {
    "seed": 0,
    "cutoff_date": "2020-01-01",
    "test_cutoff_date": "2022-05-01",
    "max_len": 384,
    "batch_size": 1,
    "learning_rate": 1e-4,
    "weight_decay": 0.0,
    "mixed_precision": "bf16",
    "model_config_path": "../working/configs/pairwise.yaml",  # Adjust path as needed
    "epochs": 10,
    "cos_epoch": 5,
    "loss_power_scale": 1.0,
    "max_cycles": 1,
    "grad_clip": 0.1,
    "gradient_accumulation_steps": 1,
    "d_clamp": 30,
    "max_len_filter": 9999999,
    "structural_violation_epoch": 50,
    "balance_weight": False,
}

In [3]:
Test = False

## Step 3: Load test data

In [4]:
test_data = pd.read_csv("/kaggle/input/folding-strategy-rna/rna_dataset_with_fold.csv")
test_data = test_data[test_data["fold"]==1]
test_data = test_data.reset_index(drop=True)
if Test:
    test_data = test_data[0:1]
    
test_data.head()

Unnamed: 0,target_id,sequence,temporal_cutoff,description,all_sequences,sequence_length,fold
0,1FFK_9,UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAAC...,2000-08-14,CRYSTAL STRUCTURE OF THE LARGE RIBOSOMAL SUBUN...,>1FFK_1|Chain A[auth 0]|23S RRNA|Haloarcula ma...,122,1
1,1FOQ_A,UCAAUGGUACGGUACUUCCAUUGUCAUGUGUAUGUUGGGGAUUAAA...,2000-12-22,PENTAMERIC MODEL OF THE BACTERIOPHAGE PHI29 PR...,">1FOQ_1|Chains A, B, C, D, E|BACTERIOPHAGE PHI...",120,1
2,1S9S_A,GGCGGUACUAGUUGAGAAACUAGCUCUGUAUCUGGCGGACCCGUGG...,2004-04-06,SOLUTION STRUCTURE OF MLV PSI SITE,>1S9S_1|Chain A|MLV Psi encapsidation site|nul...,101,1
3,1ZC8_Z,GCCCGUCACGCCAUGGGAGCGGGCUCUACCCGAAGUCGCCGGGAGC...,2005-04-19,"Coordinates of tmRNA, SmpB, EF-Tu and h44 fitt...",>1ZC8_1|Chain A|TLD 16S ribosomal RNA|Thermus ...,91,1
4,1YSH_B,UAGACGGUGGGAGAGGGUGGUGGAAUUCCCGGAGUAGCGGUGAAAU...,2005-07-05,Localization and dynamic behavior of ribosomal...,>1YSH_1|Chain A|RNA (28-MER)|synthetic constru...,101,1


## Step 4: Create Dataset

In [5]:
from torch.utils.data import Dataset, DataLoader

class RNADataset(Dataset):
    def __init__(self,data):
        self.data=data
        self.tokens={nt:i for i,nt in enumerate('ACGU')}

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        seq = self.data.loc[idx,'sequence']
        seq = seq.replace("-","")
        seq = seq.replace("X","")
        sequence=[self.tokens[nt] for nt in seq]
        sequence=np.array(sequence)
        sequence=torch.tensor(sequence)
        return {'sequence':sequence}

In [6]:
test_dataset=RNADataset(test_data)

## Step 5: Load the model

In [7]:
sys.path.append("/kaggle/input/ribonanzanet2/pytorch/alpha/1")

import torch.nn as nn
from Network import *

class SinusoidalPosEmb(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        device = x.device
        half_dim = self.dim // 2
        emb = math.log(10000) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, device=device) * -emb)
        emb = x[:, None] * emb[None, :]
        emb = torch.cat((emb.sin(), emb.cos()), dim=-1)
        return emb

class finetuned_RibonanzaNet(RibonanzaNet):
    def __init__(self, config):
        config.dropout=0.1
        config.use_grad_checkpoint=True
        super(finetuned_RibonanzaNet, self).__init__(config)

        self.dropout=nn.Dropout(0.0)

        decoder_dim=768
        self.structure_module=[SimpleStructureModule(d_model=decoder_dim, nhead=12, 
                 dim_feedforward=decoder_dim*4, pairwise_dimension=config.pairwise_dimension, dropout=0.0) for i in range(6)]
        self.structure_module=nn.ModuleList(self.structure_module)

        self.xyz_embedder=nn.Linear(3,decoder_dim)
        self.xyz_norm=nn.LayerNorm(decoder_dim)
        self.xyz_predictor=nn.Linear(decoder_dim,3)
        
        self.adaptor=nn.Sequential(nn.Linear(config.ninp,decoder_dim),nn.LayerNorm(decoder_dim))

        self.distogram_predictor=nn.Sequential(nn.LayerNorm(config.pairwise_dimension),
                                                nn.Linear(config.pairwise_dimension,40))

        self.time_embedder=SinusoidalPosEmb(decoder_dim)

        self.time_mlp=nn.Sequential(nn.Linear(decoder_dim,decoder_dim),
                                    nn.ReLU(),  
                                    nn.Linear(decoder_dim,decoder_dim))
        self.time_norm=nn.LayerNorm(decoder_dim)

        self.distance2pairwise=nn.Linear(1,config.pairwise_dimension,bias=False)

        self.pair_mlp=nn.Sequential(nn.Linear(config.pairwise_dimension,config.pairwise_dimension),
                                    nn.ReLU(),
                                    nn.Linear(config.pairwise_dimension,config.pairwise_dimension))


    def custom(self, module):
        def custom_forward(*inputs):
            inputs = module(*inputs)
            return inputs
        return custom_forward
    
    def embed_pair_distance(self,inputs):
        pairwise_features,xyz=inputs
        distance_matrix=xyz[:,None,:,:]-xyz[:,:,None,:]
        distance_matrix=(distance_matrix**2).sum(-1).clip(2,37**2).sqrt()
        distance_matrix=distance_matrix[:,:,:,None]
        pairwise_features=pairwise_features+self.distance2pairwise(distance_matrix)

        return pairwise_features

    def forward(self,src,xyz,t):
        
        #with torch.no_grad():
        sequence_features, pairwise_features=self.get_embeddings(src, torch.ones_like(src).long().to(src.device))
        
        distogram=self.distogram_predictor(pairwise_features)

        sequence_features=self.adaptor(sequence_features)

        decoder_batch_size=xyz.shape[0]
        sequence_features=sequence_features.repeat(decoder_batch_size,1,1)
        

        pairwise_features=pairwise_features.expand(decoder_batch_size,-1,-1,-1)

        pairwise_features= checkpoint.checkpoint(self.custom(self.embed_pair_distance), [pairwise_features,xyz],use_reentrant=False)

        time_embed=self.time_embedder(t).unsqueeze(1)
        tgt=self.xyz_norm(sequence_features+self.xyz_embedder(xyz)+time_embed)

        tgt=self.time_norm(tgt+self.time_mlp(tgt))

        for layer in self.structure_module:
            #tgt=layer([tgt, sequence_features,pairwise_features,xyz,None])
            tgt=checkpoint.checkpoint(self.custom(layer),
            [tgt, sequence_features,pairwise_features,xyz,None],
            use_reentrant=False)
            # xyz=xyz+self.xyz_predictor(sequence_features).squeeze(0)
            # xyzs.append(xyz)
            #print(sequence_features.shape)
        
        xyz=self.xyz_predictor(tgt).squeeze(0)
        #.squeeze(0)

        return xyz, distogram
    

    def denoise(self,sequence_features,pairwise_features,xyz,t):
        decoder_batch_size=xyz.shape[0]
        sequence_features=sequence_features.expand(decoder_batch_size,-1,-1)
        pairwise_features=pairwise_features.expand(decoder_batch_size,-1,-1,-1)

        pairwise_features=self.embed_pair_distance([pairwise_features,xyz])

        sequence_features=self.adaptor(sequence_features)
        time_embed=self.time_embedder(t).unsqueeze(1)
        tgt=self.xyz_norm(sequence_features+self.xyz_embedder(xyz)+time_embed)
        tgt=self.time_norm(tgt+self.time_mlp(tgt))
        #xyz_batch_size=xyz.shape[0]
        


        for layer in self.structure_module:
            tgt=layer([tgt, sequence_features,pairwise_features,xyz,None])
            # xyz=xyz+self.xyz_predictor(sequence_features).squeeze(0)
            # xyzs.append(xyz)
            #print(sequence_features.shape)
        xyz=self.xyz_predictor(tgt).squeeze(0)
        # print(xyz.shape)
        # exit()
        return xyz




class SimpleStructureModule(nn.Module):

    def __init__(self, d_model, nhead, 
                 dim_feedforward, pairwise_dimension, dropout=0.1,
                 ):
        super(SimpleStructureModule, self).__init__()
        self.self_attn = MultiHeadAttention(d_model, nhead, d_model//nhead, d_model//nhead, dropout=dropout)
        

        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.pairwise2heads=nn.Linear(pairwise_dimension,nhead,bias=False)
        self.pairwise_norm=nn.LayerNorm(pairwise_dimension)
        self.activation = nn.GELU()

        
    def custom(self, module):
        def custom_forward(*inputs):
            inputs = module(*inputs)
            return inputs
        return custom_forward

    def forward(self, input):
        tgt , src,  pairwise_features, pred_t, src_mask = input

        pairwise_bias=self.pairwise2heads(self.pairwise_norm(pairwise_features)).permute(0,3,1,2)

        res=tgt
        tgt,attention_weights = self.self_attn(tgt, tgt, tgt, mask=pairwise_bias, src_mask=src_mask)
        tgt = res + self.dropout1(tgt)
        tgt = self.norm1(tgt)

        res=tgt
        tgt = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = res + self.dropout2(tgt)
        tgt = self.norm2(tgt)


        return tgt


In [8]:
import yaml

class Config:
    def __init__(self, **entries):
        self.__dict__.update(entries)
        self.entries=entries

    def print(self):
        print(self.entries)

def load_config_from_yaml(file_path):
    with open(file_path, 'r') as file:
        config = yaml.safe_load(file)
    return Config(**config)


model=finetuned_RibonanzaNet(load_config_from_yaml("/kaggle/input/ribonanzanet2/pytorch/alpha/1/pairwise.yaml")).cuda()

constructing 48 ConvTransformerEncoderLayers


In [9]:
state_dict=torch.load("/kaggle/input/rnet3d-ddpm/RibonanzaNet-3D-DDPM.pt",map_location='cpu')

#get rid of module. from ddp state dict
new_state_dict={}

for key in state_dict:
    new_state_dict[key[7:]]=state_dict[key]

model.load_state_dict(new_state_dict)

  state_dict=torch.load("/kaggle/input/rnet3d-ddpm/RibonanzaNet-3D-DDPM.pt",map_location='cpu')


<All keys matched successfully>

## Step 6: Load diffusion model

In [10]:
class Diffusion(nn.Module):
    def __init__(self, model=None, n_times=1000, beta_minmax=[1e-4, 2e-2]):
    
        super(Diffusion, self).__init__()
    
        self.n_times = n_times

        self.model = model
        
        # define linear variance schedule(betas)
        beta_1, beta_T = beta_minmax
        betas = torch.linspace(start=beta_1, end=beta_T, steps=n_times)#.to(device) # follows DDPM paper
        self.sqrt_betas = torch.sqrt(betas)
                                     
        # define alpha for forward diffusion kernel
        self.alphas = 1 - betas
        self.sqrt_alphas = torch.sqrt(self.alphas)
        alpha_bars = torch.cumprod(self.alphas, dim=0)
        self.sqrt_one_minus_alpha_bars = torch.sqrt(1-alpha_bars)
        self.sqrt_alpha_bars = torch.sqrt(alpha_bars)
        
        #self.device = device
    
    def extract(self, a, t, x_shape):
        """
            from lucidrains' implementation
                https://github.com/lucidrains/denoising-diffusion-pytorch/blob/beb2f2d8dd9b4f2bd5be4719f37082fe061ee450/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py#L376
        """
        b, *_ = t.shape
        out = a.gather(-1, t)
        return out.reshape(b, *((1,) * (len(x_shape) - 1)))
    
    def scale_to_minus_one_to_one(self, x):
        # according to the DDPMs paper, normalization seems to be crucial to train reverse process network
        return x * 2 - 1
    
    def reverse_scale_to_zero_to_one(self, x):
        return (x + 1) * 0.5
    
    def make_noisy(self, x_zeros, t): 
        # assume we get raw data, so center and scale by 35
        x_zeros = x_zeros - torch.nanmean(x_zeros,1,keepdim=True)
        x_zeros = x_zeros/35
        #rotate randomly
        x_zeros = random_rotation_point_cloud_torch_batch(x_zeros)


        # perturb x_0 into x_t (i.e., take x_0 samples into forward diffusion kernels)
        epsilon = torch.randn_like(x_zeros).to(x_zeros.device)
        
        sqrt_alpha_bar = self.extract(self.sqrt_alpha_bars.to(x_zeros.device), t, x_zeros.shape)
        sqrt_one_minus_alpha_bar = self.extract(self.sqrt_one_minus_alpha_bars.to(x_zeros.device), t, x_zeros.shape)
        
        # Let's make noisy sample!: i.e., Forward process with fixed variance schedule
        #      i.e., sqrt(alpha_bar_t) * x_zero + sqrt(1-alpha_bar_t) * epsilon
        noisy_sample = x_zeros * sqrt_alpha_bar + epsilon * sqrt_one_minus_alpha_bar
    
        return noisy_sample.detach(), epsilon
    
    
    def forward(self, x_zeros):
        x_zeros = self.scale_to_minus_one_to_one(x_zeros)
        
        B, _, _, _ = x_zeros.shape
        
        # (1) randomly choose diffusion time-step
        t = torch.randint(low=0, high=self.n_times, size=(B,)).long().to(x_zeros.device)
        
        # (2) forward diffusion process: perturb x_zeros with fixed variance schedule
        perturbed_images, epsilon = self.make_noisy(x_zeros, t)
        
        # (3) predict epsilon(noise) given perturbed data at diffusion-timestep t.
        pred_epsilon = self.model(perturbed_images, t)
        
        return perturbed_images, epsilon, pred_epsilon
    
    
    def denoise_at_t(self, x_t, sequence_features, pairwise_features, timestep, t):
        B, _, _ = x_t.shape
        if t > 1:
            z = torch.randn_like(x_t).to(sequence_features.device)
        else:
            z = torch.zeros_like(x_t).to(sequence_features.device)
        
        # at inference, we use predicted noise(epsilon) to restore perturbed data sample.
        epsilon_pred = self.model.denoise(sequence_features, pairwise_features, x_t, timestep)
        
        alpha = self.extract(self.alphas.to(x_t.device), timestep, x_t.shape)
        sqrt_alpha = self.extract(self.sqrt_alphas.to(x_t.device), timestep, x_t.shape)
        sqrt_one_minus_alpha_bar = self.extract(self.sqrt_one_minus_alpha_bars.to(x_t.device), timestep, x_t.shape)
        sqrt_beta = self.extract(self.sqrt_betas.to(x_t.device), timestep, x_t.shape)
        
        # denoise at time t, utilizing predicted noise
        x_t_minus_1 = 1 / sqrt_alpha * (x_t - (1-alpha)/sqrt_one_minus_alpha_bar*epsilon_pred) + sqrt_beta*z
        
        return x_t_minus_1#.clamp(-1., 1)
                
    def sample(self, src, N):
        # start from random noise vector, NxLx3
        x_t = torch.randn((N, src.shape[1], 3)).to(src.device)
        
        # autoregressively denoise from x_T to x_0
        #     i.e., generate image from noise, x_T

        #first get conditioning
        sequence_features, pairwise_features=self.model.get_embeddings(src, torch.ones_like(src).long().to(src.device))
        # sequence_features=sequence_features.expand(N,-1,-1)
        # pairwise_features=pairwise_features.expand(N,-1,-1,-1)
        distogram=self.model.distogram_predictor(pairwise_features).squeeze()
        distogram=distogram.squeeze()[:,:,2:40]*torch.arange(2,40).float().cuda() 
        distogram=distogram.sum(-1)  

        for t in range(self.n_times-1, -1, -1):
            timestep = torch.tensor([t]).repeat_interleave(N, dim=0).long().to(src.device)
            x_t = self.denoise_at_t(x_t, sequence_features, pairwise_features, timestep, t)
        
        # denormalize x_0 into 0 ~ 1 ranged values.
        #x_0 = self.reverse_scale_to_zero_to_one(x_t)
        x_0 = x_t * 35
        return x_0, distogram


In [11]:
diffusion = Diffusion(model,1000).cuda()

## Step 7: Model Inference

In [12]:
from tqdm import tqdm
model.eval()
preds=[]
for i in tqdm(range(len(test_dataset))):
    src       = test_dataset[i]['sequence'].long()
    src       = src.unsqueeze(0).cuda()
    target_id = test_data.loc[i,'target_id']

    predicted_dm=[]
    with torch.no_grad():
        xyz,distogram=diffusion.sample(src,5)

    preds.append(xyz.cpu().numpy())

100%|██████████| 178/178 [1:05:02<00:00, 21.93s/it]


In [13]:
ID=[]
resname=[]
resid=[]
x=[]
y=[]
z=[]

data=[]

for i in range(len(test_data)):
    seq = test_data.loc[i,'sequence']
    if '-' in seq:
        seq = seq.replace('-', '')
    if 'X' in seq:
        seq = seq.replace('X', '')
    for j in range(len(seq)):
        row=[test_data.loc[i,'target_id']+f"_{j+1}",
             test_data.loc[i,'sequence'][j],
             j+1]

        for k in range(5):
            for kk in range(3):
                row.append(preds[i][k][j][kk])
        data.append(row)

columns=['ID','resname','resid']
for i in range(1,6):
    columns+=[f"x_{i}"]
    columns+=[f"y_{i}"]
    columns+=[f"z_{i}"]
submission=pd.DataFrame(data,columns=columns)


submission

Unnamed: 0,ID,resname,resid,x_1,y_1,z_1,x_2,y_2,z_2,x_3,y_3,z_3,x_4,y_4,z_4,x_5,y_5,z_5
0,1FFK_9_1,U,1,7.699738,-20.951588,-10.459291,-9.077475,-22.036715,7.003453,18.252377,14.101513,-3.145502,-8.053536,21.395567,-8.530622,6.948162,17.010744,-16.263878
1,1FFK_9_2,U,2,2.604925,-26.583666,-5.402189,-3.743415,-23.423130,14.061972,16.348692,20.266676,2.953931,-12.740324,17.967844,-15.564795,16.367758,17.352926,-13.859782
2,1FFK_9_3,A,3,-1.858269,-20.648901,-8.629856,-7.242588,-16.901472,14.087174,11.164626,18.618736,-2.560803,-14.293916,14.972135,-8.315606,10.474780,17.613205,-8.031006
3,1FFK_9_4,G,4,2.482862,-16.927279,-14.155450,-13.652080,-16.734066,9.111277,13.976948,14.822346,-9.066764,-11.856204,19.577190,-2.195501,2.895576,19.112049,-10.485373
4,1FFK_9_5,G,5,4.035524,-12.544548,-17.200239,-16.773188,-14.135696,5.333009,13.538073,10.831151,-13.884682,-9.106638,19.990612,3.029280,-2.998585,18.802664,-10.422861
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25708,8Z1F_T_82,U,82,17.125757,-37.430660,-46.279278,38.029881,-39.412159,-3.585679,-26.464600,49.625088,-4.146244,-14.874205,41.739590,41.535702,-44.940529,-34.320896,13.974545
25709,8Z1F_T_83,C,83,14.976178,-36.009369,-49.506886,37.684425,-42.447384,-0.576920,-29.994040,49.853336,-2.577624,-16.046564,36.524963,44.903652,-44.874931,-36.186527,8.591598
25710,8Z1F_T_84,A,84,14.131742,-30.843573,-52.933029,39.631763,-41.550369,3.707016,-32.971275,46.820107,-2.393400,-19.028858,37.618484,44.520214,-46.720409,-33.855186,4.451171
25711,8Z1F_T_85,U,85,10.865595,-29.266909,-54.795082,37.659523,-41.421227,4.350082,-35.782402,45.110916,-2.816792,-19.209013,37.813499,47.467690,-48.962608,-32.027225,2.451082


In [14]:
train_labels = pd.read_csv("/kaggle/input/stanford-rna-3d-folding/train_labels.csv")

In [15]:
train_labels.rename(columns={'x_1':'x_1_true','y_1':'y_1_true','z_1':'z_1_true'}, inplace=True)

In [16]:
merged_df = submission.merge(train_labels[['x_1_true','y_1_true','z_1_true','ID']], on=["ID"], how="left")

In [17]:
merged_df

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,ID,resname,resid,x_1,y_1,z_1,x_2,y_2,z_2,x_3,...,z_3,x_4,y_4,z_4,x_5,y_5,z_5,x_1_true,y_1_true,z_1_true
0,1FFK_9_1,U,1,7.699738,-20.951588,-10.459291,-9.077475,-22.036715,7.003453,18.252377,...,-3.145502,-8.053536,21.395567,-8.530622,6.948162,17.010744,-16.263878,50.841000,14.916,99.295998
1,1FFK_9_2,U,2,2.604925,-26.583666,-5.402189,-3.743415,-23.423130,14.061972,16.348692,...,2.953931,-12.740324,17.967844,-15.564795,16.367758,17.352926,-13.859782,52.405998,10.401,106.478996
2,1FFK_9_3,A,3,-1.858269,-20.648901,-8.629856,-7.242588,-16.901472,14.087174,11.164626,...,-2.560803,-14.293916,14.972135,-8.315606,10.474780,17.613205,-8.031006,57.773998,14.789,103.852997
3,1FFK_9_4,G,4,2.482862,-16.927279,-14.155450,-13.652080,-16.734066,9.111277,13.976948,...,-9.066764,-11.856204,19.577190,-2.195501,2.895576,19.112049,-10.485373,57.311001,16.924,96.344002
4,1FFK_9_5,G,5,4.035524,-12.544548,-17.200239,-16.773188,-14.135696,5.333009,13.538073,...,-13.884682,-9.106638,19.990612,3.029280,-2.998585,18.802664,-10.422861,57.646999,20.913,92.202003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25708,8Z1F_T_82,U,82,17.125757,-37.430660,-46.279278,38.029881,-39.412159,-3.585679,-26.464600,...,-4.146244,-14.874205,41.739590,41.535702,-44.940529,-34.320896,13.974545,,,
25709,8Z1F_T_83,C,83,14.976178,-36.009369,-49.506886,37.684425,-42.447384,-0.576920,-29.994040,...,-2.577624,-16.046564,36.524963,44.903652,-44.874931,-36.186527,8.591598,,,
25710,8Z1F_T_84,A,84,14.131742,-30.843573,-52.933029,39.631763,-41.550369,3.707016,-32.971275,...,-2.393400,-19.028858,37.618484,44.520214,-46.720409,-33.855186,4.451171,,,
25711,8Z1F_T_85,U,85,10.865595,-29.266909,-54.795082,37.659523,-41.421227,4.350082,-35.782402,...,-2.816792,-19.209013,37.813499,47.467690,-48.962608,-32.027225,2.451082,,,


In [18]:
merged_df.to_csv("ribonanzanet2_oof.csv", index=False)