In [1]:
import os
import subprocess

import pandas as pd
import pyaging as pya
import torch

In [15]:
# Fetch the two supplementary files with curl.
# `--fail` makes curl exit non-zero on an HTTP error instead of saving the
# server's error page under the target file name, and `check=True` turns any
# non-zero exit status into a CalledProcessError, so a failed download stops
# the notebook here rather than surfacing later as a confusing parse error.
subprocess.run(
    [
        "curl", "--fail", "-o", "coefficients.csv",
        "https://static-content.springer.com/esm/art%3A10.1007%2Fs11357-023-00986-0/MediaObjects/11357_2023_986_MOESM14_ESM.csv",
    ],
    check=True,
)
# The trailing `.returncode` keeps the cell's displayed value (0 on success).
subprocess.run(
    [
        "curl", "--fail", "-o", "peaks.tsv",
        "https://static-content.springer.com/esm/art%3A10.1007%2Fs11357-023-00986-0/MediaObjects/11357_2023_986_MOESM9_ESM.tsv",
    ],
    check=True,
).returncode

0

In [3]:
# Load the downloaded coefficient table and mirror its `desc` / `coef` columns
# under the `feature` / `coefficient` names used by the following cells.
df = pd.read_csv('coefficients.csv')
df = df.assign(feature=df.desc.tolist(), coefficient=df['coef'])

In [4]:
# Row 0 of the coefficient table is used as the intercept; every later row is
# one model feature. `.iloc` makes the positional intent explicit instead of
# relying on the index labels happening to start at 0.
features = df['feature'].iloc[1:].tolist()

# Pin the dtype on both tensors. The intercept is read as a NumPy scalar, for
# which torch may infer float64, while the weights come from Python floats
# (float32 under torch's default dtype); an explicit dtype keeps the layer's
# weight and bias consistent.
weights = torch.tensor(df['coefficient'].iloc[1:].tolist(), dtype=torch.float32).unsqueeze(0)
intercept = torch.tensor([float(df['coefficient'].iloc[0])], dtype=torch.float32)

In [5]:
# Instantiate a pyaging linear model sized by the number of features, then
# overwrite the parameters of its `linear` layer with the tensors built above.
model = pya.models.LinearModel(len(features))

linear_layer = model.linear
linear_layer.weight.data = weights
linear_layer.bias.data = intercept

# Display the model as the cell output.
model

LinearModel(
  (linear): Linear(in_features=228, out_features=1, bias=True)
)

In [16]:
def parse_genomic_location(loc: str):
    """Return ``end - start`` for a region string shaped like ``chrom:start-end``.

    The part before the colon is ignored; only the two integer coordinates
    after it are used. A string that does not split into exactly one
    ``:``-separated pair and one ``-``-separated pair of integers raises
    ``ValueError``.
    """
    _chromosome, interval = loc.split(':')
    begin, finish = (int(bound) for bound in interval.split('-'))
    return finish - begin

# Read the peak table and compute one length (end - start) per entry of its
# `desc` column, in file order.
peaks = pd.read_table('peaks.tsv')
preprocessing_helper = list(map(parse_genomic_location, peaks['desc'].tolist()))

In [19]:
# Everything needed to rebuild the model: preprocessing name and its helper
# values, the ordered feature list, and the model's parameters.
weights_dict = dict(
    preprocessing='tpm_norm_log1p',
    preprocessing_helper=preprocessing_helper,
    postprocessing=None,
    postprocessing_helper=None,
    features=features,
    weight_dict=model.state_dict(),
    model_class='LinearModel',
)

# Descriptive metadata stored next to the weights; the pre/postprocessing
# entries are copied from `weights_dict` so the two files cannot disagree.
metadata_dict = {
    'species': 'Homo sapiens',
    'data_type': 'atac',
    'year': 2023,
    'implementation_approved_by_author(s)': '⌛',
    'preprocessing': weights_dict['preprocessing'],
    'postprocessing': weights_dict['postprocessing'],
    'citation': (
        'Morandini, Francesco, et al. '
        '"ATAC-clock: An aging clock based on chromatin accessibility." '
        'GeroScience (2023): 1-18.'
    ),
    'doi': 'https://doi.org/10.1007/s11357-023-00986-0',
    'notes': 'This is the model trained on the ATAC data produced by the Ocampo lab plus a public dataset',
}

In [20]:
# Serialize the weights bundle and the metadata to their sibling directories.
for payload, destination in (
    (weights_dict, '../weights/ocampoatac1.pt'),
    (metadata_dict, '../metadata/ocampoatac1.pt'),
):
    torch.save(payload, destination)

In [8]:
# Delete the two files downloaded earlier in this notebook via the shell.
# The return value of the first call is discarded; the second call is the
# cell's last expression, so its return value is what the cell displays.
os.system("rm coefficients.csv")
os.system("rm peaks.tsv")

0