In [1]:
import torch
import pandas as pd
import pyaging as pya
import os

In [2]:
# Model terms as supplied by the authors, kept as (term, coefficient) pairs so
# each coefficient sits next to the term it belongs to. The first entry is the
# intercept; every other entry is a CpG probe identifier.
_han_terms = [
    ("(Intercept)", 0.711184864),
    ('cg19283806', -0.588354066),
    ('cg11807280', -0.212038592),
    ('cg00329615', 0.014351188),
    ('cg22454769', 0.051285529),
    ('cg16867657', 2.152191741),
    ('cg22796704', -0.689940565),
    ('cg09809672', -0.643729974),
    ('cg18618815', -0.772516118),
    ('cg25533247', 0.116662569),
    ('cg02286081', -0.233409678),
    ('cg20222376', 0.002802259),
    ('cg19344626', -0.062172432),
    ('cg07082267', -0.224027294),
    ('cg15845821', 1.535209377),
    ('cg11741201', 0.344367661),
    ('cg16054275', 0.188826525),
    ('cg18933331', -0.409150014),
    ('cg20249566', -0.776065004),
    ('cg16604658', 0.500336643),
    ('cg07583137', 0.06125005),
    ('cg16008966', -0.391624093),
    ('cg14556683', 0.100449175),
    ('cg03746976', 0.02000403),
    ('cg14314729', 0.266044453),
    ('cg03431918', -0.259829677),
    ('cg22156456', 0.254063071),
    ('cg23078123', -0.726178338),
    ('cg09748749', -1.141947121),
    ('cg17457912', -0.06322441),
    ('cg06492796', -0.196926134),
    ('cg17593342', 0.85613244),
    ('cg05308819', -0.887977059),
    ('cg22512670', -0.334654336),
    ('cg01820962', -0.854110638),
    ('cg06639320', 1.916122401),
    ('cg03224418', 0.92208575),
    ('cg17436656', -0.070665617),
    ('cg19500607', 0.524707402),
    ('cg03735592', 0.319375235),
    ('cg20669012', 0.376055859),
    ('cg19761273', 0.033361038),
    ('cg07080372', -1.458360975),
    ('cg03638795', -0.267930475),
    ('cg19722847', -0.590085273),
    ('cg24711336', 0.642506165),
    ('cg26935102', 0.470352872),
    ('cg10221746', 0.273581649),
    ('cg02085953', -0.637989789),
    ('cg04604946', -1.109388991),
    ('cg08558886', -0.16886654),
    ('cg22361181', 0.662451226),
    ('cg04208403', -0.091891613),
    ('cg12623930', 0.086290028),
    ('cg21572722', -0.426089316),
    ('cg17885226', 0.32615363),
    ('cg00748589', 2.535639458),
    ('cg13033938', -3.626802894),
    ('cg19784428', 0.097619541),
    ('cg22016779', -0.427604263),
    ('cg01974375', -0.41418774),
    ('cg25256723', -0.27412342),
    ('cg24724428', 0.703772384),
    ('cg07547549', -0.110027226),
    ('cg25410668', 0.283649813),
    ('cg21296230', 0.928585964),
]

# Split the table back into the two parallel lists used by the later cells.
cpg_sites = [term for term, _ in _han_terms]

coefficients = [value for _, value in _han_terms]

In [3]:
# Two-column table: one row per model term (name + its coefficient).
df = pd.DataFrame(
    data=dict(feature=cpg_sites, coefficient=coefficients)
)

In [4]:
# Row 0 of `df` holds the "(Intercept)" term; every later row is a CpG feature.
# `.iloc` is used throughout so both the slice and the single-row lookup are
# explicitly positional and do not depend on the frame's index labels.
features = df['feature'].iloc[1:].tolist()

# unsqueeze(0) adds a leading axis: weights has shape (1, number of features).
weights = torch.tensor(df['coefficient'].iloc[1:].tolist()).unsqueeze(0)
# One-element tensor carrying the intercept, later used as the bias.
intercept = torch.tensor([df['coefficient'].iloc[0]])

In [5]:
# Linear model with one input per feature; the coefficients prepared earlier
# are loaded into its single `linear` layer.
model = pya.models.LinearModel(len(features))

# Assigning to `.data` replaces the parameter's tensor without checking its
# shape against the layer, so verify the shapes first and fail loudly here
# rather than saving a malformed model.
assert weights.shape == model.linear.weight.shape, (
    f"weights shape {tuple(weights.shape)} does not match "
    f"layer weight shape {tuple(model.linear.weight.shape)}"
)
assert intercept.shape == model.linear.bias.shape, (
    f"intercept shape {tuple(intercept.shape)} does not match "
    f"layer bias shape {tuple(model.linear.bias.shape)}"
)

model.linear.weight.data = weights
model.linear.bias.data = intercept

# Bare last expression so the notebook displays the model repr.
model

LinearModel(
  (linear): Linear(in_features=65, out_features=1, bias=True)
)

In [6]:
# Bundle saved as the model file: processing step names, the ordered feature
# list, the model's state dict, and the name of the model class.
weights_dict = dict(
    preprocessing=None,
    preprocessing_helper=None,
    postprocessing='anti_log_linear',
    postprocessing_helper=None,
    features=features,
    weight_dict=model.state_dict(),
    model_class='LinearModel',
)

# Descriptive metadata saved alongside the model. The pre/postprocessing
# entries are copied from weights_dict so the two files cannot disagree.
metadata_dict = {
    'species': 'Homo sapiens',
    'data_type': 'methylation',
    'year': 2020,
    'implementation_approved_by_author(s)': '⌛',
    **{step: weights_dict[step] for step in ('preprocessing', 'postprocessing')},
    'citation': "Han, Yang, et al. \"New targeted approaches for epigenetic age predictions.\" BMC biology 18 (2020): 1-15.",
    'doi': "https://doi.org/10.1186/s12915-020-00807-2",
    "notes": None,
}

In [7]:
# Write both artifacts to disk: the model bundle first, then its metadata.
artifacts = {
    '../weights/han.pt': weights_dict,
    '../metadata/han.pt': metadata_dict,
}
for destination, payload in artifacts.items():
    torch.save(payload, destination)