# Methylation

## Import packages

In [4]:
import pandas as pd
import pyaging as pya

## Download and load example data

In [5]:
# Fetch the example methylation dataset. Per the log below, the file lives at
# ./pyaging_data/GSE139307.pkl (here it was already present on disk).
pya.data.download_example_data('methylation')

|-----> 🏗️ Starting download_example_data function
|-----------> ⚙️ Download example data started
|-----------------> Data found in ./pyaging_data/GSE139307.pkl
|-----------> ✅ Download example data finished [0.0027s]
|-----> 🎉 Done! [0.0045s]


In [6]:
# Load the downloaded example dataset into a DataFrame.
# SECURITY: unpickling can execute arbitrary code -- only load pickle files
# obtained from a source you trust.
df = pd.read_pickle('pyaging_data/GSE139307.pkl')

In [7]:
# Encode sex as a numeric indicator column: 1 where gender is 'F', else 0.
# NOTE(review): every value other than 'F' (including a missing gender) is
# encoded as 0 -- confirm the dataset has no missing gender entries.
df['female'] = df['gender'].eq('F').astype(int)

In [8]:
# Preview the first five samples (rows) of the wide methylation table.
df.head(n=5)

Unnamed: 0,dataset,tissue_type,age,gender,cg00000029,cg00000108,cg00000109,cg00000165,cg00000236,cg00000289,...,ch.X.938089F,ch.X.94051109R,ch.X.94260649R,ch.X.967194F,ch.X.97129969R,ch.X.97133160R,ch.X.97651759F,ch.X.97737721F,ch.X.98007042R,female
GSM4137709,GSE139307,sperm,84.0,M,0.084811,0.920696,0.856851,0.084567,0.838699,0.247273,...,0.045942,0.037631,0.056455,0.249872,0.049022,0.085691,0.037435,0.07782,0.106234,0
GSM4137710,GSE139307,sperm,69.0,M,0.099626,0.919073,0.890024,0.115541,0.852584,0.198103,...,0.041849,0.032573,0.08979,0.250245,0.079095,0.079756,0.046229,0.091256,0.120241,0
GSM4137711,GSE139307,sperm,69.0,M,0.117228,0.920276,0.894317,0.117127,0.839258,0.21341,...,0.049515,0.058097,0.079919,0.299758,0.079305,0.089815,0.065364,0.086864,0.156005,0
GSM4137712,GSE139307,sperm,69.0,M,0.077096,0.910204,0.9084,0.073885,0.861615,0.163276,...,0.033289,0.038836,0.108213,0.295428,0.050731,0.099943,0.047597,0.07848,0.10748,0
GSM4137713,GSE139307,sperm,67.0,M,0.063524,0.911608,0.884643,0.079877,0.864654,0.176169,...,0.038411,0.048787,0.088631,0.316694,0.041873,0.079303,0.048823,0.08901,0.117903,0


In [9]:
# Keep only numeric columns: the downstream conversion needs numerical data and
# does not work with strings. The numeric 'female' flag derived from 'gender'
# stays in the frame.
# errors='ignore' keeps this cell re-runnable: `df` is reassigned here, so a
# second run would otherwise raise KeyError on the already-dropped labels.
df = df.drop(columns=['gender', 'tissue_type', 'dataset'], errors='ignore')

## Convert data to AnnData object

In [10]:
# Wrap the numeric DataFrame in an AnnData object. Missing values are imputed
# with the 'mean' strategy; no metadata is passed, so (per the log below)
# adata.obs starts out empty.
adata = pya.pp.df_to_adata(
    df,
    imputer_strategy='mean',
)

|-----> 🏗️ Starting df_to_adata function
|-----> ⚙️ Impute missing values started
|-----------> Imputing missing values using mean strategy
|-----> ✅ Impute missing values finished [0.1147s]
|-----> ⚙️ Log data statistics started
|-----------> There are 37 observations
|-----------> There are 485514 features
|-----------> Total missing values: 0
|-----------> Percentage of missing values: 0.00%
|-----> ✅ Log data statistics finished [0.2208s]
|-----> ⚙️ Create anndata object started
|-----> ✅ Create anndata object finished [0.0169s]
|-----> ⚙️ Add metadata to anndata started
|-----------? No metadata provided. Leaving adata.obs empty
|-----> ⚠️ Add metadata to anndata finished [0.0336s]
|-----> ⚙️ Add unstructured data to anndata started
|-----> ✅ Add unstructured data to anndata finished [0.0009s]
|-----> 🎉 Done! [0.3873s]


## Predict age with aging clocks

In [11]:
# Run every listed clock on the same AnnData object; the order of the names
# is preserved so the resulting columns appear in this order.
adata = pya.pred.predict_age(
    adata,
    [
        'altumage',
        'horvath2013',
        'pchorvath2013',
        'dunedinpace',
        'replitali',
        'skinandblood',
        'mammalian2',
    ],
)

|-----> 🏗️ Starting predict_age function
|-----> ⚙️ Set PyTorch device started
|-----------> Using device: cpu
|-----> ✅ Set PyTorch device finished [0.0009s]
|-----> Processing clock: altumage
|-----------> ⚙️ Load clock started
|-----------------> Data found in ./pyaging_data/altumage.pt
|-----------> ✅ Load clock finished [0.0033s]
|-----------> ⚙️ Check features in adata started
|-----------> All features are present in adata.var_names.
|-----------> ✅ Check features in adata finished [0.0107s]
|-----------> ⚙️ Convert adata.X to torch.tensor and filter features started
|-----------> ✅ Convert adata.X to torch.tensor and filter features finished [0.0167s]
|-----------> ⚙️ Preprocess data started
|-----------------> Preprocessing data with function scale
|-----------> ✅ Preprocess data finished [0.0162s]
|-----------> ⚙️ Initialize model started
|-----------> ✅ Initialize model finished [0.0048s]
|-----------> ⚙️ Predict ages with model started
|-----------> ✅ Predict ages with mode

In [12]:
# Show the predictions: as the table below shows, adata.obs holds one row per
# sample and one column per requested clock.
adata.obs

Unnamed: 0,altumage,horvath2013,pchorvath2013,dunedinpace,replitali,skinandblood,mammalian2
GSM4137709,37.007023,33.624774,4.672973,1.152685,87.261047,2.354882,0.0071
GSM4137710,29.426702,28.829343,2.086035,1.031971,87.671936,1.060467,0.007577
GSM4137711,22.805416,28.316548,2.549487,1.063786,86.664955,1.289146,0.003947
GSM4137712,18.06007,24.850635,1.164793,1.142039,86.230934,0.59546,0.003872
GSM4137713,20.071888,25.942114,1.491444,1.115847,87.336739,0.786787,0.004309
GSM4137714,15.904675,27.717805,1.116081,1.180822,86.192078,0.509315,0.003994
GSM4137715,17.618645,25.023288,1.132691,1.120775,87.315369,0.786464,0.004123
GSM4137716,18.396324,26.629503,1.122279,1.201142,86.422653,0.336474,0.004455
GSM4137717,25.673313,27.471301,1.18077,1.20864,86.630707,0.562296,0.004236
GSM4137718,15.824392,26.320859,1.132404,1.126852,86.628609,0.730006,0.004447


## Get citation

In [13]:
# Look up the metadata stored for the 'altumage' clock; the dict shown below
# carries its species, data type, year, citation and DOI.
adata.uns['altumage_metadata']

{'species': 'Homo sapiens',
 'data_type': 'methylation',
 'year': 2022,
 'citation': 'de Lima Camillo, Lucas Paulo, Louis R. Lapierre, and Ritambhara Singh. "A pan-tissue DNA-methylation epigenetic clock based on deep learning." npj Aging 8.1 (2022): 4.',
 'doi': 'https://doi.org/10.1038/s41514-022-00085-y'}