In [1]:
import requests
import zipfile
import io
import csv
from pathlib import Path
import pandas as pd

In [2]:
# URL of the GenAge human genes dataset, distributed as a ZIP archive
# (the archive currently contains a CSV file alongside release notes).
# If the download fails, check whether the URL has changed upstream.
URL = "https://genomics.senescence.info/genes/human_genes.zip"  

In [3]:
def download_and_extract(url, output_path=None, timeout=30):
    """Download a ZIP archive and extract its first tabular data file.

    The archive is fetched into memory; the first member whose name ends in
    ``.csv``, ``.txt``, ``.tsv`` or ``.dat`` (case-insensitive) is written to
    disk unchanged, and its content is also returned as decoded text lines.

    Parameters
    ----------
    url : str
        Location of the ZIP archive.
    output_path : str or path-like, optional
        Where to save the extracted file. When omitted, the file is saved
        under the member's base name in the current working directory.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled server cannot block the
        notebook forever. Defaults to 30 seconds.

    Returns
    -------
    tuple
        ``(lines, target_path)``: the file content split into text lines
        (a leading UTF-8 BOM, if any, is stripped) and the ``Path`` the raw
        bytes were written to.

    Raises
    ------
    RuntimeError
        If the archive contains no member with a recognised data suffix.

    Any error raised by ``resp.raise_for_status()`` for an unsuccessful HTTP
    response propagates to the caller.
    """
    data_suffixes = (".csv", ".txt", ".tsv", ".dat")

    print("Downloading", url)
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()

    # The context manager guarantees the archive handle is released even if
    # no suitable member is found.
    with zipfile.ZipFile(io.BytesIO(resp.content)) as archive:
        members = archive.namelist()
        print("Contents:", members)
        # Choose the CSV or tab-delimited file (GenAge now provides CSV format)
        fname = next((f for f in members if f.lower().endswith(data_suffixes)), None)
        if fname is None:
            raise RuntimeError("Cannot find appropriate file inside ZIP")
        print("Extracting", fname)
        data = archive.read(fname)

    # Member names come from a downloaded archive and may contain directory
    # components (including ".."); keep only the base name for the default
    # destination so the file cannot land outside the working directory.
    target_path = Path(output_path) if output_path else Path(Path(fname).name)
    target_path.parent.mkdir(parents=True, exist_ok=True)
    target_path.write_bytes(data)
    print("Saved to", target_path)

    # "utf-8-sig" decodes plain UTF-8 identically but drops a leading BOM,
    # which would otherwise be glued to the first header field.
    lines = data.decode("utf-8-sig").splitlines()
    return lines, target_path


In [4]:
# Fetch the archive and save the extracted data file to disk; `lines` holds the
# file's decoded text lines and `raw_csv_path` the location it was written to.
lines, raw_csv_path = download_and_extract(URL)
print("Raw CSV saved to", raw_csv_path)


Downloading https://genomics.senescence.info/genes/human_genes.zip
Contents: ['genage_human.csv', 'release.html']
Extracting genage_human.csv
Saved to genage_human.csv
Raw CSV saved to genage_human.csv


In [6]:
# Read from the path returned by download_and_extract rather than repeating the
# filename, so this cell keeps working if the archive member is renamed.
df = pd.read_csv(raw_csv_path)

In [7]:
# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,GenAge ID,symbol,name,entrez gene id,uniprot,why
0,1,GHR,growth hormone receptor,2690,GHR_HUMAN,mammal
1,2,GHRH,growth hormone releasing hormone,2691,SLIB_HUMAN,mammal
2,3,SHC1,SHC (Src homology 2 domain containing) transfo...,6464,SHC1_HUMAN,mammal
3,4,POU1F1,POU class 1 homeobox 1,5449,PIT1_HUMAN,mammal
4,5,PROP1,PROP paired-like homeobox 1,5626,PROP1_HUMAN,mammal


In [8]:
# (rows, columns) of the loaded table.
df.shape

(307, 6)