In [1]:
import os
import json

import pandas as pd
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt

from linear_modeling import *

### Load data and sample metadata

In [2]:
# Directory containing the compendium files, addressed relative to the
# notebook's current working directory (one level up, then into data/).
data_path = os.path.join(os.getcwd(), "..", "data", "S_cereviseae_compendia_recount_bio")

In [3]:
# Parse the aggregated metadata JSON stored alongside the expression data.
with open(os.path.join(data_path, 'aggregated_metadata.json'), 'r') as handle:
    metadata_file = json.load(handle)

# Re-key the experiment records by their own "accession_code" field; the
# original keys of the "experiments" mapping are not used.
tables = {
    record["accession_code"]: record
    for record in metadata_file['experiments'].values()
}

# Building the frame from the dict puts accession codes on the columns, so
# transpose to get one row per accession code.
metadata = pd.DataFrame(tables).T

In [4]:
# Read the tab-separated expression table (first row as header, first column
# as index) and transpose it so the file's rows become the frame's columns.
data = pd.read_csv(
    os.path.join(data_path, "SACCHAROMYCES_CEREVISIAE.tsv"),
    sep="\t",
    header=0,
    index_col=0,
).transpose()
print("Loaded data with dimensions:", data.shape)

Loaded data with dimensions: (12428, 5370)


### Load gene metadata

In [5]:
# Load the gene lookup table. The CSV is read without a header row, so the
# four column names are supplied explicitly.
gene_mapper = pd.read_csv(
    os.path.join(os.getcwd(), "..", "data", "yeast_orf_dict.csv"),
    header=None,
    names=["id", "symbol", "name", "description"],
)

In [6]:
# Build an id -> display-name lookup: use the gene symbol when there is one,
# otherwise fall back to the id itself. A symbol whose string form is "nan"
# (as a missing value's would be) counts as absent.
gene_map = {}
for _, row in gene_mapper.iterrows():
    gene_id, symbol = str(row["id"]), str(row["symbol"])
    gene_map[gene_id] = gene_id if symbol == "nan" else symbol

In [7]:
# Gene symbols to query; each one is looked up in gene_mapper's "symbol"
# column when the correlation table is built.
names = [
    "MCT1", "SIT4", "PHO85", "PHO84",
    "PHO4", "REG1", "PHO80", "PHO81",
    "PHO89", "PHO2", "PHO87", "PHO90",
]

In [8]:
# Start from an empty frame; the loop over `names` adds three columns per
# gene to it (<symbol>_id, <symbol>_name, <symbol>_r_value).
corr_data = pd.DataFrame()

In [9]:
# The column-by-column correlation matrix of `data` does not depend on which
# gene is being queried, so compute it once instead of once per iteration.
gene_corr = data.corr()

for x in names:
    print("***", x, "***")
    try:
        # Resolve the symbol to its id; raises IndexError when the symbol is
        # not present in gene_mapper.
        _this = gene_mapper.loc[gene_mapper["symbol"] == x, "id"].tolist()[0]
        # Raises KeyError when that id is not a column of the correlation
        # matrix (i.e. not a numeric column of `data`).
        _sort = gene_corr[_this].sort_values(ascending=False)
    except (IndexError, KeyError):
        # Only the two expected lookup failures are skipped; anything else
        # (including KeyboardInterrupt) now propagates instead of being
        # silently swallowed.
        print("Skipping", x, "...")
        continue

    # Three parallel columns per query gene, ordered by descending correlation:
    # the correlated gene's id, its display name, and the correlation value.
    corr_data[x + "_id"] = _sort.index.tolist()
    corr_data[x + "_name"] = _sort.index.map(gene_map).tolist()
    corr_data[x + "_r_value"] = _sort.tolist()

*** MCT1 ***
*** SIT4 ***
*** PHO85 ***
*** PHO84 ***
*** PHO4 ***
*** REG1 ***
*** PHO80 ***
*** PHO81 ***
*** PHO89 ***
*** PHO2 ***
*** PHO87 ***
*** PHO90 ***


In [10]:
# Write the assembled correlation table as a tab-separated file in the data
# directory (the frame's index is written too, as in the default to_csv).
corr_data.to_csv(
    path_or_buf=os.path.join(os.getcwd(), "..", "data", "gene_correlations.tsv"),
    sep="\t",
)