In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm
import scanpy as sc
import h5py
import sys

print(f'PID number is {os.getpid()}')

import matplotlib as mpl

figure_folder = r'figures'

mpl.rc('figure', dpi=300)
mpl.rc('figure', titlesize=10.) 
mpl.rc('figure.constrained_layout', use=True)

mpl.rc('axes', labelsize=8.)
mpl.rc('axes', titlesize=9.)
mpl.rc('axes', linewidth=1)

mpl.rc('font', family='sans-serif')

mpl.rc('savefig', dpi=300)
mpl.rc('savefig', format='svg')

mpl.rc('xtick', labelsize=7.)
mpl.rc('xtick.major', size=3)
mpl.rc('xtick.major', width=1.)

mpl.rc('ytick', labelsize=7.)
mpl.rc('ytick.major', size=3)
mpl.rc('ytick.major', width=1.)

PID number is 36456


# Load gene info and radial position

In [2]:
df_gene = pd.read_csv(r'resources/gene_info.csv', index_col=0)
df_gene['position'] = (df_gene['start'] + df_gene['end'])/2
df_gene.head()

Unnamed: 0,gene,chr,start,end,position
0,0610006L08Rik,chr7,74818817,74853813,74836315.0
1,0610007P14Rik,chr12,85815447,85824550,85819998.5
2,0610009B22Rik,chr11,51685385,51688874,51687129.5
3,0610009E02Rik,chr2,26445695,26459390,26452542.5
4,0610009L18Rik,chr11,120348677,120351190,120349933.5


In [3]:
df_rp = pd.read_csv(r"C:\Users\cosmosyw\Documents\Research\Mecp2_v2\results\radial_positions\MOp_majortype_Rp.csv", index_col=0)
df_rp.head()

Unnamed: 0,loci_name,AB,MOp_RP,majorType
0,chr1_3742742_3759944,B,0.824207,Gluta
1,chr1_6245958_6258969,B,0.824798,Gluta
2,chr1_8740008_8759916,B,0.827609,Gluta
3,chr1_9627926_9637875,B,0.81069,Gluta
4,chr1_9799472_9811359,B,0.806001,Gluta


In [4]:
df_rp['chr'] = df_rp['loci_name'].apply(lambda x: x.split('_')[0])
df_rp['start'] = df_rp['loci_name'].apply(lambda x: int(x.split('_')[1]))
df_rp['end'] = df_rp['loci_name'].apply(lambda x: int(x.split('_')[2]))
df_rp['position'] = (df_rp['start'] + df_rp['end'])/2
df_rp.head()

Unnamed: 0,loci_name,AB,MOp_RP,majorType,chr,start,end,position
0,chr1_3742742_3759944,B,0.824207,Gluta,chr1,3742742,3759944,3751343.0
1,chr1_6245958_6258969,B,0.824798,Gluta,chr1,6245958,6258969,6252463.5
2,chr1_8740008_8759916,B,0.827609,Gluta,chr1,8740008,8759916,8749962.0
3,chr1_9627926_9637875,B,0.81069,Gluta,chr1,9627926,9637875,9632900.5
4,chr1_9799472_9811359,B,0.806001,Gluta,chr1,9799472,9811359,9805415.5


In [None]:
majorTypes = ['Gluta', 'GABA', 'Astro', 'Oligo', 'Micro', 'Endo']

gene_rp_dfs = []

for sel_type in majorTypes:
    
    df_rp_type = df_rp[df_rp['majorType']==sel_type].copy()
    df_output = df_gene.copy()
    rps = []
    
    for i, row in tqdm(df_gene.iterrows()):
        df = df_rp_type[df_rp_type['chr']==row['chr']].copy()
        if len(df)>0:
            df['distance'] = np.abs(df['position']-row['position'])
            df.sort_values('distance', inplace=True)
            if df['distance'].values[0]>=3000000:
                rps.append(-1)
            else:
                rps.append(df['MOp_RP'].values[0])
        else:
            rps.append(-1)
        
    df_output['radial_position'] = rps
    df_output['majortype'] = sel_type
    
    gene_rp_dfs.append(df_output)

df_rp_gene = pd.concat(gene_rp_dfs, ignore_index=True)
df_rp_gene.head()

25247it [00:16, 1566.71it/s]
25247it [00:15, 1598.77it/s]
25247it [00:15, 1602.25it/s]
13440it [00:08, 1593.72it/s]

In [None]:
df_rp_gene.to_csv('resources/gene_rp_by_majortype.csv')