# Integrating BARseq data

This Jupyter notebook walks through the steps for integrating injection-area information from BARseq with the associated MAPseq data.

# Imports

In [1]:
# Built-in python libraries
import logging
import os
import sys
from configparser import ConfigParser

# Data science libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Natural sorting for barcode tags and other fields that mix letters and numbers.
from natsort import natsorted

# Make our custom libraries importable straight from the local git checkouts.
# Each checkout is appended to sys.path right before the imports that
# presumably rely on it, so keep the append/import order below intact.
# NOTE(review): the wildcard imports hide which names this notebook actually
# uses; replace them with explicit imports once that set is known.
gitpath=os.path.expanduser("~/git/mapseq-processing")
sys.path.append(gitpath)
from mapseq.core import *
from mapseq.barcode import *
from mapseq.utils import *
from mapseq.bowtie import *
from mapseq.stats import *

# Same pattern for the second checkout; note that `gitpath` is reused and
# points at mapseq-analysis once this cell has finished.
gitpath=os.path.expanduser("~/git/mapseq-analysis")
sys.path.append(gitpath)
from msanalysis.analysis import * 
# Visible marker that the whole import cell ran without raising.
print("Done")

Done


In [8]:
# Experiment configuration. Every input path hangs off `outdir`, so pointing
# the notebook at another data location only requires editing that one line.
expid = 'M205.htna24'
outdir = os.path.expanduser('~/mapseq/')
barseqdir = os.path.join(outdir, 'barseqdata')

# Alternative kept from the original cell (get_default_config is presumably
# supplied by the mapseq star-imports; verify before enabling):
#cp = get_default_config()
configfile = os.path.join(outdir, 'M205.mapseq.conf')
cp = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so inspect that list instead of assuming success.
loaded_configs = cp.read(configfile)
if not loaded_configs:
    # Logger.warning on the root logger object (rather than logging.warning)
    # avoids an implicit basicConfig() call that would install a new handler.
    logging.getLogger().warning("Config file missing or unreadable: %s", configfile)
logging.getLogger().setLevel(logging.INFO)

# BARseq inputs (tab-separated files).
expfile = os.path.join(barseqdir, 'expmatrix.tsv')
genesfile = os.path.join(barseqdir, 'genes.tsv')
somafile = os.path.join(barseqdir, 'soma_bc_data.tsv')

# Matrices associated with this experiment (YW143 / YW144).
b1file = os.path.join(outdir, 'YW143.nbcm.tsv')
b2file = os.path.join(outdir, 'YW144.nbcm.tsv')
print(f"For {expid}:\nconfig={configfile}\noutdir={outdir}")

For M205.htna24:
config=/Users/hover/mapseq/M205.mapseq.conf
outdir=/Users/hover/mapseq/


In [6]:
# Soma barcode table: tab-delimited with a header row; the first (unnamed)
# column of the file becomes the row index.
sbdf = pd.read_table(somafile, index_col=0, header=0)
sbdf

Unnamed: 0,soma_bc,pos_x,pos_y,pos40x_x,pos40x_y,depth_x,depth_y,id,angle,slice
0,GGGGGGGGGGGGGGG,3615.378821,1444.000000,258.0,2952.0,0.0,0.0,10001,180,1
1,GGGGGGGGGGGGGGG,7381.888812,-3094.339554,36.0,308.0,0.0,0.0,30001,180,1
2,GGGGGGCGGGGGGGG,7409.388812,-3007.839554,91.0,481.0,0.0,0.0,30002,180,1
3,GGGGGGCGGGGGGGG,7434.388812,-2976.339554,141.0,544.0,0.0,0.0,30003,180,1
4,GGGGGGCGGGGGGGG,7467.388812,-2937.339554,207.0,622.0,0.0,0.0,30004,180,1
...,...,...,...,...,...,...,...,...,...,...
235441,GCGGGGGGGGGGGGG,1558.702576,9380.116171,3147.0,2055.0,0.0,0.0,2660188,180,4
235442,GGCCCGGGGGCGGGG,1572.202576,9407.616171,3174.0,2110.0,0.0,0.0,2660189,180,4
235443,CCCCCCGCTGCCCGG,1578.202576,9429.116171,3186.0,2153.0,0.0,0.0,2660190,180,4
235444,GGGGGGGGGGGGGGG,179.702576,9533.116171,389.0,2361.0,0.0,0.0,2660191,180,4


In [7]:
# Sanity-check how the columns were parsed (barcode as object/string,
# coordinates as float64, id/angle/slice as int64).
sbdf.dtypes

soma_bc      object
pos_x       float64
pos_y       float64
pos40x_x    float64
pos40x_y    float64
depth_x     float64
depth_y     float64
id            int64
angle         int64
slice         int64
dtype: object

In [11]:
# Expression matrix: tab-delimited, one column per gene name in the header
# row (e.g. Calb1, Rorb); the first column of the file becomes the row index.
edf = pd.read_table(expfile, index_col=0, header=0)
edf

Unnamed: 0,Calb1,Rasgrf2,Tafa1,Enpp2,Col19a1,Rorb,Slc24a3,Galntl6,Il1rapl2,Tafa2,...,Cdh18,Gnb4,unused-1,unused-2,unused-3,unused-4,unused-5,Slc17a7,Gad1,Slc30a3
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235441,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
235442,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
235443,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
235444,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [12]:
# Barcode matrix for YW143 (presumably the MAPseq side of the integration;
# confirm): tab-delimited, the first column holds the barcode sequences and
# becomes the row index, with one value column per BC label in the header row.
b1df = pd.read_table(b1file, index_col=0, header=0)
b1df

Unnamed: 0_level_0,BC1,BC2,BC3,BC4,BC5,BC6,BC7,BC8,BC9,BC10,BC11,BC12
vbc_read_col,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
AAAAAACTCTGTGAATGTTTAACTGCAAGT,0.0,0.000000,0.0,76.510817,1.909901,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0
AAAAAATCTTCTTCGCCTAGGCGTTCCAGG,0.0,0.000000,0.0,0.000000,0.000000,3.0,0.0,0.00000,0.000000,0.000000,0.0,0.0
AAAAACCGGGTTCCTCTTCTCGGCCATTTA,0.0,0.000000,0.0,0.000000,9.549505,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0
AAAAACGAGGGGCGGATGGCGTAGGAGGGC,0.0,0.000000,0.0,0.000000,5.729703,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0
AAAAACGGAGGCAACCCATCCCATTCGTTC,0.0,22.627566,0.0,4.637019,1.909901,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
TTTTTTGGGTTCGAATGCGTACTTTTTGGC,0.0,11.313783,0.0,97.377404,0.000000,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0
TTTTTTGGTGCGGGATTTTGCTTTTTTTGT,0.0,0.000000,0.0,0.000000,28.648515,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0
TTTTTTGTTCTAATATTGAGTGTATTTGAG,0.0,0.000000,0.0,20.866587,0.000000,0.0,0.0,8.27897,0.000000,0.000000,0.0,0.0
TTTTTTTAGCAGGGGCCTATAAGAGTGCAG,0.0,0.000000,0.0,0.000000,5.729703,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0
