In [1]:
import sys
sys.path.append('../')

import pandas as pd
from pyseus import basic_processing as ip

### Before processing, sample names need to follow the format `experiment_sample_rep#`

In [2]:
# Location of the protein groups file and the intensity type to work with
root = '../data/OC_Plate_22-25_MBR/'
pg_file = 'proteinGroups.txt'
intensity_type = 'LFQ intensity'

raw = ip.RawTables(experiment_dir=root, intensity_type=intensity_type, pg_file=pg_file)

# Extract the sample intensity columns.
# The filter is derived from `intensity_type` (plus the trailing space that separates
# it from the sample name) so that it cannot drift out of sync with the value passed
# to RawTables above.
col_names = list(raw.pg_table)
intensity_prefix = intensity_type + ' '
intensity_cols = [name for name in col_names if intensity_prefix in name]
raw.sample_cols = intensity_cols

In [7]:
# Show the first five intensity column names in their current (pre-rename) format
intensity_cols[:5]

['LFQ intensity 20200211_TIMS03_EvoSep04_AnBrAnMi_HumanInteractome_0769_CZBMPI_P022A01_RANBP2_01',
 'LFQ intensity 20200211_TIMS03_EvoSep04_AnBrAnMi_HumanInteractome_0770_CZBMPI_P022B01_COMMD1_01',
 'LFQ intensity 20200211_TIMS03_EvoSep04_AnBrAnMi_HumanInteractome_0771_CZBMPI_P022C01_COMMD2_01',
 'LFQ intensity 20200211_TIMS03_EvoSep04_AnBrAnMi_HumanInteractome_0772_CZBMPI_P022D01_COMMD4_01',
 'LFQ intensity 20200211_TIMS03_EvoSep04_AnBrAnMi_HumanInteractome_0773_CZBMPI_P022E01_COMMD6_01']

In [4]:
# Regular expressions that are substituted, in order, in every intensity column name:
#   1. 'LFQ.*CZBMPI_' - everything from 'LFQ' through 'CZBMPI_' (the acquisition prefix)
#   2. '[A-Z]\d\d_'   - a capital letter + two digits + underscore (e.g. 'A01_'), so that
#                       'P022A01_RANBP2_01' becomes 'P022_RANBP2_01'
# Raw strings are used so that '\d' is not parsed as an (invalid) string escape sequence.
# NOTE(review): the name `re` shadows the standard-library `re` module; it is kept here
# because the later rename_columns cell passes this same variable.
re = [r'LFQ.*CZBMPI_', r'[A-Z]\d\d_']
# Replacement strings, in the same order as the patterns above.
replacement_re = ['LFQ intensity ', '_']

# Build the renamed column names from the list of intensity columns so the result
# can be inspected before the table itself is changed.
sample_cols = ip.sample_rename(intensity_cols, RE=re, replacement_RE=replacement_re, repl_search=False)

In [5]:
# Preview the first five renamed sample columns to check the new format
sample_cols[:5]

['LFQ intensity P022_RANBP2_01',
 'LFQ intensity P022_COMMD1_01',
 'LFQ intensity P022_COMMD2_01',
 'LFQ intensity P022_COMMD4_01',
 'LFQ intensity P022_COMMD6_01']

### If the new names look correct, rename the table's columns and save the result

In [6]:
# Apply the same pattern / replacement lists to the table held by `raw`
raw.rename_columns(RE=re, replacement_RE=replacement_re, repl_search=False)

# Write the renamed table as tab-separated text into the experiment directory.
# Use this new file name (instead of the original pg_file) in later processing.
# NOTE(review): presumably renamed_table is a pandas DataFrame, in which case its index
# is written as the first column by default - confirm the downstream reader expects that.
renamed_path = root + 'proteinGroups_renamed.txt'
raw.renamed_table.to_csv(renamed_path, sep='\t')