In [1]:
import sys
sys.path.append('../')

import pandas as pd
from pyseus import basic_processing as ip

### Before processing, sample column names need to follow the format `experiment_sample_rep#` (e.g. `10cm-Format-20ul-Beads_LAMP1_1`)

In [2]:
# Where the protein groups table lives: the data folder and the file name inside it
pg_file = 'proteinGroups.txt'
root = '../data/20210317_Organelle_IP_StartingMaterial_Bead_Amount/'

# Both are left empty here and handed to RawTables unchanged
intensity_type = ''
analysis = ''

# Build the RawTables object; the table itself is exposed as raw.raw_table
raw = ip.RawTables(root, analysis, intensity_type, pg_file=pg_file)

# Keep every column whose header contains 'Intensity'
# (this also matches the bare 'Intensity' column, as the listing below shows)
col_names = list(raw.raw_table)
intensity_cols = [header for header in col_names if 'Intensity' in header]

In [3]:
# Display the intensity column names as they currently appear in the table
intensity_cols

['Intensity',
 'Intensity 10cm_Format_20ul_Beads_LAMP1_rep_1_2ul',
 'Intensity 10cm_Format_20ul_Beads_LAMP1_rep_2_2ul',
 'Intensity 10cm_Format_20ul_Beads_LAMP1_rep_3_2ul',
 'Intensity 10cm_Format_20ul_Beads_PEX3_rep_1_2ul',
 'Intensity 10cm_Format_20ul_Beads_PEX3_rep_2_2ul',
 'Intensity 10cm_Format_20ul_Beads_PEX3_rep_3_2ul',
 'Intensity 10cm_Format_20ul_Beads_TOMM20_rep_1_2ul',
 'Intensity 10cm_Format_20ul_Beads_TOMM20_rep_2_2ul',
 'Intensity 10cm_Format_20ul_Beads_TOMM20_rep_3_2ul',
 'Intensity 10cm_Format_20ul_Beads_WT_rep_1_2ul',
 'Intensity 10cm_Format_20ul_Beads_WT_rep_2_2ul',
 'Intensity 10cm_Format_20ul_Beads_WT_rep_3_2ul',
 'Intensity 6well_Format_20ul_Beads_LAMP1_rep_1_2ul',
 'Intensity 6well_Format_20ul_Beads_LAMP1_rep_2_2ul',
 'Intensity 6well_Format_20ul_Beads_LAMP1_rep_3_2ul',
 'Intensity 6well_Format_20ul_Beads_PEX3_rep_1_2ul',
 'Intensity 6well_Format_20ul_Beads_PEX3_rep_2_2ul',
 'Intensity 6well_Format_20ul_Beads_PEX3_rep_3_2ul',
 'Intensity 6well_Format_20ul_Beads_TO

In [8]:
# Each rule pairs a pattern to search for in a column name with the text
# that replaces it. An empty replacement removes the matched text.
rename_rules = [
    ('_Format_', '-Format-'),
    ('_Beads', '-Beads'),
    ('3_4_ul', '3-4ul'),
    ('_2ul', ''),
    ('_rep', ''),
]

# Split the rules back into the two parallel lists that sample_rename takes.
# NOTE: `re` is also the name of the standard-library regex module; the name
# is kept because the rename_columns call further down reads it.
re = [pattern for pattern, _ in rename_rules]
replacement_re = [substitute for _, substitute in rename_rules]

# Rules are applied in the order they are listed above
sample_cols = ip.sample_rename(
    intensity_cols,
    RE=re,
    replacement_RE=replacement_re,
    repl_search=False,
)

In [9]:
# Preview of what the renamed sample columns will look like
sample_cols

['Intensity',
 'Intensity 10cm-Format-20ul-Beads_LAMP1_1',
 'Intensity 10cm-Format-20ul-Beads_LAMP1_2',
 'Intensity 10cm-Format-20ul-Beads_LAMP1_3',
 'Intensity 10cm-Format-20ul-Beads_PEX3_1',
 'Intensity 10cm-Format-20ul-Beads_PEX3_2',
 'Intensity 10cm-Format-20ul-Beads_PEX3_3',
 'Intensity 10cm-Format-20ul-Beads_TOMM20_1',
 'Intensity 10cm-Format-20ul-Beads_TOMM20_2',
 'Intensity 10cm-Format-20ul-Beads_TOMM20_3',
 'Intensity 10cm-Format-20ul-Beads_WT_1',
 'Intensity 10cm-Format-20ul-Beads_WT_2',
 'Intensity 10cm-Format-20ul-Beads_WT_3',
 'Intensity 6well-Format-20ul-Beads_LAMP1_1',
 'Intensity 6well-Format-20ul-Beads_LAMP1_2',
 'Intensity 6well-Format-20ul-Beads_LAMP1_3',
 'Intensity 6well-Format-20ul-Beads_PEX3_1',
 'Intensity 6well-Format-20ul-Beads_PEX3_2',
 'Intensity 6well-Format-20ul-Beads_PEX3_3',
 'Intensity 6well-Format-20ul-Beads_TOMM20_1',
 'Intensity 6well-Format-20ul-Beads_TOMM20_2',
 'Intensity 6well-Format-20ul-Beads_TOMM20_3',
 'Intensity 6well-Format-20ul-Beads_WT_1'

### If the new names look correct, rename the table's columns and save the result

In [10]:
# Apply the same pattern/replacement lists to the column headers of the full table
renamed = ip.rename_columns(
    raw.raw_table,
    RE=re,
    replacement_RE=replacement_re,
    repl_search=False,
)

# Write the result as tab-separated text into the same folder as the input;
# use this new file name as pg_file in the processing steps
renamed_path = root + 'proteinGroups_renamed.txt'
renamed.to_csv(renamed_path, sep='\t')