In [1]:
import sys
sys.path.append('../')

import pandas as pd
from pyseus import basic_processing as ip

### Before processing, sample column names need to follow the format `experiment_sample_rep#`

In [24]:
# Input location: `root` is the data directory and `pg_file` is the
# protein groups table inside it.
root = '../data/20210719_Infected_Cell_All_Samples/'
pg_file = 'proteinGroups_tweeked.txt'

# Both are left empty for this run and passed straight through to RawTables.
analysis = ''
intensity_type = ''

raw = ip.RawTables(
    root,
    analysis,
    intensity_type,
    pg_file=pg_file,
)

# Gather the column names of the raw table, then keep only those that
# contain the (case-sensitive) substring 'intensity'.
col_names = list(raw.raw_table)
intensity_cols = [name for name in col_names if 'intensity' in name]

In [25]:
# Display the intensity column names as they currently appear in the table,
# i.e. before any renaming is applied.
intensity_cols

['LFQ intensity GPR107_infected_1',
 'LFQ intensity GPR107_infected_2',
 'LFQ intensity GPR107_infected_3',
 'LFQ intensity GPR107_uninfected_1',
 'LFQ intensity GPR107_uninfected_2',
 'LFQ intensity GPR107_uninfected_3',
 'LFQ intensity LAMP1_infected_1',
 'LFQ intensity LAMP1_infected_2',
 'LFQ intensity LAMP1_infected_3',
 'LFQ intensity LAMP1_uninfected_1',
 'LFQ intensity LAMP1_uninfected_2',
 'LFQ intensity LAMP1_uninfected_3',
 'LFQ intensity PEX3_infected_1',
 'LFQ intensity PEX3_infected_2',
 'LFQ intensity PEX3_infected_3',
 'LFQ intensity PEX3_uninfected_1',
 'LFQ intensity PEX3_uninfected_2',
 'LFQ intensity PEX3_uninfected_3',
 'LFQ intensity RTN4_infected_1',
 'LFQ intensity RTN4_infected_2',
 'LFQ intensity RTN4_infected_3',
 'LFQ intensity RTN4_uninfected_1',
 'LFQ intensity RTN4_uninfected_2',
 'LFQ intensity RTN4_uninfected_3',
 'LFQ intensity TOMM20_infected_1',
 'LFQ intensity TOMM20_infected_2',
 'LFQ intensity TOMM20_infected_3',
 'LFQ intensity TOMM20_uninfected_

In [18]:
# Each (search, replacement) pair is one renaming rule. The rules are kept
# in the order in which they are handed to sample_rename.
rename_rules = [
    ('_infected', '-infected'),
    ('_uninfected', '-uninfected'),
    ('ntensity ', 'ntensity 20210719_'),
]

# Split the pairs into the two parallel lists the renaming helpers expect.
# NOTE(review): the name `re` shadows the standard-library `re` module in this
# notebook's namespace; it is kept because a later cell reads it.
re = [pattern for pattern, _repl in rename_rules]
replacement_re = [repl for _pattern, repl in rename_rules]

# Apply the rules, in list order, to the intensity column names.
sample_cols = ip.sample_rename(
    intensity_cols,
    RE=re,
    replacement_RE=replacement_re,
    repl_search=False,
)

In [19]:
# Preview what the renamed sample columns will look like
sample_cols

['LFQ intensity 20210719_GPR107-infected_1',
 'LFQ intensity 20210719_GPR107-infected_2',
 'LFQ intensity 20210719_GPR107-infected_3',
 'LFQ intensity 20210719_GPR107-uninfected_1',
 'LFQ intensity 20210719_GPR107-uninfected_2',
 'LFQ intensity 20210719_GPR107-uninfected_3',
 'LFQ intensity 20210719_LAMP1-infected_1',
 'LFQ intensity 20210719_LAMP1-infected_2',
 'LFQ intensity 20210719_LAMP1-infected_3',
 'LFQ intensity 20210719_LAMP1-uninfected_1',
 'LFQ intensity 20210719_LAMP1-uninfected_2',
 'LFQ intensity 20210719_LAMP1-uninfected_3',
 'LFQ intensity 20210719_PEX3-infected_1',
 'LFQ intensity 20210719_PEX3-infected_2',
 'LFQ intensity 20210719_PEX3-infected_3',
 'LFQ intensity 20210719_PEX3-uninfected_1',
 'LFQ intensity 20210719_PEX3-uninfected_2',
 'LFQ intensity 20210719_PEX3-uninfected_3',
 'LFQ intensity 20210719_RTN4-infected_1',
 'LFQ intensity 20210719_RTN4-infected_2',
 'LFQ intensity 20210719_RTN4-infected_3',
 'LFQ intensity 20210719_RTN4-uninfected_1',
 'LFQ intensity 

### If the new names are formatted correctly, rename the table's columns and save it

In [28]:
# Apply the same search/replacement rules to the column names of the raw table.
renamed = ip.rename_columns(
    raw.raw_table,
    RE=re,
    replacement_RE=replacement_re,
    repl_search=False,
)

# Write the renamed table next to the input as a tab-separated file; use this
# new file name as pg_file in the subsequent processing.
renamed_path = root + 'proteinGroups_renamed_t.txt'
renamed.to_csv(renamed_path, sep='\t')

In [29]:
# Inspect the dtype of every column in the renamed table.
# NOTE(review): the recorded output lists some intensity columns as `object`
# rather than float64 — presumably mixed values in those columns; confirm
# before any numeric processing.
renamed.dtypes

LFQ intensity 20210719_GPR107-infected_1         object
LFQ intensity 20210719_GPR107-infected_2         object
LFQ intensity 20210719_GPR107-infected_3         object
LFQ intensity 20210719_GPR107-uninfected_1       object
LFQ intensity 20210719_GPR107-uninfected_2      float64
LFQ intensity 20210719_GPR107-uninfected_3      float64
LFQ intensity 20210719_LAMP1-infected_1         float64
LFQ intensity 20210719_LAMP1-infected_2         float64
LFQ intensity 20210719_LAMP1-infected_3         float64
LFQ intensity 20210719_LAMP1-uninfected_1       float64
LFQ intensity 20210719_LAMP1-uninfected_2       float64
LFQ intensity 20210719_LAMP1-uninfected_3       float64
LFQ intensity 20210719_PEX3-infected_1          float64
LFQ intensity 20210719_PEX3-infected_2          float64
LFQ intensity 20210719_PEX3-infected_3          float64
LFQ intensity 20210719_PEX3-uninfected_1        float64
LFQ intensity 20210719_PEX3-uninfected_2        float64
LFQ intensity 20210719_PEX3-uninfected_3        