In [None]:
import cobra
import pandas as pd
import riptide
import os

def parameterize_model(
    model,
    transcriptomic,
    base_filename,
    gene_df_path='/Users/yahyafarooqi/Documents/Code/AntiGEM/Data/intermediate/gene_df_subtilis.tsv',
    intermediate_dir='/Users/yahyafarooqi/Documents/Code/AntiGEM/Data/intermediate/riptide_transcripts',
):
    """Build a RIPTiDe-constrained model from one transcriptomic table.

    The first two columns of ``transcriptomic`` are taken as gene identifier
    and abundance value. They are inner-joined on ``gene`` with the table at
    ``gene_df_path``, the ``gene`` column is dropped, and the result is written
    to ``<intermediate_dir>/<base_filename>_updated_abundance.tsv``. That file
    is then read back with ``riptide.read_transcription_file`` and passed to
    ``riptide.maxfit`` together with ``model``.

    Args:
        model: Model object handed unchanged to ``riptide.maxfit``.
        transcriptomic: DataFrame with at least two columns; only the first
            two are used. The caller's frame is not modified.
        base_filename: Label used in messages and in the intermediate file
            name.
        gene_df_path: Tab-separated table that must contain a ``gene`` column.
            # NOTE(review): its other columns presumably hold the identifiers
            # RIPTiDe expects in the first column - confirm against the data.
        intermediate_dir: Directory for the intermediate abundance file; it is
            created when missing.

    Returns:
        Whatever ``riptide.maxfit`` returns, or ``None`` when every abundance
        value is zero, no gene matches the gene table, or the merge is empty.

    Raises:
        ValueError: If ``transcriptomic`` has fewer than two columns.
    """
    if transcriptomic.shape[1] < 2:
        raise ValueError(f"The file {base_filename} does not contain enough columns.")

    # Work on an explicit copy: assigning into a bare slice triggers pandas'
    # chained-assignment warning and leaves it unclear which frame is changed.
    abundance = transcriptomic.iloc[:, [0, 1]].copy()
    abundance.columns = ['gene', 'value']
    abundance['gene'] = abundance['gene'].astype(str)
    # Non-numeric entries become NaN rather than raising.
    abundance['value'] = pd.to_numeric(abundance['value'], errors='coerce')

    # Check if all values are zero and skip if true
    if (abundance['value'] == 0).all():
        print(f"Skipping processing for {base_filename} as all value entries are zero.")
        return None

    gene_df = pd.read_csv(gene_df_path, sep='\t')
    if not gene_df['gene'].isin(abundance['gene']).any():
        print(f"Warning: No matching genes found in {base_filename}.")
        return None  # Exit if no genes match, prevents further errors

    # An inner merge already discards unmatched genes, so no pre-filter is needed.
    merged_data = pd.merge(gene_df, abundance, on='gene', how='inner')
    if merged_data.empty:
        print(f"Warning: No data left after merging in {base_filename}.")
        return None  # Exit if merging results in no data

    merged_data = merged_data.drop(columns=['gene'])

    # Make sure the destination exists before writing the intermediate file.
    os.makedirs(intermediate_dir, exist_ok=True)
    intermediate_path = os.path.join(intermediate_dir, f'{base_filename}_updated_abundance.tsv')
    merged_data.to_csv(intermediate_path, sep='\t', index=False)

    # Parameterize model using Riptide
    tr = riptide.read_transcription_file(intermediate_path, header=True)
    output = riptide.maxfit(model, tr)

    return output

def save_dict_as_tsv(data_dict, output_path):
    """Write a dictionary to ``output_path`` as a tab-separated file.

    When every value is a scalar, the file has two columns, ``Key`` and
    ``Value``, with one row per dictionary entry. Otherwise the dictionary is
    passed to ``pd.DataFrame`` so that each key becomes a column.

    Args:
        data_dict: Mapping to serialize.
        output_path: Destination file path.
    """
    # Handling both scalar and list-like dictionary values
    if all(pd.api.types.is_scalar(value) for value in data_dict.values()):
        # Keys are stored as a real column; putting them in the index and then
        # writing with index=False would silently drop them from the file.
        df = pd.DataFrame({
            'Key': list(data_dict.keys()),
            'Value': list(data_dict.values()),
        })
    else:
        df = pd.DataFrame(data_dict)

    df.to_csv(output_path, sep='\t', index=False)
    print(f"Data saved at {output_path}")

# Load the SBML model and override four exchange entries in its medium.
model = cobra.io.read_sbml_model('/Users/yahyafarooqi/Documents/Code/AntiGEM/models/subtilis_ iYO844.xml')
medium = model.medium
medium.update({
    'EX_glc__D_e': 10,
    'EX_trp__L_e': 1,
    'EX_glu__L_e': 1,
    'EX_cit_e': 1,
})
# The edited dictionary has to be assigned back for the model to pick it up.
model.medium = medium

# Paths and directories setup
path = '/Users/yahyafarooqi/Documents/Code/AntiGEM/Data/intermediate/transcriptome_data/test_set'
output_directory_1 = '/Users/yahyafarooqi/Documents/Code/AntiGEM/Data/intermediate/c_fluxes'
output_directory_2 = '/Users/yahyafarooqi/Documents/Code/AntiGEM/Data/intermediate/c_fit'
model_save_directory = '/Users/yahyafarooqi/Documents/Code/AntiGEM/Data/intermediate/constrained_models'

# Ensure the output directories exist
os.makedirs(output_directory_1, exist_ok=True)
os.makedirs(output_directory_2, exist_ok=True)
os.makedirs(model_save_directory, exist_ok=True)

# Loop through all .tsv files in the directory. os.listdir returns entries in
# arbitrary order, so sort them to make the processing order reproducible.
for filename in sorted(os.listdir(path)):
    if not filename.endswith('.tsv'):
        continue

    file_path = os.path.join(path, filename)
    transcriptomic_file = pd.read_csv(file_path, sep='\t')
    base_filename = '.'.join(filename.split('.')[:-1])

    # Assuming parameterize_model returns an object with attributes 'model' and 'concordance'
    riptide_result = parameterize_model(model, transcriptomic_file, base_filename)
    # None means the file was skipped; compare explicitly instead of relying on
    # the truthiness of the returned object.
    if riptide_result is None:
        continue

    c_model = riptide_result.model

    # Save the constrained model as an XML file
    model_save_path = os.path.join(model_save_directory, f'{base_filename}_constrained.xml')
    cobra.io.write_sbml_model(c_model, model_save_path)
    print(f"Constrained model saved as XML for {base_filename}")

    # Run parsimonious FBA on the constrained model and save the fluxes
    solution = cobra.flux_analysis.pfba(c_model)
    flux_output_file_path = os.path.join(output_directory_1, f'{base_filename}_fluxes.tsv')
    solution.fluxes.to_csv(flux_output_file_path, sep='\t')
    print(f"Flux analysis results saved for {base_filename}")

    # Saving concordance dictionary
    fit_output_file_path = os.path.join(output_directory_2, f'{base_filename}_fit.tsv')
    save_dict_as_tsv(riptide_result.concordance, fit_output_file_path)

Running max fit RIPTiDe for objective fraction range: 0.1 to 0.9...
Analyzing context-specific subnetwork flux ranges...
Progress: 40.0% 