In [1]:
import pandas as pd
from ftplib import FTP
import os

In [2]:
# Load the supplementary drug table, trimming stray whitespace around the drug names.
drugInfo = pd.read_csv("Drug_Info_Supp_Table.csv").assign(
    Drug=lambda table: table["Drug"].str.strip()
)

# Map every drug type to the list of drugs that belong to it.
drugTypes = {
    typeName: list(drugNames)
    for typeName, drugNames in drugInfo.groupby("Type")["Drug"]
}


In [4]:
# Build one wide table holding the Mean_Intensity column of every drug, keyed by gene.
allDrugs = drugInfo["Drug"].to_list()
combined = pd.DataFrame(columns=["gene"])

# Location of the per-drug folders on the FTP server.
ftpHost = "massive-ftp.ucsd.edu"
ftpBaseDir = "/v06/MSV000093659/other/Dose response data - Jurkat proteome"

# The connection is opened lazily on the first missing file, so a re-run with
# every file already downloaded never touches the network.
ftp = None

try:
    for drug in allDrugs:
        # each drug's results are stored in "<drug>_ALL.txt"
        drugFileName = f"{drug}_ALL.txt"

        # only download files that are not already present locally
        if not os.path.exists(drugFileName):
            if ftp is None:
                ftp = FTP(ftpHost)
                ftp.login()

            # Absolute path: a failed directory change for one drug cannot leave
            # the session in the wrong folder for the drugs that follow.
            ftp.cwd(f"{ftpBaseDir}/{drug}/TXTs_Classified")

            # Download under a temporary name and rename only on success.
            # Otherwise an interrupted transfer would leave a truncated file that
            # the exists() check above treats as a finished download next time.
            partialFileName = f"{drugFileName}.part"
            try:
                with open(partialFileName, "wb") as file:
                    ftp.retrbinary(f"RETR {drugFileName}", file.write)
                os.replace(partialFileName, drugFileName)
            finally:
                # still present only when the download did not complete
                if os.path.exists(partialFileName):
                    os.remove(partialFileName)

        # load the tab-separated file as a dataframe
        drugData = pd.read_csv(drugFileName, delimiter="\t")

        # keep the gene and mean intensity columns, tagging the latter with the drug name
        meanData = drugData[["gene", "Mean_Intensity"]]
        renamedData = meanData.rename(columns={"Mean_Intensity": f"{drug}_Mean_Intensity"})

        # outer merge so genes missing for some drugs are kept (as NaN for those drugs)
        combined = pd.merge(combined, renamedData, how="outer", on="gene")
finally:
    # always release the connection, even when a download or parse step failed
    if ftp is not None:
        try:
            ftp.quit()
        except Exception:
            # the server may already be gone; close the socket without the QUIT handshake
            ftp.close()

# "gene" stays a regular column so that it is written to the file with index=False
combined.to_csv("Mean_Intensity_Matrix_All_Drugs.csv", index=False)

# last expression of the cell, so the preview is actually displayed
combined.head()



In [None]:
# Write one mean-intensity file per drug type.

# Work on a copy: `meanIntensityMatrix = combined` would only create an alias, and
# assigning new column labels through it would silently rename the columns of
# `combined` itself.
meanIntensityMatrix = combined.copy()
# rename the columns to just the drug names
meanIntensityMatrix.columns = meanIntensityMatrix.columns.str.removesuffix("_Mean_Intensity")

# keys: drug types, values: dfs of mean intensity vals for the drugs in that type
# (the loop variable is deliberately not called `type`, which would shadow the
# builtin for every cell executed afterwards)
meansByDrugType = {}
for drugType, drugNames in drugTypes.items():
    # select the columns in the combined matrix for all the drugs in that type
    meansByDrugType[drugType] = meanIntensityMatrix[drugNames]

# save the df for each type as a csv
# NOTE(review): only the drug columns are selected above, so the per-type files
# presumably carry no gene column — confirm that this is intended.
for drugType, typeSubset in meansByDrugType.items():
    typeSubset.to_csv(f"Mean_Intensity_Matrix_{drugType}.csv", index=False)

In [8]:
# Write one ic50 file per drug type: for every type, select the columns of the
# drugs in that type from the combined ic50 matrix (the table from salma) and
# save them under the name of that drug type.

# get combined matrix of values
ic50Matrix = pd.read_csv("ic50_matrix.csv")

# Drug names are looked up in the ic50 matrix with underscores instead of spaces.
# Build a separate table for that: `drugInfoWithUnderscores = drugInfo` would only
# create an alias, and assigning to its "Drug" column would overwrite the names in
# `drugInfo` that the download cell relies on.
drugInfoWithUnderscores = drugInfo.assign(
    Drug=drugInfo["Drug"].str.replace(" ", "_", regex=False)
)

# group all drugs by their type and turn into dictionary
drugTypesWithUnderscores = drugInfoWithUnderscores.groupby("Type")["Drug"].apply(list).to_dict()

# keys: drug types, values: dfs of ic50 vals for the drugs in that type
# (the loop variable is deliberately not called `type`, which would shadow the
# builtin for every cell executed afterwards)
ic50sByDrugType = {}
for drugType, drugNames in drugTypesWithUnderscores.items():
    # select the columns in the combined matrix for all the drugs in that type
    ic50sByDrugType[drugType] = ic50Matrix[drugNames]

# save the df for each type as a csv
for drugType, typeSubset in ic50sByDrugType.items():
    typeSubset.to_csv(f"ic50_Matrix_{drugType}.csv", index=False)