# Extracting significant drugs from a drug proximity analysis result

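This notebook extracts the significant drugs (those whose proximity z-score is at or below the significance threshold) from the results of a drug proximity analysis and saves them to CSV files.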

In [2]:
import os

import pandas as pd
import scipy
from scipy.stats import norm

In [3]:
# Significance cut-off for the proximity z-score: the 0.5th percentile of the
# standard normal distribution (about -2.576), i.e. the lower critical value of
# a two-tailed test at the 0.01 significance level. The extraction function
# applies it one-sidedly (z_score <= threshold).
# `norm` is imported explicitly because a bare `import scipy` does not
# guarantee that the `scipy.stats` submodule is loaded on every SciPy version.
z_score_thres = norm.ppf(0.005)

## Function to extract and save significant drugs (0.01 significance)

In [4]:
def extract_and_save_significant_drugs(step, proximity_result_path, significant_drugs_path,
                                       z_score_threshold=None):
    """Filter a drug proximity result for significant drugs and save them to CSV.

    Rows whose ``z_score`` is less than or equal to the threshold are kept,
    sorted by ascending z-score, re-indexed starting at 1, and written to
    ``significant_drugs_path`` with index and header. Rows with a missing
    z-score never satisfy the comparison and are therefore dropped.

    Parameters
    ----------
    step : str
        Label used only in the printed progress messages.
    proximity_result_path : str or path-like
        CSV file holding the proximity results; must contain a ``z_score`` column.
    significant_drugs_path : str or path-like
        Destination CSV file. Missing parent directories are created.
    z_score_threshold : float, optional
        Inclusive upper bound on ``z_score`` for a drug to count as
        significant. Defaults to the notebook-level ``z_score_thres``.

    Raises
    ------
    KeyError
        If the proximity result has no ``z_score`` column.
    """
    # Resolve the default lazily so the notebook-level constant is only
    # required when the caller does not supply a threshold.
    if z_score_threshold is None:
        z_score_threshold = z_score_thres

    print(f"Extracting significant drugs for {step} ...\n")

    # Load proximity results
    proximity_result = pd.read_csv(proximity_result_path)
    if "z_score" not in proximity_result.columns:
        raise KeyError(f"'z_score' column not found in {proximity_result_path}")

    # Filter for significant drugs
    proximity_result_significant = proximity_result[proximity_result["z_score"] <= z_score_threshold]

    # Print length before and after filtering
    print(f"Number of total drugs analysed for {step}: {len(proximity_result)}")
    print(f"Number of significant drugs for {step}: {len(proximity_result_significant)}")

    # Sort by ascending z-score (most negative first) and reset index
    proximity_result_significant_sorted = (
        proximity_result_significant.sort_values(by="z_score", ascending=True)
        .reset_index(drop=True)
    )

    # Set index starting at 1 so the saved index doubles as a rank
    proximity_result_significant_sorted.index += 1

    # Create the output directory if needed; to_csv does not create it and
    # would otherwise fail on a fresh checkout. Non-path targets (e.g. an open
    # buffer) are passed through untouched.
    if isinstance(significant_drugs_path, (str, os.PathLike)):
        output_dir = os.path.dirname(os.fspath(significant_drugs_path))
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    # Save significant drugs to CSV file
    proximity_result_significant_sorted.to_csv(significant_drugs_path, index=True, header=True)
    print(f"Significant drugs for {step} saved to {significant_drugs_path}\n")

## Extract significant drugs for whole gene list (1000 iterations)

In [10]:
print("================== FOR WHOLE GENE LISTS (1000 ITERATIONS) ==================")

# Input (proximity result) and output (significant drugs) CSV paths, keyed by
# the differentiation-step label that appears in the progress messages.
steps = {
    "step 1 of differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_step1.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_whole_1000_step1.csv",
    ),
    "step 2 of differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_step2.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_whole_1000_step2.csv",
    ),
    "step 3 of differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_step3.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_whole_1000_step3.csv",
    ),
    "full differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_full.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_whole_1000_full_diff.csv",
    ),
}

# Run the extraction once per step; each entry's keys match the function's
# keyword arguments, so the entry is unpacked straight into the call.
for step, paths in steps.items():
    extract_and_save_significant_drugs(step=step, **paths)

Extracting significant drugs for step 1 of differentiation ...

Number of total drugs analysed for step 1 of differentiation: 2244
Number of significant drugs for step 1 of differentiation: 641
Significant drugs for step 1 of differentiation saved to ../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_whole_1000_step1.csv

Extracting significant drugs for step 2 of differentiation ...

Number of total drugs analysed for step 2 of differentiation: 2244
Number of significant drugs for step 2 of differentiation: 779
Significant drugs for step 2 of differentiation saved to ../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_whole_1000_step2.csv

Extracting significant drugs for step 3 of differentiation ...

Number of total drugs analysed for step 3 of differentiation: 2244
Number of significant drugs for step 3 of differentiation: 636
Significant drugs for step 3 of differentiation saved to ../results/humanPVATsn/network_

## Extract significant drugs for key gene list (100 iterations)

In [None]:
print("================== FOR KEY GENE LISTS ONLY (100 ITERATIONS) ==================")

# Input (proximity result) and output (significant drugs) CSV paths, keyed by
# the differentiation-step label that appears in the progress messages.
steps = {
    "step 1 of differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_step1_key_only.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_100_step1.csv",
    ),
    "step 2 of differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_step2_key_only.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_100_step2.csv",
    ),
    "step 3 of differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_step3_key_only.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_100_step3.csv",
    ),
    "full differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_full_key_only.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_100_full_diff.csv",
    ),
}

# Run the extraction once per step; each entry's keys match the function's
# keyword arguments, so the entry is unpacked straight into the call.
for step, paths in steps.items():
    extract_and_save_significant_drugs(step=step, **paths)

Extracting significant drugs for step 1 of differentiation ...

Number of total drugs analysed for step 1 of differentiation: 2244
Number of significant drugs for step 1 of differentiation: 322
Significant drugs for step 1 of differentiation saved to ../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_100_step1.csv

Extracting significant drugs for step 2 of differentiation ...

Number of total drugs analysed for step 2 of differentiation: 2244
Number of significant drugs for step 2 of differentiation: 423
Significant drugs for step 2 of differentiation saved to ../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_100_step2.csv

Extracting significant drugs for step 3 of differentiation ...

Number of total drugs analysed for step 3 of differentiation: 2244
Number of significant drugs for step 3 of differentiation: 320
Significant drugs for step 3 of differentiation saved to ../results/humanPVATsn/network_analys

## Extract significant drugs for key gene list (1000 iterations)

In [5]:
print("================== FOR KEY GENE LISTS ONLY (1000 ITERATIONS) ==================")

# Input (proximity result) and output (significant drugs) CSV paths, keyed by
# the differentiation-step label that appears in the progress messages.
steps = {
    "step 1 of differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_step1_key_only_1000_iterations.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_1000_step1.csv",
    ),
    "step 2 of differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_step2_key_only_1000_iterations.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_1000_step2.csv",
    ),
    "step 3 of differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_step3_key_only_1000_iterations.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_1000_step3.csv",
    ),
    "full differentiation": dict(
        proximity_result_path="../results/humanPVATsn/network_analysis/proximity_full_key_only_1000_iterations.csv",
        significant_drugs_path="../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_1000_full_diff.csv",
    ),
}

# Run the extraction once per step; each entry's keys match the function's
# keyword arguments, so the entry is unpacked straight into the call.
for step, paths in steps.items():
    extract_and_save_significant_drugs(step=step, **paths)

Extracting significant drugs for step 1 of differentiation ...

Number of total drugs analysed for step 1 of differentiation: 2244
Number of significant drugs for step 1 of differentiation: 335
Significant drugs for step 1 of differentiation saved to ../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_1000_step1.csv

Extracting significant drugs for step 2 of differentiation ...

Number of total drugs analysed for step 2 of differentiation: 2244
Number of significant drugs for step 2 of differentiation: 450
Significant drugs for step 2 of differentiation saved to ../results/humanPVATsn/network_analysis/proximity_significant_drugs/significant_drugs_key_1000_step2.csv

Extracting significant drugs for step 3 of differentiation ...

Number of total drugs analysed for step 3 of differentiation: 2244
Number of significant drugs for step 3 of differentiation: 314
Significant drugs for step 3 of differentiation saved to ../results/humanPVATsn/network_anal