# Generate PPI, DPI, and gene lists

This generates the three inputs for network analysis: a protein-protein interaction (PPI) list, a drug-protein interaction (DPI) list, and a gene list.

In [1]:
import pandas as pd

## Create the combined PPI list

Load and reformat the downloaded interactomes.

In [2]:
# ====== 1. STRING ======
# Load reformatted STRING data (reformatting done in R using biomaRt)
string_PPI = pd.read_csv("../data/networks/string_PPI.csv", sep=",")


# ====== 2. BioGRID ======
# Load BioGRID data
biogrid_PPI = pd.read_csv("../data/networks/BIOGRID-MV-Physical-4.4.245.tab3.txt", sep="\t")

# Filter to keep only human interactions
biogrid_PPI = biogrid_PPI[
    (biogrid_PPI["Organism ID Interactor A"] == 9606) &
    (biogrid_PPI["Organism ID Interactor B"] == 9606)
]

# Filter and reformat
biogrid_PPI = biogrid_PPI[["Official Symbol Interactor A", "Official Symbol Interactor B"]]
biogrid_PPI.columns = ["GeneA", "GeneB"]
biogrid_PPI.to_csv("../data/networks/biogrid_PPI.csv", index=False)


# ====== 3. HINT ======
# Load HINT data
hint_PPI = pd.read_csv("../data/networks/HINT_HomoSapiens_binary_hq.txt", sep="\t")

# Filter and reformat
hint_PPI = hint_PPI[["Gene_A", "Gene_B"]]
hint_PPI.columns = ["GeneA", "GeneB"]
hint_PPI.to_csv("../data/networks/hint_PPI.csv", index=False)


  biogrid_PPI = pd.read_csv("../data/networks/BIOGRID-MV-Physical-4.4.245.tab3.txt", sep="\t")


In [3]:
# Create summary table
ppi_counts = pd.DataFrame({
    "Dataset": ["BIOGRID", "STRING", "HINT"],
    "Num_Interactions": [
        len(biogrid_PPI),
        len(string_PPI),
        len(hint_PPI)
    ]
})

# Print the table
print(ppi_counts)

   Dataset  Num_Interactions
0  BIOGRID            318015
1   STRING            207024
2     HINT            163435


Combine all PPI networks into one list without duplications

In [5]:

# Combine all PPI networks
combined_ppi = pd.concat([biogrid_PPI, string_PPI, hint_PPI], ignore_index=True)
print("Combined PPI, length:", len(combined_ppi))

# Remove empty rows
combined_ppi = combined_ppi.dropna()
print("After removing rows with empty entry, length:", len(combined_ppi))

# Remove self-loops
combined_ppi = combined_ppi[combined_ppi["GeneA"] != combined_ppi["GeneB"]]
print("After removing self-loops, length:", len(combined_ppi))

# Remove duplicates in both directions
combined_ppi.drop_duplicates()
print("After removing duplicates, length:", len(combined_ppi))
combined_ppi["sorted_pair"] = combined_ppi.apply(lambda row: tuple(sorted([row["GeneA"], row["GeneB"]])), axis=1)
combined_ppi = combined_ppi.drop_duplicates(subset="sorted_pair")
combined_ppi = combined_ppi.drop(columns="sorted_pair")
print("After removing bidirectional duplicates, length:", len(combined_ppi))

# Save combined PPI
combined_ppi.to_csv("../data/networks/combined_PPI.csv", index=False)
print("Combined PPI network saved as 'combined_PPI.csv'")

Combined PPI, length: 688474
After removing rows with empty entry, length: 678651
After removing self-loops, length: 666715
After removing duplicates, length: 666715
After removing bidirectional duplicates, length: 345547
Combined PPI network saved as 'combined_PPI.csv'


## Create the drug-gene interaction list

### DrugBank
From DrugBank, I take the files bonds.csv, polypeptides.csv, and bio_entities.csv and combine them to generate a drug-gene interaction list containing the drug bank ID and then the gene name of the protein it interacts with.

In [129]:

# ============== Load data ==============

print("\nLoading dataframes...\n")

bonds = pd.read_csv("../data/networks/milner_drugbank_postgresql/bonds.csv", sep=",", header=None)
bonds.columns = [
    "id", "type", "drug_id", "biodb_id", "pdb_id", "position",
    "pharmacological_action", "antagonist", "agonist", "substrate",
    "inhibitor", "inducer", "other_action", "inducer_strength",
    "inhibitor_strength", "induction_clinically_sig", "inhibition_clinically_sig"
]

polypeptides = pd.read_csv("../data/networks/milner_drugbank_postgresql/polypeptides.csv", sep=",", header=None)
polypeptides.columns = [
    "uniprot_id", "name", "uniprot_name", "gene_name", "organism_id",
    "molecular_weight", "theoretical_pi", "general_function", "specific_function",
    "signal_regions", "transmembrane_regions", "pdb_ids", "genbank_gene_id",
    "genbank_protein_id", "genecard_id", "locus", "genatlas_id", "hgnc_id",
    "meta_cyc_id", "ncbi_sequence_ids", "tissue_specificity", "cofactor",
    "subunit", "cellular_location", "amino_acid_sequence", "gene_sequence"
]

bio_entities = pd.read_csv("../data/networks/milner_drugbank_postgresql/bio_entities.csv", sep=",", header=None)
bio_entities.columns = [
    "biodb_id", "name", "kind", "organism"
]

drugs = pd.read_csv("../data/networks/milner_drugbank_postgresql/drugs.csv", sep=",", header=None)
drugs.columns = [
    "id", "type", "drugbank_id", "name", "state", "description",
    "simple_description", "clinical_description", "cas_number",
    "protein_formula", "protein_weight", "investigational", "approved",
    "vet_approved", "experimental", "nutraceutical", "illicit", "withdrawn",
    "moldb_mono_mass", "moldb_inchi", "moldb_inchikey", "moldb_smiles",
    "moldb_average_mass", "moldb_formula", "synthesis_patent_id",
    "protein_weight_details", "biotech_kind"
]

# Rename column for clarity
drugs.rename(columns={"id": "drug_id", "name": "drug_name"}, inplace=True)

# Print head of each DataFrame
# print("Bonds DataFrame head:")
# print(bonds.head())
# print("\nPolypeptides DataFrame head:")
# print(polypeptides.head())
# print("\nBio Entities DataFrame head:")
# print(bio_entities.head())
# print("\nDrugs DataFrame head:")
# print(drugs.head())


# ============== Filter dataframes ==============

print("\nFiltering dataframes...\n")

# Filter for target bonds with pharmacological action
target_bonds = bonds[
    (bonds["type"] == "TargetBond") &
    (bonds["pharmacological_action"] == "yes")
].copy()
target_bonds = target_bonds[["drug_id", "biodb_id"]]

# Filter for human proteins
polypeptides = polypeptides[polypeptides["organism_id"] == 154].copy()
polypeptides = polypeptides[["name", "gene_name"]]

# Filter for human proteins
bio_entities = bio_entities[
    (bio_entities["kind"] == "protein") &
    (bio_entities["organism"] == "Humans")
].copy()
bio_entities = bio_entities[["biodb_id", "name"]]

# Filter for drugs approved or in clinical trials, exclude non-human, illicit, nutraceutical, or withdrawn
drugs = drugs[
    (drugs["approved"] == 1) &
    (drugs["withdrawn"] == 0) &
    (drugs["illicit"] == 0) &
    (drugs["nutraceutical"] == 0)
].copy()

# Filter for id, drug bank id, and name
drugs = drugs[["drug_id", "drugbank_id", "drug_name"]]

# ============== Merge dataframes ==============

print("\nMerging dataframes...\n")

# Merge drug ID's with protein names
drug_protein = target_bonds.merge(bio_entities, on="biodb_id", how="left")

# Merge with gene names
drug_gene = drug_protein.merge(polypeptides, on="name", how="left")

# Merge with drug bank IDs and drug names
drug_gene = drug_gene.merge(drugs, on="drug_id", how="inner") # Keep only rows where drug_id exists in both dfs

# Check number of unique approved drugs before final formatting
approved_only = drug_gene[drug_gene["drugbank_id"].notna()]
print(f"Number of unique approved DrugBank drugs: {approved_only['drug_name'].nunique()}")
print(f"Total DPI rows for approved drugs: {len(approved_only)}")

# Drop missing target gene names
drug_gene = drug_gene.dropna(subset=["gene_name"])

# Extract and clean final DataFrame
drug_gene_final = drug_gene[["drug_name", "gene_name"]]
drug_gene_final.columns = ["Drug_Name", "Drug_Target"]

# Verify number of rows with named drugs
total_rows = len(drug_gene_final)
missing_names = drug_gene_final["Drug_Name"].isna().sum()
named_rows = total_rows - missing_names
print("\nTotal drug–gene pairs:", total_rows)
print("With drug names:", named_rows)

# Remove rows with NaN and duplicates
drug_gene_final = drug_gene_final.dropna()
drug_gene_final = drug_gene_final.drop_duplicates()

# Print key info
print("\nFinal DrugBank DPI head:\n", drug_gene_final.head())
print("\nNumber of unique drugs:", drug_gene_final["Drug_Name"].nunique())
print("Number of unique target genes:", drug_gene_final["Drug_Target"].nunique())
print("\nFinal DrugBank DPI dimensions:", drug_gene_final.shape)

# Save final DPI list
drug_gene_final.to_csv("../data/networks/drugbank_DPI.csv", index=False)



Loading dataframes...


Filtering dataframes...


Merging dataframes...

Number of unique approved DrugBank drugs: 1863
Total DPI rows for approved drugs: 3698

Total drug–gene pairs: 2879
With drug names: 2879

Final DrugBank DPI head:
              Drug_Name Drug_Target
0            Cetuximab        EGFR
1         Dornase alfa      DNASE1
2  Denileukin diftitox       IL2RA
3  Denileukin diftitox       IL2RB
4           Etanercept         TNF

Number of unique drugs: 1438
Number of unique target genes: 790

Final DrugBank DPI dimensions: (2873, 2)


### ChEMBL
From CheMBL, I downloaded the chembl_35_sqlite.tar.gz file from version 35.

In [7]:
import sqlite3
import pandas as pd

# ============== Connect to ChEMBL database ==============

conn = sqlite3.connect("../data/networks/chembl_35/chembl_35_sqlite/chembl_35.db")

cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()

# ============== Query high-confidence durg-protein interactions ==============

print("\nQuerying ChEMBL DPI...\n")

query = """
SELECT DISTINCT
    activities.molregno,
    target_dictionary.chembl_id AS target_chembl_id
FROM
    activities
JOIN
    assays ON activities.assay_id = assays.assay_id
JOIN
    target_dictionary ON assays.tid = target_dictionary.tid
JOIN
    target_components ON target_dictionary.tid = target_components.tid
JOIN
    component_sequences ON target_components.component_id = component_sequences.component_id
WHERE
    activities.standard_relation = '='
    AND activities.standard_type IN ('IC50', 'Ki', 'Kd', 'EC50')
    AND activities.standard_value IS NOT NULL
    AND component_sequences.organism = 'Homo sapiens'
"""

chembl_dpi = pd.read_sql(query, conn)
print("DPI head before target mapping:\n", chembl_dpi.head())
print("\nNumber of unique drugs:", chembl_dpi["molregno"].nunique())
print("Number of unique targets:", chembl_dpi["target_chembl_id"].nunique())
print("\nDPI shape before target mapping:", chembl_dpi.shape)

# ============== Map target ChEMBL IDs to UniProt IDs ==============

print("\nMapping ChEMBL target IDs to UniProt...\n")

uniprot_map = pd.read_csv("../data/networks/chembl_35/chembl_35_sqlite/chembl_uniprot_mapping.txt", sep="\t", header=None, skiprows=1)
uniprot_map.columns = ["target_uniprot_id", "target_chembl_id", "target_name", "target_type"]
uniprot_map = uniprot_map[["target_chembl_id", "target_uniprot_id"]]

chembl_dpi_final = chembl_dpi.merge(uniprot_map, on="target_chembl_id", how="left")
chembl_dpi_final = chembl_dpi_final.dropna()
print("DPI head after target mapping:\n", chembl_dpi_final.head())
print("\nDPI shape after target mapping:", chembl_dpi.shape)

# Save intermediate DPI
chembl_dpi_final.to_csv("../data/networks/inter_chembl_DPI.csv", index=False)



Querying ChEMBL DPI...

DPI head before target mapping:
    molregno target_chembl_id
0    179188        CHEMBL235
1     33632        CHEMBL259
2     33415        CHEMBL259
3     33415       CHEMBL3795
4    219299       CHEMBL3974

Number of unique drugs: 673271
Number of unique targets: 3768

DPI shape before target mapping: (1088205, 2)

Mapping ChEMBL target IDs to UniProt...

DPI head after target mapping:
    molregno target_chembl_id target_uniprot_id
0    179188        CHEMBL235            P37231
1     33632        CHEMBL259            P32245
2     33415        CHEMBL259            P32245
3     33415       CHEMBL3795            Q01726
4    219299       CHEMBL3974            P25116

DPI shape after target mapping: (1088205, 2)


The mapping of target UniProt IDs to target gene names was done in R. The next step is to convert the drug molregno IDs to actual ChEMBL IDs.

In [None]:
# ============== Map target UniProt IDs to gene names in R ==============
#
# See network_reformatting_DPI.Rmd
#
# ============== Annotate with compound names and filter for therapeutic use ==============

print("\nLoading compound info and filtering...\n")

# Load annotated DPI
chembl_dpi = pd.read_csv("../data/networks/annotated_chembl_dpi.csv", sep=",")

# Query to get compound names
compound_info_query = """
SELECT
    molregno,
    chembl_id AS compound_chembl_id,
    pref_name AS compound_name
FROM
    molecule_dictionary
"""
compound_info = pd.read_sql(compound_info_query, conn)

# Merge to get compound name
chembl_dpi = chembl_dpi.merge(compound_info, on="molregno", how="left")

# Query to get clinical phase and therapeutic use info
compound_meta_query = """
SELECT chembl_id, max_phase, therapeutic_flag, molecule_type
FROM molecule_dictionary
"""
compound_meta = pd.read_sql(compound_meta_query, conn)

compound_meta = compound_meta[
    (compound_meta["max_phase"] >= 1) &        # Keep phase I or higher (approved or in clinical trials)
    (compound_meta["therapeutic_flag"] == 1)   # Keep compounds intended for therapeutic use
]
compound_meta = compound_meta.dropna(subset=["max_phase"])
compound_meta = compound_meta.dropna(subset=["therapeutic_flag"])

# Merge to retain only clinically relevant compounds
chembl_dpi = chembl_dpi.merge(compound_meta, left_on="compound_chembl_id", right_on="chembl_id", how="inner")
chembl_dpi = chembl_dpi.drop(columns=["chembl_id"])

# Save intermediate DPI with compound names and meta info
chembl_dpi_with_meta = chembl_dpi.copy()
chembl_dpi_with_meta = chembl_dpi_with_meta.drop_duplicates(subset=["compound_name", "gene_name"])

# Verify filtering
print("Summary of therapeutic stats after filtering:")
print("\n=== max_phase ===")
print(chembl_dpi["max_phase"].value_counts(dropna=False))
print("\n=== therapeutic_flag ===")
print(chembl_dpi["therapeutic_flag"].value_counts(dropna=False))
print("\n=== molecule_type ===")
print(chembl_dpi["molecule_type"].value_counts(dropna=False))


# ============== Final processing and export ==============

print("\nFinal formatting and export...\n")

# Rename columns and remove unnamed drugs
chembl_dpi_final = chembl_dpi[["compound_name", "gene_name"]].copy()
chembl_dpi_final.columns = ["Drug_Name", "Drug_Target"]
chembl_dpi_final = chembl_dpi_final.dropna(subset=["Drug_Name"])

# Drop duplicates
print("Before removing duplicates:", len(chembl_dpi_final))
chembl_dpi_final = chembl_dpi_final.drop_duplicates()
print("After removing duplicates:", len(chembl_dpi_final))

# Print key info
print("Final ChEMBL DPI head:\n", chembl_dpi_final.head())
print("\nNumber of unique drugs:", chembl_dpi_final["Drug_Name"].nunique())
print("Number of unique target genes:", chembl_dpi_final["Drug_Target"].nunique())
print("\nFinal ChEMBL DPI dimensions:", chembl_dpi_final.shape)
num_unique_interactions = chembl_dpi_final.drop_duplicates(subset=["Drug_Name", "Drug_Target"]).shape[0]
print(f"Number of unique drug–gene interaction pairs: {num_unique_interactions}")

# Save final ChEMBL DPI DataFrame
chembl_dpi_final.to_csv("../data/networks/chembl_DPI.csv", index=False)



Loading compound info and filtering...

Summary of therapeutic stats after filtering:

=== max_phase ===
max_phase
4.0    15357
3.0      105
2.0        2
Name: count, dtype: int64

=== therapeutic_flag ===
therapeutic_flag
1    15464
Name: count, dtype: int64

=== molecule_type ===
molecule_type
Small molecule     15188
Protein              234
Unknown               39
Oligosaccharide        2
None                   1
Name: count, dtype: int64

Final formatting and export...

Before removing duplicates: 15464
After removing duplicates: 14253
Final ChEMBL DPI head:
    Drug_Name Drug_Target
0  CLONIDINE      ADRA1A
1  CLONIDINE      ADRA1D
2  CLONIDINE      ADRA1B
3  CLONIDINE      ADRA2A
4  CLONIDINE      ADRA2C

Number of unique drugs: 1594
Number of unique target genes: 1649

Final ChEMBL DPI dimensions: (14253, 2)


In [127]:
# Check how many unique approved drugs are in ChEMBL DPI
approved_only = chembl_dpi_with_meta[chembl_dpi_with_meta["max_phase"] == 4]
print(f"Number of unique approved drugs in ChEMBL DPI: {approved_only['compound_name'].nunique()}")
print(f"Total DPI rows for approved drugs: {len(approved_only)}")

Number of unique approved drugs in ChEMBL DPI: 1588
Total DPI rows for approved drugs: 14146


## Merge DrugBank and ChEMBL DPI lists 

In [28]:
# Load lists of drug–gene pairs from DrugBank and ChEMBL
drugbank_dpi = pd.read_csv("../data/networks/drugbank_DPI.csv", sep=",")
chembl_dpi = pd.read_csv("../data/networks/chembl_DPI.csv", sep=",")

# Standardise case and remove whitespace
for df in [drugbank_dpi, chembl_dpi]:
    df["Drug_Name"] = df["Drug_Name"].str.strip().str.lower()
    df["Drug_Target"] = df["Drug_Target"].str.strip().str.upper()

# Compare drug name sets
chembl_drugs = set(chembl_dpi["Drug_Name"].unique())
drugbank_drugs = set(drugbank_dpi["Drug_Name"].unique())
shared_drugs = chembl_drugs & drugbank_drugs

print(f"Number of unique drug names in ChEMBL: {len(chembl_drugs)}")
print(f"Number of unique drug names in DrugBank: {len(drugbank_drugs)}")
print(f"Number of shared drug names: {len(shared_drugs)}")

# === Make one combined DPI dataframe containing the drug source ===
# Track the drug source
drugbank_dpi_with_source = drugbank_dpi.copy()
chembl_dpi_with_source = chembl_dpi.copy()
# Add source column to each DataFrame
drugbank_dpi_with_source["Source"] = "DrugBank"
chembl_dpi_with_source["Source"] = "ChEMBL"
# Concatenate and drop duplicates: Group by drug and target, and merge source information
combined_dpi_with_source = (
    pd.concat([drugbank_dpi_with_source, chembl_dpi_with_source], ignore_index=True)
    .groupby(["Drug_Name", "Drug_Target"])["Source"]
    .agg(lambda x: "Both" if set(x) == {"ChEMBL", "DrugBank"} else x.iloc[0])
    .reset_index()
)
# Save to CSV
combined_dpi_with_source.to_csv("../data/networks/combined_DPI_with_source.csv", index=False)
# =====

# Concatenate and drop duplicates
combined_dpi = pd.concat([drugbank_dpi, chembl_dpi], ignore_index=True)
print("Total drug-target interactions before removing duplicates:", len(combined_dpi))
combined_dpi = combined_dpi.drop_duplicates()
print("Total drug-target interactions after removing duplicates:", len(combined_dpi))

# Save to CSV
combined_dpi.to_csv("../data/networks/combined_DPI.csv", index=False)

# Compute and print summary statistics
num_unique_drugs = combined_dpi["Drug_Name"].nunique()
num_unique_targets = combined_dpi["Drug_Target"].nunique()
num_unique_interactions = combined_dpi.shape[0]

print(f"Total unique drug-target interactions: {num_unique_interactions}")
print(f"Number of unique drugs: {num_unique_drugs}")
print(f"Number of unique targets: {num_unique_targets}")


Number of unique drug names in ChEMBL: 1594
Number of unique drug names in DrugBank: 1438
Number of shared drug names: 783
Total drug-target interactions before removing duplicates: 17126
Total drug-target interactions after removing duplicates: 15951
Total unique drug-target interactions: 15951
Number of unique drugs: 2249
Number of unique targets: 1933


## Create the disease gene lists

In [43]:
# ====== STEP 1 ======

# Load data
step1 = pd.read_excel("../results/humanPVATsn/pathfindR/full/Sonia_network/step1_only_sonia.xls")

# Keep only gene columns
step1_genes = step1[["Up_regulated_A", "Down_regulated_A"]].copy()

# Combine columns into one list
step1_deg = (
    step1_genes["Up_regulated_A"].dropna().str.split(", ").explode().tolist() +
    step1_genes["Down_regulated_A"].dropna().str.split(", ").explode().tolist()
)

# Create the new DataFrame
step1_deg = pd.DataFrame({"DEG": step1_deg})

# Remove duplicates
step1_deg = step1_deg.drop_duplicates().reset_index(drop=True)

print(step1_deg.head())
print(step1_deg.shape)

      DEG
0  ADAM10
1    JAG1
2    RHOA
3     FN1
4  FERMT2
(70, 1)


In [357]:
# ====== STEP 2 ======

# Load data
step2 = pd.read_excel("../results/humanPVATsn/pathfindR/full/Sonia_network/step2_only_sonia.xls")

# Keep only gene columns
step2_genes = step2[["Up_regulated_A", "Down_regulated_A", "Up_regulated_B", "Down_regulated_B",]].copy()

# Combine columns into one list
step2_deg = (
    step2_genes["Up_regulated_A"].dropna().str.split(", ").explode().tolist() +
    step2_genes["Down_regulated_A"].dropna().str.split(", ").explode().tolist() +
    step2_genes["Up_regulated_B"].dropna().str.split(", ").explode().tolist() +
    step2_genes["Down_regulated_B"].dropna().str.split(", ").explode().tolist()
)

# Create the new DataFrame
step2_deg = pd.DataFrame({"DEG": step2_deg})

# Remove duplicates
step2_deg = step2_deg.drop_duplicates().reset_index(drop=True)

print(step2_deg.head())
print(step2_deg.shape)

      DEG
0     APP
1  ZBTB20
2  CAMK2D
3   PRKG1
4   DCLK1
(99, 1)


In [358]:
# ====== STEP 3 ======

# Load data
step3 = pd.read_excel("../results/humanPVATsn/pathfindR/full/Sonia_network/step3_only_sonia.xls")

# Keep only gene columns
step3_genes = step3[["Up_regulated_B", "Down_regulated_B",]].copy()

# Combine columns into one list
step3_deg = (
    step3_genes["Up_regulated_B"].dropna().str.split(", ").explode().tolist() +
    step3_genes["Down_regulated_B"].dropna().str.split(", ").explode().tolist()
)

# Create the new DataFrame
step3_deg = pd.DataFrame({"DEG": step3_deg})

# Remove duplicates
step3_deg = step3_deg.drop_duplicates().reset_index(drop=True)

print(step3_deg.head())
print(step3_deg.shape)

      DEG
0   CDH11
1  COL1A1
2  COL1A2
3    FBN1
4   FGFR1
(56, 1)


In [359]:
# ====== FULL DIFFERENTIATION ======

# Load data
full_diff = pd.read_excel("../results/humanPVATsn/pathfindR/full/Sonia_network/full_diff_only_sonia.xls")

# Keep only gene columns
full_diff_genes = full_diff[["Up_regulated", "Down_regulated",]].copy()

# Combine columns into one list
full_diff_deg = (
    full_diff_genes["Up_regulated"].dropna().str.split(", ").explode().tolist() +
    full_diff_genes["Down_regulated"].dropna().str.split(", ").explode().tolist()
)

# Create the new DataFrame
full_diff_deg = pd.DataFrame({"DEG": full_diff_deg})

# Remove duplicates
full_diff_deg = full_diff_deg.drop_duplicates().reset_index(drop=True)

print(full_diff_deg.head())
print(full_diff_deg.shape)

       DEG
0  PACSIN2
1    ITGAV
2    MYO9B
3    ROBO1
4    SLIT2
(168, 1)


In [360]:
# ====== SAVE FILES ======

step1_deg.to_csv("../data/networks/step1_deg.csv", index=False, header=False)
step2_deg.to_csv("../data/networks/step2_deg.csv", index=False, header=False)
step3_deg.to_csv("../data/networks/step3_deg.csv", index=False, header=False)
full_diff_deg.to_csv("../data/networks/full_diff_deg.csv", index=False, header=False)