In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
import numpy as np
from sklearn.preprocessing import StandardScaler
import pubchempy as pcp
import matplotlib.pyplot as plt
import os
from PyFingerprint.fingerprint import get_fingerprint, get_fingerprints
import cirpy

import warnings
# Ignore all warnings
warnings.filterwarnings("ignore")

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Import Data

In [None]:
# Quick look at the node table CSV; this is the same file eigen() is later run on.
cek = pd.read_csv("../data/string_interactions_short.tsv default node.csv")
cek

In [None]:
def eigen(i):
    """Score every row of a node table by its projection on the first principal component.

    Reads ``../data/<i>.csv``, standardizes seven centrality columns with
    ``StandardScaler``, fits a ``PCA`` on them, prints diagnostics, and writes
    the ``name`` column, the standardized metrics and a per-row ``Overall``
    score to ``../data/centrality_<i>.xlsx``.

    ``Overall`` is the dot product of a row's standardized metrics with the
    first principal axis (``pca.components_[0]``).

    Parameters
    ----------
    i : str
        Base name (without the ``.csv`` extension) of the input file in
        ``../data/``; also used to build the output file name.

    Returns
    -------
    None
        Results are printed and written to the Excel file.
    """
    metric_columns = ['BetweennessCentrality', 'ClosenessCentrality', 'ClusteringCoefficient',
                      'Degree', 'Radiality', 'Stress', 'TopologicalCoefficient']

    df = pd.read_csv('../data/' + i + '.csv')
    df_name = df['name']

    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(df[metric_columns])
    print(scaled_values)
    n_samples = scaled_values.shape[0]
    print(n_samples)

    pca = PCA()
    X_transformed = pca.fit_transform(scaled_values)
    print(X_transformed)

    eigenvalues = pca.explained_variance_
    print('clus ', i)
    print('var ratio:', pca.explained_variance_ratio_[0])
    print('eigen value:', eigenvalues[0])
    print('eigen vector:', pca.components_[0])
    print('_______________________________________________')

    df_scaled = pd.DataFrame(scaled_values, columns=metric_columns)

    # One matrix-vector product instead of a nested Python loop. Assigning the
    # whole column also avoids the chained assignment df['Overall'][k] = ...,
    # which does not write back to the frame under pandas Copy-on-Write.
    overall_scores = scaled_values @ pca.components_[0]
    df_scaled['Overall'] = overall_scores
    for score in overall_scores:
        print('overall ', score)

    # df_name and df_scaled both carry the default 0..n-1 index, so the
    # column-wise concat lines names up with their scores.
    pd.concat([df_name, df_scaled], axis=1, sort=False).to_excel('../data/centrality_' + str(i) + '.xlsx')

In [None]:
# Run the PCA scoring on the node table; eigen() writes ../data/centrality_<name>.xlsx.
eigen('string_interactions_short.tsv default node')

In [None]:
# Reload the spreadsheet that eigen() wrote for this node table.
data = pd.read_excel('../data/centrality_string_interactions_short.tsv default node.xlsx')
data

In [None]:
# Node names for the bar chart. Selected by label rather than by position 1,
# so the cell keeps working if the spreadsheet's column layout changes.
x = data['name']
x

In [None]:
# PCA "Overall" score per node. Selected by label rather than by position 9,
# so the cell keeps working if the spreadsheet's column layout changes.
y = data['Overall']
y

In [None]:
# Horizontal bar chart: one bar per entry of x, bar length taken from y.
plt.figure(figsize=(8, 20))
plt.barh(x, y, color='lightcoral')

# barh() draws the values along the horizontal axis, so the score label
# belongs on the x-axis (it was previously attached to the category axis).
plt.xlabel('Skor', size=14)
plt.xticks(size=10)
plt.yticks(size=10)

plt.show()

In [None]:
# Keep only rows whose Overall score is non-negative, highest score first.
Pakai = (
    data
    .loc[lambda frame: frame['Overall'] >= 0]
    .sort_values(by='Overall', ascending=False)
)
# Optional export, left disabled:
# Pakai.to_excel('Data PKM Alzheimer/Centrality_Overall_More_Than_Zero.xlsx')
Pakai

In [None]:
# Renumber the sorted rows and drop the index column that came from the Excel file.
# errors='ignore' keeps the cell re-runnable: on a second run the column is
# already gone and a plain drop() would raise KeyError.
Pakai = Pakai.reset_index(drop=True).drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Names of the retained rows, in descending-score order.
Nama = Pakai['name']
Nama

In [None]:
# First ten rows of Pakai (sorted by Overall, descending); copied so later edits do not touch Pakai.
best_protein_candidate = Pakai.head(10).copy()
best_protein_candidate

In [None]:
# Show the candidate names as a plain Python list.
best_protein_candidate["name"].to_list()

In [None]:
directory_path = '../data/compounds'

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the combined frame (and everything derived from it) reproducible.
csv_files = sorted(file for file in os.listdir(directory_path) if file.endswith('.csv'))

# Read every CSV once, then concatenate in a single call.
compounds_df = [pd.read_csv(os.path.join(directory_path, csv_file)) for csv_file in csv_files]

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
combined_compounds_df = pd.concat(compounds_df, ignore_index=True)
combined_compounds_df.head()

In [None]:
# Keep only the compound id, name and SMILES columns.
compounds_compact_summary_df = combined_compounds_df[['cid', 'cmpdname', 'isosmiles']]
compounds_compact_summary_df

In [None]:
# Save the compact ligand table (the DataFrame index is written as the first CSV column).
compounds_compact_summary_df.to_csv('../data/ligands/ligand_smiles_results.csv')

# Generate Ligand Fingerprint

In [None]:
# One fingerprint per ligand SMILES, requested with the 'pubchem' type and
# converted to a plain list of ints (one list per compound).
list_of_ligand_fingerprints = [
  list(get_fingerprint(ligand_smiles, 'pubchem').to_numpy().astype(int))
  for ligand_smiles in compounds_compact_summary_df['isosmiles']
]

# One row per compound, one column per fingerprint position.
list_of_ligand_fingerprints_df = pd.DataFrame(list_of_ligand_fingerprints)
list_of_ligand_fingerprints_df.head(5)


In [None]:
# Put the fingerprint columns next to cid / name / SMILES; the column-wise concat aligns rows on the index.
compound_fingerprints_df = pd.concat([compounds_compact_summary_df, list_of_ligand_fingerprints_df], axis=1)
compound_fingerprints_df.head()

In [None]:
# Save the ligand fingerprints (the DataFrame index is written as the first CSV column).
compound_fingerprints_df.to_csv('../data/fingerprints/ligand_fingerprints.csv')

# Generate Decoy Fingerprints

In [None]:
# Load the decoy SMILES table and discard the two 'Kode-*' columns.
decoy_smiles = pd.read_csv("../data/decoys/decoy-smiles-results.csv").drop(columns=['Kode-1', 'Kode-2'])

# One fingerprint per decoy SMILES, requested with the 'pubchem' type and
# converted to a plain list of ints.
list_of_decoy_fingerprints = [
  list(get_fingerprint(decoy, 'pubchem').to_numpy().astype(int))
  for decoy in decoy_smiles['smiles']
]

# One row per decoy, one column per fingerprint position.
list_of_decoy_fingerprints_df = pd.DataFrame(list_of_decoy_fingerprints)
list_of_decoy_fingerprints_df.head(5)

In [None]:
# Put the fingerprint columns next to the decoy SMILES (rows aligned on the index) and save the result.
decoy_fingerprints_df = pd.concat([decoy_smiles, list_of_decoy_fingerprints_df], axis=1)
decoy_fingerprints_df.to_csv("../data/fingerprints/decoy_fingerprints.csv")

# Labeling and Combine Data

In [None]:
# Reload the ligand fingerprints, drop the saved index and identifier columns,
# rename the SMILES column to match the decoy table, and label the rows class 1.
ligand_pubchem_fingerprint = (
  pd.read_csv("../data/fingerprints/ligand_fingerprints.csv")
  .drop(columns=['Unnamed: 0', 'cid', 'cmpdname'])
  .rename(columns={'isosmiles': 'smiles'})
  .assign(**{'class': 1})
)
ligand_pubchem_fingerprint.head(10)

In [None]:
# Reload the decoy fingerprints, drop the saved index column, and label the rows class 0.
decoy_pubchem_fingerprint = (
  pd.read_csv('../data/fingerprints/decoy_fingerprints.csv')
  .drop(columns=['Unnamed: 0'])
  .assign(**{'class': 0})
)
decoy_pubchem_fingerprint.head(10)

In [None]:
# Stack ligands (class 1) on top of decoys (class 0) with a fresh index, then save the combined table.
all_fingerprints = pd.concat([ligand_pubchem_fingerprint, decoy_pubchem_fingerprint], axis=0, ignore_index=True)
all_fingerprints.to_csv('../data/results/all_fingerprints.csv')

# Herbal Compound

In [None]:
# Load the herbal compound table; later cells read compound names from its 'Senyawa' column.
herbal_compounds = pd.read_csv("../data/herbals/herbal_data.csv")
herbal_compounds.head(10)

## Resolving using Chemical Identifier Resolver

In [None]:
from urllib.request import urlopen
from urllib.parse import quote

def CIRconvert(ids, timeout=30):
  """Resolve a chemical identifier to SMILES through the cactus.nci.nih.gov resolver.

  Parameters
  ----------
  ids : str
    Identifier to resolve; it is URL-quoted and placed in the request path.
  timeout : float, optional
    Seconds to wait for the HTTP request before giving up (default 30), so
    one stalled request cannot hang the whole lookup loop.

  Returns
  -------
  str
    The response body decoded as UTF-8, or the sentinel ``'Did not work'``
    when the lookup fails for any reason (bad input, HTTP error, timeout).
  """
  try:
    url = 'http://cactus.nci.nih.gov/chemical/structure/' + quote(ids) + '/smiles'
    # The context manager closes the connection even if read()/decode() raises.
    with urlopen(url, timeout=timeout) as response:
      return response.read().decode('utf8')
  except Exception:
    # Best-effort lookup: every ordinary failure maps to the sentinel, but
    # KeyboardInterrupt / SystemExit still propagate so the loop can be stopped.
    return 'Did not work'

In [None]:
# Compound names to resolve, in table order.
herbal_compound_list = herbal_compounds['Senyawa'].to_list()

# Resolve each name with CIRconvert and log the result; failed lookups stay
# in the list as CIRconvert's 'Did not work' sentinel.
herbal_compound_smiles_list = []
for herbal_name in herbal_compound_list:
  resolved_smiles = CIRconvert(herbal_name)
  print(f"{herbal_name} -> {resolved_smiles}")
  herbal_compound_smiles_list += [resolved_smiles]

herbal_compound_smiles_list

In [None]:
# Attach the lookup results as a 'smiles' column; the list was built in 'Senyawa' row order.
herbal_compounds["smiles"] = herbal_compound_smiles_list
herbal_compounds.head(10)

In [None]:
# Checkpoint the resolver results to disk (no index column is written).
herbal_compounds.to_csv("../data/herbals/herbal_smiles_part_1.csv", index=False)

In [None]:
# Reload the part-1 checkpoint so the following steps can start from the saved file.
herbal_smiles_by_CIR = pd.read_csv("../data/herbals/herbal_smiles_part_1.csv")
herbal_smiles_by_CIR.head(10)

## Resolving using Pubchempy

In [None]:
def getHerbalSMILESFromPubchempy(compound_name):
    """Look a compound name up on PubChem and return its isomeric SMILES.

    NOTE(review): this helper was called but not defined anywhere in the
    notebook, so the cell raised NameError on a fresh kernel. This is a
    reconstruction; confirm it matches the original helper. The
    ``"Not Found"`` sentinel is used because the later cleanup cell filters
    rows on exactly that string.

    Parameters
    ----------
    compound_name : str
        Name passed to ``pcp.get_compounds(..., 'name')``.

    Returns
    -------
    str
        Isomeric SMILES of the first match, or ``"Not Found"`` when the
        lookup fails or returns nothing.
    """
    try:
        matches = pcp.get_compounds(compound_name, 'name')
    except Exception:
        # Best-effort lookup: a failed request counts as "not found".
        return "Not Found"
    if not matches or not matches[0].isomeric_smiles:
        return "Not Found"
    return matches[0].isomeric_smiles


# Start from the SMILES stored in the reloaded checkpoint, so this cell does
# not depend on the in-memory list from the (slow) resolver loop.
herbal_compound_smiles_list = herbal_smiles_by_CIR['smiles'].to_list()

# Retry only the entries that still hold the resolver's failure sentinel.
for counter, (compound, smiles) in enumerate(
    zip(herbal_smiles_by_CIR['Senyawa'], herbal_smiles_by_CIR['smiles'])
):
    if smiles == "Did not work":
        smiles_by_pcp = getHerbalSMILESFromPubchempy(compound)
        print(f"{counter} : {compound} -> {smiles_by_pcp}")
        herbal_compound_smiles_list[counter] = smiles_by_pcp

In [None]:
# Write the SMILES list back into the table now that the PubChemPy retries have replaced failed entries.
herbal_compounds["smiles"] = herbal_compound_smiles_list
herbal_compounds.head(10)

In [None]:
# Checkpoint the table after the PubChemPy retries (no index column is written).
herbal_compounds.to_csv("../data/herbals/herbal_smiles_part_2.csv", index=False)

## All resolved herbal compounds SMILES

In [None]:
# Reload the part-2 checkpoint and keep only rows whose SMILES is not the
# "Not Found" sentinel (the original author's note recorded 5561 rows kept).
herbal_smiles_all = (
  pd.read_csv('../data/herbals/herbal_smiles_part_2.csv')
  .loc[lambda frame: frame['smiles'] != "Not Found"]
)
herbal_smiles_all.head(20)

In [None]:
# Renumber the rows 0..n-1 after the filtering step left gaps in the index.
herbal_smiles_all = herbal_smiles_all.reset_index(drop=True)
herbal_smiles_all.head(20)

In [None]:
# Save the cleaned herbal SMILES table (no index column is written).
herbal_smiles_all.to_csv("../data/herbals/herbal_smiles_all.csv", index=False)

## Generate herbal fingerprints

In [None]:
herbal_smiles = pd.read_csv("../data/herbals/herbal_smiles_all.csv")

# One fingerprint per herbal SMILES, requested with the 'pubchem' type and
# converted to a plain list of ints.
list_of_herbal_fingerprints = [
  list(get_fingerprint(herbal, 'pubchem').to_numpy().astype(int))
  for herbal in herbal_smiles['smiles']
]

# One row per compound, one column per fingerprint position.
list_of_herbal_fingerprints_df = pd.DataFrame(list_of_herbal_fingerprints)
list_of_herbal_fingerprints_df.head(5)

In [None]:
# Put the fingerprint columns next to the herbal table (rows aligned on the index) and save the result.
herbal_fingerprints_df = pd.concat([herbal_smiles, list_of_herbal_fingerprints_df], axis=1)
herbal_fingerprints_df.to_csv("../data/fingerprints/herbal_fingerprints.csv")
herbal_fingerprints_df.head(10)

## Labeling and combine data

In [None]:
# Reload the herbal fingerprints, drop the name and saved-index columns, and label the rows class 2.
herbal_fingerprints_df = (
  pd.read_csv("../data/fingerprints/herbal_fingerprints.csv")
  .drop(columns=["Senyawa", "Unnamed: 0"])
  .assign(**{"class": 2})
)
herbal_fingerprints_df.head(10)

In [None]:
# Reload the combined fingerprint table and drop the saved index column.
all_fingerprints = pd.read_csv("../data/results/all_fingerprints.csv").drop(columns=["Unnamed: 0"])

# This cell overwrites the file it just read. Remove herbal rows (class 2)
# left by a previous run so that re-running the cell does not append them twice.
all_fingerprints = all_fingerprints[all_fingerprints["class"] != 2]

# Append the herbal compounds and save the combined table.
all_fingerprints = pd.concat([all_fingerprints, herbal_fingerprints_df], axis=0, ignore_index=True)
all_fingerprints.to_csv('../data/results/all_fingerprints.csv')