In [1]:
import os
import pandas as pd
import numpy as np
from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
from sklearn.model_selection import cross_val_predict
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.model_selection import LeaveOneOut, KFold, cross_val_score

In [3]:
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


In [4]:
output_dir = 'C:\\Users\\MONSTER\\Desktop\\TEZ SON\\CCLE\\OUTPUT3'

In [5]:
os.chdir(output_dir)

In [6]:
Drugs = [item1 for item1 in os.listdir() if os.path.isdir(item1)]

In [7]:
Train_IC50_predict = dict()
Test_IC50_predict = dict()
Drug_dict = dict()  # Her histoloji için bir ilaç sözlüğü oluşturun
os.chdir(output_dir)
    
Drugs = [item1 for item1 in os.listdir() if os.path.isdir(item1)]
ind = 1
        
for drug in Drugs:
    os.chdir(os.path.join(output_dir, drug))
    expression_data = pd.read_csv(drug + " - Expression Data (IC50) - Normalized3.tsv", sep="\t")
    
    if len(expression_data.columns) > 2 and len(expression_data['Sample_Name'].unique()) > 20:
        print(f"\nDrug {ind}: Support Vector Regression is running for {drug}", flush=True)
        x1 = expression_data.drop(['Sample_Name', 'IC50'], axis=1)
        y1 = expression_data['IC50']
        
        if len(x1.columns)>= 5:
            x_train, x_test, y_train, y_test = train_test_split(x1, y1, test_size=0.30, random_state=42)
            
            loo = LeaveOneOut()
            svr_model = SVR(kernel='rbf')
            scores_loocv = cross_val_score(svr_model, x_train, y_train, cv=loo, scoring='neg_mean_squared_error')
            mean_score_loocv = scores_loocv.mean()

            # k-fold kullanılıyor.
            kfold = KFold(n_splits=2, random_state=1, shuffle = True)
            scores_kfold = cross_val_score(svr_model, x_train, y_train, cv=kfold, scoring='neg_mean_squared_error')
            mean_score_kfold = scores_kfold.mean()

            svr_model.fit(x_train, y_train)
            y_train_pred = svr_model.predict(x_train)
            y_test_pred = svr_model.predict(x_test)

            
                
            # Sonuçları toplama
            r_squared_train = r2_score(y_train, y_train_pred)
            pearson_correlation_train, p_value_train_pearsonr = pearsonr(y_train_pred, y_train)
            corr_train_spearmanr, p_value_train_spearmanr = spearmanr(y_train_pred, y_train)
            rmse_val_train = np.sqrt(mean_squared_error(y_train_pred, y_train))
                
            r_squared_test = r2_score(y_test, y_test_pred)
            pearson_correlation_test, p_value_test_pearsonr = pearsonr(y_test_pred, y_test)
            corr_test_spearmanr, p_value_test_spearmanr = spearmanr(y_test_pred, y_test)
            rmse_val_test = np.sqrt(mean_squared_error(y_test_pred, y_test))
                
            result_table_drug = pd.DataFrame({
                         
                        'Drug': drug,
                        'R_Square_Train': r_squared_train,
                        'R_Square_Test': r_squared_test,
                        'Pearson_Correlation_Train': pearson_correlation_train,
                        'Pearson_Correlation_Test': pearson_correlation_test,
                        'Pearson_Correlation_P_Value_Train': p_value_train_pearsonr,
                        'Pearson_Correlation_P_Value_Test': p_value_test_pearsonr,
                        'Spearman_Correlation_Train': corr_train_spearmanr,
                        'Spearman_Correlation_Test': corr_test_spearmanr,
                        'Spearman_Correlation_P_Value_Train': p_value_train_spearmanr,
                        'Spearman_Correlation_P_Value_Test': p_value_test_spearmanr,
                        'RMSE_Train': rmse_val_train,
                        'RMSE_Test': rmse_val_test
                }, index=[0])
                    
            Drug_dict[drug] = result_table_drug
            print(Drug_dict[drug])
                
            # Sonuçları dosyalara kaydetme
            os.mkdir(os.path.join(output_dir, drug, "Support Vector Regression"))
            result_table_drug.to_csv(os.path.join(output_dir, drug, "Support Vector Regression", drug + "- Result_SVR.tsv"), 
                                                 sep='\t', index=False, quoting=False)
            Train_IC50_predict[drug] = pd.DataFrame({'Actual_IC50_Train_Data': y_train, 'Predicted_IC50_Train_Data': y_train_pred})
            Train_IC50_predict[drug].to_csv(os.path.join(output_dir, drug, "Support Vector Regression", drug + "- IC50_Train_Result.tsv"), 
                                                        sep='\t', index=False, quoting=False)
            Test_IC50_predict[drug] = pd.DataFrame({'Actual_IC50_Test_Data': y_test, 'Predicted_IC50_Test_Data': y_test_pred})
            Test_IC50_predict[drug].to_csv(os.path.join(output_dir, drug, "Support Vector Regression", drug + "- IC50_Test_Result.tsv"), 
                                                       sep='\t', index=False, quoting=False)
                
            ind = ind + 1

if Drug_dict:
    All_Result_SVR_CCLE_without_separated_tissue = pd.concat(Drug_dict.values())
    


Drug 1: Support Vector Regression is running for 17-AAG
     Drug  R_Square_Train  R_Square_Test  Pearson_Correlation_Train  \
0  17-AAG        0.066507        0.09868                   0.460494   

   Pearson_Correlation_Test  Pearson_Correlation_P_Value_Train  \
0                  0.465952                       1.120490e-17   

   Pearson_Correlation_P_Value_Test  Spearman_Correlation_Train  \
0                      1.398569e-08                    0.436218   

   Spearman_Correlation_Test  Spearman_Correlation_P_Value_Train  \
0                   0.217477                        7.849890e-16   

   Spearman_Correlation_P_Value_Test  RMSE_Train  RMSE_Test  
0                           0.011597     0.50367   0.366131  

Drug 2: Support Vector Regression is running for AEW541
     Drug  R_Square_Train  R_Square_Test  Pearson_Correlation_Train  \
0  AEW541        0.330906       0.182486                   0.597978   

   Pearson_Correlation_Test  Pearson_Correlation_P_Value_Train  \
0    

         Drug  R_Square_Train  R_Square_Test  Pearson_Correlation_Train  \
0  PD-0325901        0.287518       0.150109                   0.710752   

   Pearson_Correlation_Test  Pearson_Correlation_P_Value_Train  \
0                   0.56292                       2.245969e-24   

   Pearson_Correlation_P_Value_Test  Spearman_Correlation_Train  \
0                          0.000001                    0.810996   

   Spearman_Correlation_Test  Spearman_Correlation_P_Value_Train  \
0                   0.579458                        2.820357e-36   

   Spearman_Correlation_P_Value_Test  RMSE_Train  RMSE_Test  
0                       4.247772e-07    1.462838   1.724259  

Drug 14: Support Vector Regression is running for PD-0332991
         Drug  R_Square_Train  R_Square_Test  Pearson_Correlation_Train  \
0  PD-0332991        0.153632       0.194491                   0.446598   

   Pearson_Correlation_Test  Pearson_Correlation_P_Value_Train  \
0                  0.449231              

In [9]:
All_SVR_Result = All_Result_SVR_CCLE_without_separated_tissue.sort_values(by='R_Square_Test', ascending=False)
All_SVR_Result.to_excel('C:\\Users\\MONSTER\\Desktop\\TEZ SON\\CCLE\\OUTPUT3\\All_Result_SVR_CCLE_without_separated_tissue.xlsx', index=False)