In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.preprocessing import LabelEncoder

In [None]:
#ORIGINAL DATA WITH OLS MODEL
df = pd.read_csv('C30ALL.csv')
enc = LabelEncoder()
df['Sex'] = enc.fit_transform(df['Sex'])
df['Tumor_Grade'] = enc.fit_transform(df['Tumor_Grade'])
df['Tumor_Stage'] = enc.fit_transform(df['Tumor_Stage'])
df['Clinical_Staging'] = enc.fit_transform(df['Clinical_Staging'])
x = df.iloc[:,:-5].values
x = np.array(x)
x = sm.add_constant(x)
y = df.iloc[:,-5:].values
y = np.array(y)
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.4,random_state=220)
title = ['Paclitaxel','Vincristine','Daunorubicin','Vinblastine','Doxurubicin']
for i in range(5):
  y_train_ = y_train[:,i]
  model = sm.OLS(y_train_,x_train).fit()
  model.predict(x_test)
  print(title[i])
  y_pred = model.predict(x_test)
  test_accuracy = r2_score(y_test[:, i], y_pred)
  print(f'Test Accuracy: {test_accuracy:.4f}\n')

In [244]:
#ORGINAL DATA WITH SVR
from sklearn.svm import SVR
df = pd.read_csv('360ALL.csv')
cts = ['Sex', 'Age', 'Tumor_Grade', 'Tumor_Stage', 'Clinical_Staging', 'ABCB1-2', 'ABCC1-2', 'ABCC2-2', 'ABCC3-2', 'ABCC5-2', 'ABCG2-2',
 'CDK2-2', 'CDKN1A-2', 'LRP1-2', 'STAT5B-2', 'TP53-2', 'Paclitaxel', 'Vincristine', 'Daunorubicin', 'Vinblastine', 'Doxorubicin']
nf = np.log(df.iloc[:,5:-5])
for i in range(11):
  df.insert(5+i,cts[5+i],nf.iloc[:,i])
x = df.iloc[:,:-5].values
y = df.iloc[:,-5:].values
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.4,random_state=8)
svr = SVR(kernel ='poly')
title = ['Paclitaxel','Vincristine','Daunorubicin','Vinblastine','Doxurubicin']
for i in range(5):
  svr.fit(x_train,y_train[:,i])
  y_pred = svr.predict(x_test)
  test_accuracy = r2_score(y_test[:, i], y_pred)
  print(f'{title[i]} Accuracy: {test_accuracy:.4f}')

Paclitaxel	 : 0.7793802629289592
Vincristine	 : 0.7616497005477186
Daunorubicin	 : 0.7468620833491141
Vinblastine	 : 0.7347381370587596
Doxurubicin	 : 0.7035542502707886


In [188]:
#ORGINAL DATA WITH DECISION TREE
from sklearn.tree import DecisionTreeClassifier
df = pd.read_csv('360ALL.csv')
enc = LabelEncoder()
df['Sex'] = enc.fit_transform(df['Sex'])
df['Tumor_Grade'] = enc.fit_transform(df['Tumor_Grade'])
df['Tumor_Stage'] = enc.fit_transform(df['Tumor_Stage'])
df['Clinical_Staging'] = enc.fit_transform(df['Clinical_Staging'])
x = df.iloc[:,:-5].values
y = df.iloc[:,-5:].values
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.4,random_state=220)
x_train = x_train.astype(int)
y_train = y_train.astype(int)
tree = DecisionTreeClassifier(criterion = 'entropy',max_depth = 10)
for i in range(5):
  tree.fit(x_train,y_train[:,i])
  print(title[i],r2_score(tree.predict(x_test),y_test[:,i]))

Paclitaxel 0.0
Vincristine 0.0
Daunorubicin 0.0
Vinblastine 0.0
Doxurubicin 0.0


In [31]:
#SYNTHETIC DATA
import pandas as pd
data = pd.read_csv('C30ALL.csv')
import csv
with open ('C30ALL.csv','r') as fptr:
  read = csv.reader(fptr)
  categorical = []
  for i in read:
    categorical.append(i)
categorical_features = categorical[0]
from ctgan import CTGAN
ctgan = CTGAN(verbose=True)
ctgan.fit(data, categorical_features, epochs = 20)
samples = ctgan.sample(1000)
samples.to_csv('synthetic.csv')

Gen. (2.87) | Discrim. (-0.08): 100%|██████████| 20/20 [00:13<00:00,  1.46it/s]


In [191]:
#SYNTHETIC DATA WITH SVR
from sklearn.svm import SVR
df = pd.read_csv('360ALL.csv')
cts = ['Sex', 'Age', 'Tumor_Grade', 'Tumor_Stage', 'Clinical_Staging', 'ABCB1-2', 'ABCC1-2', 'ABCC2-2', 'ABCC3-2', 'ABCC5-2', 'ABCG2-2',
 'CDK2-2', 'CDKN1A-2', 'LRP1-2', 'STAT5B-2', 'TP53-2', 'Paclitaxel', 'Vincristine', 'Daunorubicin', 'Vinblastine', 'Doxorubicin']
nf = np.square(df.iloc[:,6:-5])
for i in range(11):
  df.insert(5+i,cts[5+i],nf.iloc[:,i])
enc = LabelEncoder()
df['Sex'] = enc.fit_transform(df['Sex'])
df['Tumor_Grade'] = enc.fit_transform(df['Tumor_Grade'])
df['Tumor_Stage'] = enc.fit_transform(df['Tumor_Stage'])
df['Clinical_Staging'] = enc.fit_transform(df['Clinical_Staging'])
x = df.iloc[:,:-5].values
y = df.iloc[:,-5:].values
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=42)
svr = SVR(kernel ='rbf')
title = ['Paclitaxel','Vincristine','Daunorubicin','Vinblastine','Doxurubicin']
for i in range(5):
  svr.fit(x_train,y_train[:,i])
  y_pred = svr.predict(x_test)
  test_accuracy = r2_score(y_test[:, i], y_pred)
  print(f'{title[i]} Accuracy: {test_accuracy:.4f}')

IndexError: ignored

In [53]:
#SYNTHETIC DATA WITH DECISION TREE
from sklearn.tree import DecisionTreeClassifier
df = pd.read_csv('synthetic.csv')
enc = LabelEncoder()
df['Sex'] = enc.fit_transform(df['Sex'])
df['Tumor_Grade'] = enc.fit_transform(df['Tumor_Grade'])
df['Tumor_Stage'] = enc.fit_transform(df['Tumor_Stage'])
df['Clinical_Staging'] = enc.fit_transform(df['Clinical_Staging'])
df.iloc[:,5:-5] = np.square(df.iloc[:,5:-5])
x = df.iloc[:,:-5].values
y = df.iloc[:,-5:].values
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.4,random_state=220)
x_train = x_train.astype(int)
y_train = y_train.astype(int)
tree = DecisionTreeClassifier(criterion = 'entropy',max_depth = 50)
for i in range(5):
  tree.fit(x_train,y_train[:,i])
  print(title[i],r2_score(tree.predict(x_test),y_test[:,i]))

Paclitaxel -2.921094762361401
Vincristine -1.4055535531595131
Daunorubicin -0.5500002637015768
Vinblastine -2.1550629306978184
Doxurubicin -1.327130397877696


In [198]:
#SYNTHETIC DATA WITH DECISION TREE AFTER SQUARING GENES
from sklearn.tree import DecisionTreeClassifier
df = pd.read_csv('360ALL.csv')
print(df.shape)
cts = ['Sex', 'Age', 'Tumor_Grade', 'Tumor_Stage', 'Clinical_Staging', 'ABCB1-2', 'ABCC1-2', 'ABCC2-2', 'ABCC3-2', 'ABCC5-2', 'ABCG2-2',
 'CDK2-2', 'CDKN1A-2', 'LRP1-2', 'STAT5B-2', 'TP53-2', 'Paclitaxel', 'Vincristine', 'Daunorubicin', 'Vinblastine', 'Doxorubicin']
nf = np.square(df.iloc[:,5:-5])
print(nf.shape)
for i in range(11):
  df.insert(5+i,cts[5+i],nf.iloc[:,i])
enc = LabelEncoder()
df['Sex'] = enc.fit_transform(df['Sex'])
df['Tumor_Grade'] = enc.fit_transform(df['Tumor_Grade'])
df['Tumor_Stage'] = enc.fit_transform(df['Tumor_Stage'])
df['Clinical_Staging'] = enc.fit_transform(df['Clinical_Staging'])
x = df.iloc[:,:-5].values
y = df.iloc[:,-5:].values
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.4,random_state=220)
x_train = x_train.astype(int)
y_train = y_train.astype(int)
tree = DecisionTreeClassifier(criterion = 'entropy',max_depth = 50)
for i in range(5):
  tree.fit(x_train,y_train[:,i])
  print(title[i],r2_score(tree.predict(x_test),y_test[:,i]))

(341, 21)
(341, 11)
Paclitaxel 0.0
Vincristine 0.0
Daunorubicin 0.0
Vinblastine 0.0
Doxurubicin 0.0
