In [None]:
# header files
%matplotlib inline
import glob
import csv
import numpy as np
import pandas as pd
from sksurv.nonparametric import kaplan_meier_estimator
from sksurv.linear_model import CoxnetSurvivalAnalysis
from sksurv.ensemble import RandomSurvivalForest
from sksurv.metrics import (
    concordance_index_censored,
    concordance_index_ipcw,
    cumulative_dynamic_auc,
    integrated_brier_score,
)
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_regression, f_classif
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [10, 10]
print("Header files loaded!")

In [None]:
# hyper-parameters
# Integer switches (1 = on, 0 = off) tested with plain `if` by the data-loading cells.
is_ovarian_cancer = 1  # gates the cells that read ../results/oc_collagen_features and the OC clinical CSV
is_cervix_cancer = 0  # not read by any cell visible in this part of the notebook
is_endometrial_cancer = 0  # not read by any cell visible in this part of the notebook
is_ccf = 1  # gates the cells that read ../../ccf_ovarian_cancer and the combined clinical CSV

In [None]:
# load ovarian cancer files
if is_ovarian_cancer:
    # glob.glob() yields matches in arbitrary, filesystem-dependent order. Every per-sample
    # list built later is paired with oc_files by position, so sort once here to make the
    # sample order (and all printed / exported results) reproducible across runs.
    oc_files = sorted(glob.glob("../results/oc_collagen_features/window_1/*"))
    print(len(oc_files))

In [None]:
# collect features
if is_ovarian_cancer:
    # One feature vector per slide: the CSV found under window_1 (the globbed path itself)
    # followed by the same-named CSV under window_2 ... window_9, concatenated in that order.
    other_window_dirs = ["../results/oc_collagen_features/window_" + str(window) + "/" for window in range(2, 10)]
    collagen_features = []
    for file in oc_files:
        filename = file.split("/")[-1]
        window_paths = [file] + [window_dir + filename for window_dir in other_window_dirs]
        file_features = []
        for window_path in window_paths:
            with open(window_path, newline='') as csvfile:
                # Every row of the file contributes; the last column of each row is dropped.
                # NOTE(review): the previous code guarded this with `flag == -1` but never
                # changed `flag`, so all rows were always read. If only the first row was
                # intended, add a `break` after the first row.
                for row in csv.reader(csvfile):
                    file_features.extend(float(value) for value in row[:-1])
        collagen_features.append(file_features)

In [None]:
# create output survival information for training model and get til features
if is_ovarian_cancer:
    til_features, censor, days, filenames = [], [], [], []
    with open("../../tcga_oc_clinicaldata.csv", newline='', encoding = "ISO-8859-1") as csvfile:
        clinical_rows = csv.reader(csvfile)
        # The first row is treated as the header and echoed so the column positions
        # used below (0, 1-7, 20, 23) can be checked against it.
        header = next(clinical_rows, None)
        if header is not None:
            print(header)
        for record in clinical_rows:
            filenames.append(record[0])
            til_features.append([float(record[column]) for column in range(1, 8)])
            censor.append(bool(int(record[20])))  # event indicator
            days.append(int(record[23]))  # follow-up time

    # Pair each feature file with the clinical row(s) whose id equals the file name
    # minus its last four characters; the results are kept in oc_files order.
    final_til_features, y, event, survival_time = [], [], [], []
    for slide_path in oc_files:
        slide_id = slide_path.split("/")[-1][:-4]
        for position, clinical_id in enumerate(filenames):
            if clinical_id != slide_id:
                continue
            final_til_features.append(til_features[position])
            y.append([censor[position], days[position]])
            event.append(censor[position])
            survival_time.append(days[position])
    for collected in (final_til_features, y, event, survival_time):
        print(len(collected))

In [None]:
# generate training set for training model
# TIL and collagen vectors are paired purely by position. That is only valid when every
# file in oc_files matched exactly one clinical row; otherwise the two lists drift apart
# and features end up attached to the wrong patient. Fail loudly instead of mispairing.
if not (len(final_til_features) == len(collagen_features) == len(oc_files)):
    raise ValueError(
        "Cannot pair TIL and collagen features by position: "
        + str(len(oc_files)) + " feature files, "
        + str(len(final_til_features)) + " matched clinical rows, "
        + str(len(collagen_features)) + " collagen vectors"
    )

# Each sample is the TIL features followed by the collagen features.
# (For an ablation, keep only `til` or only `collagen` in the expression below.)
features = [til + collagen for til, collagen in zip(final_til_features, collagen_features)]
print(len(features))
print(len(features[0]))

In [None]:
# final training information to be used for training model
# Convert the accumulated Python lists to NumPy arrays. The same names are rebound,
# so the original lists are no longer reachable after this cell has run.
features = np.array(features)
y = np.array(y)  # rows were appended as [event flag, days]
event = np.array(event)
survival_time = np.array(survival_time)

In [None]:
# load CCF cohort files (external test set)
if is_ccf:
    # glob.glob() yields matches in arbitrary, filesystem-dependent order. The test
    # features, risk groups and exported CSV are all aligned with this list by position,
    # so sort once here to keep the sample order reproducible across runs.
    test_ccf_files = sorted(glob.glob("../../ccf_ovarian_cancer/collagen_feature_maps_200_final/*"))
    print(len(test_ccf_files))

In [None]:
# collect test features
if is_ccf:
    # One feature vector per slide: the CSV found under collagen_feature_maps_200_final
    # (the globbed path itself) followed by the same-named CSV under the 250, 300, ...,
    # 600 directories, concatenated in that order.
    other_map_dirs = ["../../ccf_ovarian_cancer/collagen_feature_maps_" + str(size) + "_final/" for size in range(250, 601, 50)]
    test_collagen_features = []
    for file in test_ccf_files:
        filename = file.split("/")[-1]
        map_paths = [file] + [map_dir + filename for map_dir in other_map_dirs]
        file_features = []
        for map_path in map_paths:
            with open(map_path, newline='') as csvfile:
                # Every row of the file contributes; the last column of each row is dropped.
                # NOTE(review): the previous code guarded this with `flag == -1` but never
                # changed `flag`, so all rows were always read. If only the first row was
                # intended, add a `break` after the first row.
                for row in csv.reader(csvfile):
                    file_features.extend(float(value) for value in row[:-1])
        test_collagen_features.append(file_features)

In [None]:
# read survival information and til features for the three test cohorts
if is_ccf:
    til_features_ovarian, censor_ovarian, days_ovarian, filenames_ovarian = [], [], [], []
    til_features_cervix, censor_cervix, days_cervix, filenames_cervix = [], [], [], []
    til_features_endometrial, censor_endometrial, days_endometrial, filenames_endometrial = [], [], [], []

    # Value of column 14 -> the (ids, til features, event flags, days) lists of that cohort.
    cohort_lists = {
        "Cervix": (filenames_cervix, til_features_cervix, censor_cervix, days_cervix),
        "Uterine": (filenames_endometrial, til_features_endometrial, censor_endometrial, days_endometrial),
        "Ovary": (filenames_ovarian, til_features_ovarian, censor_ovarian, days_ovarian),
    }

    with open("../../tcga_oc_cc_ec_clinicaldata.csv", newline='', encoding = "ISO-8859-1") as csvfile:
        clinical_rows = csv.reader(csvfile)
        # The first row is treated as the header and echoed for reference.
        header = next(clinical_rows, None)
        if header is not None:
            print(header)
        for record in clinical_rows:
            # Only rows of a known cohort whose column 15 reads "Immunotherapy" are kept.
            if not (record[14] in cohort_lists and record[15] == "Immunotherapy"):
                continue
            cohort_ids, cohort_til, cohort_censor, cohort_days = cohort_lists[record[14]]
            cohort_ids.append(record[0])
            cohort_til.append([float(record[column]) for column in range(1, 8)])
            cohort_censor.append(bool(int(record[18])))
            cohort_days.append(int(record[17]))

    for cohort_ids in (filenames_ovarian, filenames_cervix, filenames_endometrial):
        print(len(cohort_ids))

In [None]:
# Pair every CCF feature file with its ovarian clinical row (if any), in test_ccf_files order.
final_filenames_ovarian = []
final_til_features_ovarian = []
final_collagen_features_ovarian = []
y_ovarian = []
event_ovarian = []
survival_time_ovarian = []
for file_position, slide_path in enumerate(test_ccf_files):
    # Clinical id = file name without its last six and first three characters.
    slide_id = slide_path.split("/")[-1][:-6][3:]
    for clinical_position, clinical_id in enumerate(filenames_ovarian):
        if clinical_id != slide_id:
            continue
        final_til_features_ovarian.append(til_features_ovarian[clinical_position])
        y_ovarian.append([censor_ovarian[clinical_position], days_ovarian[clinical_position]])
        event_ovarian.append(censor_ovarian[clinical_position])
        survival_time_ovarian.append(days_ovarian[clinical_position])
        final_collagen_features_ovarian.append(test_collagen_features[file_position])
        final_filenames_ovarian.append(slide_path)

for collected in (
    final_filenames_ovarian,
    final_collagen_features_ovarian,
    final_til_features_ovarian,
    y_ovarian,
    event_ovarian,
    survival_time_ovarian,
):
    print(len(collected))

In [None]:
# Pair every CCF feature file with its cervix clinical row (if any), in test_ccf_files order.
final_filenames_cervix = []
final_til_features_cervix = []
final_collagen_features_cervix = []
y_cervix = []
event_cervix = []
survival_time_cervix = []
for file_position, slide_path in enumerate(test_ccf_files):
    # Clinical id = file name without its last six and first three characters.
    slide_id = slide_path.split("/")[-1][:-6][3:]
    # enumerate() keeps the clinical index tied to the row being inspected. The previous
    # hand-maintained counter was not advanced when the id "3" was skipped, so every
    # cervix row after it picked up the TIL features / survival data of its predecessor.
    for clinical_position, clinical_id in enumerate(filenames_cervix):
        # The clinical entry with id "3" is excluded from the cervix cohort
        # (the reason is not recorded in this notebook).
        if clinical_id == "3":
            continue

        if clinical_id == slide_id:
            final_til_features_cervix.append(til_features_cervix[clinical_position])
            y_cervix.append([censor_cervix[clinical_position], days_cervix[clinical_position]])
            event_cervix.append(censor_cervix[clinical_position])
            survival_time_cervix.append(days_cervix[clinical_position])
            final_collagen_features_cervix.append(test_collagen_features[file_position])
            final_filenames_cervix.append(slide_path)
print(len(final_filenames_cervix))
print(len(final_collagen_features_cervix))
print(len(final_til_features_cervix))
print(len(y_cervix))
print(len(event_cervix))
print(len(survival_time_cervix))

In [None]:
# Pair every CCF feature file with its endometrial clinical row (if any), in test_ccf_files order.
final_filenames_endometrial = []
final_til_features_endometrial = []
final_collagen_features_endometrial = []
y_endometrial = []
event_endometrial = []
survival_time_endometrial = []
for file_position, slide_path in enumerate(test_ccf_files):
    # Clinical id = file name without its last six and first three characters.
    slide_id = slide_path.split("/")[-1][:-6][3:]
    for clinical_position, clinical_id in enumerate(filenames_endometrial):
        if clinical_id != slide_id:
            continue
        final_til_features_endometrial.append(til_features_endometrial[clinical_position])
        y_endometrial.append([censor_endometrial[clinical_position], days_endometrial[clinical_position]])
        event_endometrial.append(censor_endometrial[clinical_position])
        survival_time_endometrial.append(days_endometrial[clinical_position])
        final_collagen_features_endometrial.append(test_collagen_features[file_position])
        final_filenames_endometrial.append(slide_path)

for collected in (
    final_filenames_endometrial,
    final_collagen_features_endometrial,
    final_til_features_endometrial,
    y_endometrial,
    event_endometrial,
    survival_time_endometrial,
):
    print(len(collected))

In [None]:
# generate feature set for testing model
# Cohorts are stacked in a fixed order - endometrial, cervix, ovarian - which every later
# cell relies on when it concatenates events / survival times or indexes into `group`.
# Each sample is the TIL features followed by the collagen features
# (for an ablation, keep only `til` or only `collagen` in the expression below).
test_features = []
for cohort_til, cohort_collagen in (
    (final_til_features_endometrial, final_collagen_features_endometrial),
    (final_til_features_cervix, final_collagen_features_cervix),
    (final_til_features_ovarian, final_collagen_features_ovarian),
):
    test_features.extend(til + collagen for til, collagen in zip(cohort_til, cohort_collagen))

print(len(test_features))
print(len(test_features[0]))

In [None]:
# run on test set
features_train = features
features_test = test_features
event_train, survival_time_train = event, survival_time

# Structured (event flag, time) array used as the survival target for fitting.
# (Previously written as `dt = dtype=[...]`, which also bound a stray global `dtype`.)
dt = [('Status', '?'), ('Survival_in_days', '<f8')]
y_train = np.array([tuple(row) for row in y], dtype=dt)

# Scale with statistics learned on the training cohort only, then reuse them for the test cohort.
scaler = MinMaxScaler()
features_train = scaler.fit_transform(features_train)
features_test = scaler.transform(features_test)
features_train_df = pd.DataFrame(features_train)
features_test_df = pd.DataFrame(features_test)

# fit model
estimator = CoxnetSurvivalAnalysis(l1_ratio=0.9, alpha_min_ratio=0.1)
estimator.fit(features_train_df, y_train)

# get risk scores (computed once and reused for both the c-index and the grouping)
train_risk_scores = estimator.predict(features_train_df)
test_risk_scores = estimator.predict(features_test_df)

# Test labels are concatenated in the same cohort order as test_features
# (endometrial, cervix, ovarian).
score, _, _, _, _ = concordance_index_censored(event_endometrial + event_cervix + event_ovarian, survival_time_endometrial + survival_time_cervix + survival_time_ovarian, test_risk_scores)
print("Test: " + str(score))
score, _, _, _, _ = concordance_index_censored(event, survival_time, train_risk_scores)
print("Train: " + str(score))

# Risk threshold taken from the training cohort.
# NOTE(review): despite its name, `median` holds the MEAN of the training risk scores.
# The value is left unchanged because later cells read `median`; confirm which statistic
# is intended before renaming or switching to np.median.
median = np.mean(train_risk_scores)
group = [1 if risk > median else 0 for risk in test_risk_scores]  # 1 = high risk, 0 = low risk
count_high = sum(group)
count_low = len(group) - count_high

In [None]:
# Event indicators of the test samples as 0/1, in cohort order
# (endometrial, cervix, ovarian), printed as one "; "-separated line.
a = [
    0 if status == False else 1
    for status in event_endometrial + event_cervix + event_ovarian
]
print(*a, sep="; ")

In [None]:
# Survival times of the test samples in cohort order (endometrial, cervix, ovarian),
# printed as one "; "-separated line.
print(*(survival_time_endometrial + survival_time_cervix + survival_time_ovarian), sep="; ")

In [None]:
# Risk group assigned to each test sample (1 = score above the threshold, 0 otherwise),
# printed as one "; "-separated line.
print(*group, sep="; ")

In [None]:
# Kaplan-Meier curves of the test samples, split by predicted risk group.
# (The *_train_* names are historical; the data come from the test cohorts.)
event_t = event_endometrial + event_cervix + event_ovarian
survival_time_t = survival_time_endometrial + survival_time_cervix + survival_time_ovarian

high_train_labels_status, high_train_labels_days = [], []
low_train_labels_status, low_train_labels_days = [], []
for risk_group, status, duration in zip(group, event_t, survival_time_t):
    if risk_group == 1:
        status_bucket, days_bucket = high_train_labels_status, high_train_labels_days
    else:
        status_bucket, days_bucket = low_train_labels_status, low_train_labels_days
    status_bucket.append(status)
    days_bucket.append(duration)

time_high, survival_prob_high = kaplan_meier_estimator(high_train_labels_status, high_train_labels_days)
time_low, survival_prob_low = kaplan_meier_estimator(low_train_labels_status, low_train_labels_days)
for curve_time, curve_prob, curve_label in (
    (time_high, survival_prob_high, "High Risk"),
    (time_low, survival_prob_low, "Low Risk"),
):
    plt.step(curve_time, curve_prob, where="post", label=curve_label)
plt.ylabel("Progression-free Survival")
plt.xlabel("Time (in days)")
plt.legend(loc="best")

In [None]:
# find prognostic features from model trained above
# A feature is reported when at least one entry of its row in estimator.coef_ is
# strictly positive; its row index is printed and counted.
# NOTE(review): features whose coefficients are only negative are not counted - confirm
# that this is intended rather than a "non-zero" test.
count = 0
for feature_index, coefficient_row in enumerate(estimator.coef_):
    if (coefficient_row > 0).any():
        print(feature_index)
        count += 1
print()
print("Prognostic features count = " + str(count))

In [None]:
# umap analysis
import umap

# Training samples with a red RGB colour each.
f = list(features)
c = [(1, 0, 0)] * len(features)

# Test samples with a green RGB colour each.
f_1 = list(test_features)
c_1 = [(0, 1, 0)] * len(test_features)

In [None]:
# Learn the embedding on the training samples only (fixed random_state for repeatability),
# then project the test samples into that same embedding.
trans = umap.UMAP(n_neighbors=5, random_state=42).fit(f)
f_1_transform = trans.transform(f_1)

In [None]:
# Overlay the embedded training points (colours from c) and test points (colours from c_1).
# NOTE(review): c and c_1 hold explicit RGB tuples, so cmap='Spectral' presumably has no
# effect here - confirm and drop it if so.
plt.scatter(trans.embedding_[:, 0], trans.embedding_[:, 1], s= 5, c=c, cmap='Spectral')
plt.scatter(f_1_transform[:, 0], f_1_transform[:, 1], s= 5, c=c_1, cmap='Spectral')
plt.title("UMAP Embedding of Collagen+SpaTIL features")

In [None]:
# Export one line per test sample: id, risk score, numeric risk flag, risk category.
# Samples follow the cohort order used for test_features (endometrial, cervix, ovarian).
files_t = final_filenames_endometrial + final_filenames_cervix + final_filenames_ovarian
row_1, row_2, row_3, row_4 = [], [], [], []
for sample_position, sample_path in enumerate(files_t):
    sample_score = test_risk_scores[sample_position]
    is_high_risk = sample_score > median
    row_1.append(str(sample_path.split("/")[-1][:-4]))  # file name minus its last four characters
    row_2.append(str(sample_score))
    row_3.append(str(1 if is_high_risk else 0))
    row_4.append(str("high" if is_high_risk else "low"))

with open("../../ccf_collagen_til.csv", 'w', newline='') as csvfile:
    risk_writer = csv.writer(csvfile)
    risk_writer.writerow(["Patient Id", "Risk Score", "Risk (Numerical)", "Risk (Category)"])
    risk_writer.writerows(zip(row_1, row_2, row_3, row_4))

In [None]:
# response analysis
# Collect the predicted risk group of every test sample whose id is in the list below.
no_response_ids = {
    "CCF2", "CCF3", "CCF5", "CCF12", "CCF13", "CCF14", "CCF15", "CCF16", "CCF17",
    "CCF21", "CCF26", "CCF29", "CCF32", "CCF33", "CCF36", "CCF38", "CCF41", "CCF43",
    "CCF45", "CCF47", "CCF48",
}

# `group` follows the cohort order endometrial, cervix, ovarian, so each cohort's samples
# start at the offset given next to it.
cohort_offsets = (
    (final_filenames_endometrial, 0),
    (final_filenames_cervix, len(final_filenames_endometrial)),
    (final_filenames_ovarian, len(final_filenames_endometrial) + len(final_collagen_features_cervix)),
)

no_response = []
for cohort_files, offset in cohort_offsets:
    for position, sample_path in enumerate(cohort_files):
        # Sample id = file name without its last six characters.
        if sample_path.split("/")[-1][:-6] in no_response_ids:
            no_response.append(group[offset + position])

In [None]:
# Risk groups (1 = high, 0 = low) of the samples collected in no_response.
print(no_response)

In [None]:
# response analysis
# Collect the predicted risk group of every test sample whose id is in the list below.
yes_response_ids = {
    "CCF1", "CCF8", "CCF10", "CCF11", "CCF18", "CCF19", "CCF20", "CCF22", "CCF23",
    "CCF24", "CCF25", "CCF27", "CCF28", "CCF34", "CCF35", "CCF37", "CCF40", "CCF42",
    "CCF46", "CCF49",
}

# `group` follows the cohort order endometrial, cervix, ovarian, so each cohort's samples
# start at the offset given next to it.
cohort_offsets = (
    (final_filenames_endometrial, 0),
    (final_filenames_cervix, len(final_filenames_endometrial)),
    (final_filenames_ovarian, len(final_filenames_endometrial) + len(final_collagen_features_cervix)),
)

yes_response = []
for cohort_files, offset in cohort_offsets:
    for position, sample_path in enumerate(cohort_files):
        # Sample id = file name without its last six characters.
        if sample_path.split("/")[-1][:-6] in yes_response_ids:
            yes_response.append(group[offset + position])

In [None]:
# Risk groups (1 = high, 0 = low) of the samples collected in yes_response.
print(yes_response)

In [None]:
# response analysis
# Same collection as the previous yes_response cell but with a longer id list (eight
# additional ids at the end); running this cell replaces the earlier `yes_response`.
yes_response_ids = {
    "CCF1", "CCF8", "CCF10", "CCF11", "CCF18", "CCF19", "CCF20", "CCF22", "CCF23",
    "CCF24", "CCF25", "CCF27", "CCF28", "CCF34", "CCF35", "CCF37", "CCF40", "CCF42",
    "CCF46", "CCF49",
    "CCF4", "CCF6", "CCF7", "CCF9", "CCF30", "CCF31", "CCF39", "CCF44",
}

# `group` follows the cohort order endometrial, cervix, ovarian, so each cohort's samples
# start at the offset given next to it.
cohort_offsets = (
    (final_filenames_endometrial, 0),
    (final_filenames_cervix, len(final_filenames_endometrial)),
    (final_filenames_ovarian, len(final_filenames_endometrial) + len(final_collagen_features_cervix)),
)

yes_response = []
for cohort_files, offset in cohort_offsets:
    for position, sample_path in enumerate(cohort_files):
        # Sample id = file name without its last six characters.
        if sample_path.split("/")[-1][:-6] in yes_response_ids:
            yes_response.append(group[offset + position])

In [None]:
# Risk groups (1 = high, 0 = low) of the samples currently held in yes_response.
print(yes_response)

In [None]:
# msi status analysis
# Collect the predicted risk group of every test sample whose id is in the list below.
stable_msi_ids = {
    "CCF7", "CCF8", "CCF13", "CCF14", "CCF17", "CCF20", "CCF22", "CCF25", "CCF27",
    "CCF31", "CCF34", "CCF35", "CCF38", "CCF41", "CCF44", "CCF47", "CCF48",
}

# `group` follows the cohort order endometrial, cervix, ovarian, so each cohort's samples
# start at the offset given next to it.
cohort_offsets = (
    (final_filenames_endometrial, 0),
    (final_filenames_cervix, len(final_filenames_endometrial)),
    (final_filenames_ovarian, len(final_filenames_endometrial) + len(final_collagen_features_cervix)),
)

stable_msi = []
for cohort_files, offset in cohort_offsets:
    for position, sample_path in enumerate(cohort_files):
        # Sample id = file name without its last six characters.
        if sample_path.split("/")[-1][:-6] in stable_msi_ids:
            stable_msi.append(group[offset + position])

In [None]:
# Risk groups (1 = high, 0 = low) of the samples collected in stable_msi.
print(stable_msi)

In [None]:
# msi status analysis
# Collect the predicted risk group of every test sample whose id is in the list below.
unstable_msi_ids = {
    "CCF1", "CCF2", "CCF9", "CCF10", "CCF11", "CCF16", "CCF18", "CCF19", "CCF21",
    "CCF23", "CCF24", "CCF29", "CCF40", "CCF42", "CCF45", "CCF46",
}

# `group` follows the cohort order endometrial, cervix, ovarian, so each cohort's samples
# start at the offset given next to it.
cohort_offsets = (
    (final_filenames_endometrial, 0),
    (final_filenames_cervix, len(final_filenames_endometrial)),
    (final_filenames_ovarian, len(final_filenames_endometrial) + len(final_collagen_features_cervix)),
)

unstable_msi = []
for cohort_files, offset in cohort_offsets:
    for position, sample_path in enumerate(cohort_files):
        # Sample id = file name without its last six characters.
        if sample_path.split("/")[-1][:-6] in unstable_msi_ids:
            unstable_msi.append(group[offset + position])

In [None]:
# Risk groups (1 = high, 0 = low) of the samples collected in unstable_msi.
print(unstable_msi)