# Setting up the path

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%ls
%cd drive/MyDrive/Bioinformatics/
%ls

[0m[01;34mdrive[0m/  [01;34msample_data[0m/
/content/drive/MyDrive/Bioinformatics
dataPreprocess.py    [0m[01;34morganization[0m/         README.md
dataStructure.ipynb  PatientNet.ipynb      ShallowLearning.ipynb
Directories.ipynb    PatientNetwork.ipynb  Trial.ipynb
Network.ipynb        [01;34m__pycache__[0m/


In [3]:
import sys
sys.path.append('/content/drive/MyDrive/Bioinformatics')

# Creating the labels

In [4]:
import pandas as pd
import numpy as np

diagnosi = '/content/drive/MyDrive/meso_san_luigi/diagnosi.xls'

#Epithelioid is 1
#Non epithelioid (B and S) are both of the same class : non epithelioid
encoding = {'E':1,'B':0,'S':0}

#Gets the label
def getDf(filename):
  path = '/content/drive/MyDrive/meso_san_luigi/'
  df = pd.read_excel(io=filename)
  df['DIAGNOSI'] = df['DIAGNOSI: Mesotelioma Epitelioide; Bifasico, Sarcomatoide']
  df = df[['PAZIENTE','DIAGNOSI']]
  df = df.dropna()
  patientNumbers = df['PAZIENTE'].map(lambda a: str(a).split(' ')[1].replace('TOR',''))
  df['PAZIENTE'] = df['PAZIENTE'].map(lambda a : path + str(a).split(' ')[1].replace('TOR','M-') + '.ndpi')
  df['DIAGNOSI'] = df['DIAGNOSI'].map(lambda a : encoding[a])


  return df,patientNumbers

df,pNum=getDf(diagnosi)
paths = np.array(df['PAZIENTE'])
labels = np.array(df['DIAGNOSI'])

# Importing the pickles

In [10]:
import os
import pickle

all_features = {}
path = '/content/drive/MyDrive/BioinfoImages/patientFeatures/'

for feature in os.listdir(path):
  filename = path + feature

  if 'SmallFeature' in filename:
    current_feature = pickle.load(open(filename,"rb"))
    all_features.update(current_feature)

In [12]:
df

Unnamed: 0,PAZIENTE,DIAGNOSI
0,/content/drive/MyDrive/meso_san_luigi/M-1.ndpi,0
1,/content/drive/MyDrive/meso_san_luigi/M-10.ndpi,1
2,/content/drive/MyDrive/meso_san_luigi/M-100.ndpi,1
3,/content/drive/MyDrive/meso_san_luigi/M-101.ndpi,0
4,/content/drive/MyDrive/meso_san_luigi/M-102.ndpi,1
...,...,...
117,/content/drive/MyDrive/meso_san_luigi/M-94.ndpi,1
118,/content/drive/MyDrive/meso_san_luigi/M-96.ndpi,0
119,/content/drive/MyDrive/meso_san_luigi/M-97.ndpi,1
120,/content/drive/MyDrive/meso_san_luigi/M-98.ndpi,1


In [32]:
dfFit = pd.DataFrame.from_dict(all_features,orient='index',columns=["epithelioid","non","useless"])
dfFit = dfFit.reset_index().rename(columns={'index':'PAZIENTE'})
dataset = pd.merge(dfFit,df,on='PAZIENTE')
dataset = dataset.rename(columns = {'DIAGNOSI':'label'})

In [33]:
dataset

Unnamed: 0,PAZIENTE,epithelioid,non,useless,label
0,/content/drive/MyDrive/meso_san_luigi/M-1.ndpi,5391.789551,5746.389160,540.820557,0
1,/content/drive/MyDrive/meso_san_luigi/M-10.ndpi,2791.386475,2632.429199,166.183868,1
2,/content/drive/MyDrive/meso_san_luigi/M-44.ndpi,2039.272949,1157.257568,460.469604,1
3,/content/drive/MyDrive/meso_san_luigi/M-45.ndpi,13094.708984,5592.917480,1035.375977,1
4,/content/drive/MyDrive/meso_san_luigi/M-46.ndpi,7141.227539,5723.562988,510.207336,1
...,...,...,...,...,...
59,/content/drive/MyDrive/meso_san_luigi/M-124.ndpi,675.113220,757.826294,102.060394,1
60,/content/drive/MyDrive/meso_san_luigi/M-13.ndpi,43775.156250,13347.104492,2524.743896,1
61,/content/drive/MyDrive/meso_san_luigi/M-14.ndpi,8646.735352,9200.905273,1095.354980,1
62,/content/drive/MyDrive/meso_san_luigi/M-15.ndpi,3225.943359,3963.395752,489.660675,1


In [42]:
len(dataset)

64

# Plotting

In [48]:
import plotly.express as px
fig = px.scatter_3d(dataset, x='epithelioid', y='non', z='useless', size = np.full(len(dataset),10),
              color='label',color_continuous_scale=['#EC8172','#8993f8'])
fig.show()

In [49]:
fig = px.scatter(dataset, x='epithelioid', y='non', size = np.full(len(dataset),10),
              color='label', color_continuous_scale=['#EC8172','#8993f8'])
fig.show()

# Shallow learning 

# Majority voting

In [51]:
def majority_voting(df,array=False):
  if(array):
    predictions = [np.argmax(x)^1 for x in df]

    return np.array(predictions)

  predictions=df.apply((lambda x: np.argmax(x[['epithelioid','non']]) ),axis=1)
  return predictions

In [57]:
import plotly.graph_objects as go
import numpy as np

def plot_majority(df):
  mesh_size =  200 #.02
  margin = 2500 #0.25

  color_label={1:'#EC8172',0:'#8993f8'}

  # Create a mesh grid on which we will run our model
  x_min, x_max = np.min(df['epithelioid']) - margin, np.max(df['epithelioid']) + margin
  y_min, y_max = np.min(df['non']) - margin, np.max(df['non']) + margin
  xrange = np.arange(x_min, x_max, mesh_size)
  yrange = np.arange(y_min, y_max, mesh_size)
  xx, yy = np.meshgrid(xrange, yrange)

  Z = majority_voting(np.c_[xx.ravel(), yy.ravel()],array=True) #[:, 1]
  Z = Z.reshape(xx.shape)


  # Plot the figure
  fig = go.Figure(data=[
      go.Scatter( x = df['epithelioid'], y =df['non'], mode='markers', 
                marker_color=[color_label[x] for x in df['label']], marker_size = 25
      )
  ])


  fig.add_trace(
      go.Contour(
          x=xrange,
          y=yrange,
          z=Z,
          colorscale=['#d7ebef','#b84f47']
      )
    )


  fig.show()

In [58]:
plot_majority(dataset)

### Measuring accuracy

In [61]:
from sklearn.metrics import accuracy_score
y_true = dataset['label']
y_pred = majority_voting(dataset)

majority_accuracy=accuracy_score(y_true, y_pred)
print(majority_accuracy)

0.296875


# SVM

In [66]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn.svm import SVC

classifier=SVC()

gsc = GridSearchCV(
        estimator=classifier,
        param_grid={
            'C': [0.001, 0.01, 0.1, 1],'gamma': [0.001, 0.01, 0.1, 1],
            'kernel':['linear','rbf']

        },
        cv=2,  verbose=0, n_jobs=-1)
grid_result = gsc.fit(dataset[['epithelioid','non']], dataset['label'])
best_params = grid_result.best_params_
best_score=grid_result.best_score_
best_svc = SVC(C=best_params["C"])
best_svc.fit(df_int[['epithelioid','non']], df_int['label'])
print(best_params)
print(best_score)

KeyboardInterrupt: ignored

In [None]:
y_pred = best_svc.predict(df[['epithelioid','non']])
accuracy_score(y_true, y_pred)

0.6666666666666666

In [None]:
from matplotlib.colors import ListedColormap
def plot_decision_boundaries(df,clf):
  mesh_size =  200 #.02
  margin = 2500 #0.25

  color_label={'epithelioid':'#EC8172','non':'#8993f8'}

  # Create a mesh grid on which we will run our model
  x_min, x_max = np.min(df['epithelioid']) - margin, np.max(df['epithelioid']) + margin
  y_min, y_max = np.min(df['non']) - margin, np.max(df['non']) + margin
  xrange = np.arange(x_min, x_max, mesh_size)
  yrange = np.arange(y_min, y_max, mesh_size)
  xx, yy = np.meshgrid(xrange, yrange)

  Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) #[:, 1]
  Z = Z.reshape(xx.shape)


  # Plot the figure
  fig = go.Figure(data=[
      go.Scatter( x = df['epithelioid'], y =df['non'], mode='markers', 
                marker_color=[color_label[x] for x in df['label']], marker_size = 25
      )
  ])


  fig.add_trace(
      go.Contour(
          x=xrange,
          y=yrange,
          z=Z,
          colorscale=['#d7ebef','#b84f47']
      )
    )
  
  fig.show()

In [None]:
plot_decision_boundaries(dataset,best_svc)