# Setting up the path

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%ls
%cd drive/MyDrive/Bioinformatics/
%ls

[0m[01;34mdrive[0m/  [01;34msample_data[0m/
/content/drive/MyDrive/Bioinformatics
dataPreprocess.py    [0m[01;34morganization[0m/         README.md
dataStructure.ipynb  PatientNet.ipynb      ShallowLearning.ipynb
Directories.ipynb    PatientNetwork.ipynb  Trial.ipynb
Network.ipynb        [01;34m__pycache__[0m/


In [3]:
import sys
sys.path.append('/content/drive/MyDrive/Bioinformatics')

# Creating the labels

In [4]:
import pandas as pd
import numpy as np

diagnosi = '/content/drive/MyDrive/meso_san_luigi/diagnosi.xls'

#Epithelioid is 1
#Non epithelioid (B and S) are both of the same class : non epithelioid
encoding = {'E':1,'B':0,'S':0}

#Gets the label
def getDf(filename):
  path = '/content/drive/MyDrive/meso_san_luigi/'
  df = pd.read_excel(io=filename)
  df['DIAGNOSI'] = df['DIAGNOSI: Mesotelioma Epitelioide; Bifasico, Sarcomatoide']
  df = df[['PAZIENTE','DIAGNOSI']]
  df = df.dropna()
  patientNumbers = df['PAZIENTE'].map(lambda a: str(a).split(' ')[1].replace('TOR',''))
  df['PAZIENTE'] = df['PAZIENTE'].map(lambda a : path + str(a).split(' ')[1].replace('TOR','M-') + '.ndpi')
  df['DIAGNOSI'] = df['DIAGNOSI'].map(lambda a : encoding[a])


  return df,patientNumbers

df,pNum=getDf(diagnosi)
paths = np.array(df['PAZIENTE'])
labels = np.array(df['DIAGNOSI'])

# Importing the pickles

In [5]:
import pickle
filename = '/content/drive/MyDrive/BioinfoImages/patientFeatures/patientFeatures0.p'
predictions = pickle.load(open(filename,"rb"))

# Creating the dataframes

In [121]:
#np.array(all_patient_predictions)
labels[0:6]

df = pd.DataFrame(predictions, columns=['epithelioid','non','useless'])
labName = {1:'epithelioid',0:'non'}
df['label'] = [labName[x] for x in labels[0:6]]
df

Unnamed: 0,epithelioid,non,useless,label
0,1663.049683,7060.19043,2955.760254,non
1,1561.703369,3046.913574,981.383179,epithelioid
2,547.911438,2053.897949,1263.19043,epithelioid
3,280.615997,3766.875977,338.508087,non
4,4359.495605,10337.09082,8659.415039,epithelioid
5,196.271149,1031.895508,194.833328,epithelioid


In [76]:
def get_intLabels(df):
  toint = {'epithelioid':1,'non':0}
  return [toint[x] for x in df['label']]

In [78]:
df_int = df
df['label'] = get_intLabels(df)

# Plotting

In [7]:
import plotly.express as px
fig = px.scatter_3d(df, x='epithelioid', y='non', z='useless', size = np.full(6,10),
              color='label')
fig.show()

In [8]:
fig = px.scatter(df, x='epithelioid', y='non', size = np.full(6,10),
              color='label')
fig.show()

# Shallow learning 

# Majority voting

In [72]:
def majority_voting(df,array=False):
  if(array):
    predictions = [np.argmax(x)^1 for x in df]

    return np.array(predictions)

  predictions=df.apply((lambda x: np.argmax(x[['epithelioid','non']]) ),axis=1)
  return predictions

In [75]:
import plotly.graph_objects as go
import numpy as np

mesh_size =  200 #.02
margin = 2500 #0.25

color_label={'epithelioid':'#EC8172','non':'#8993f8'}

# Create a mesh grid on which we will run our model
x_min, x_max = np.min(df['epithelioid']) - margin, np.max(df['epithelioid']) + margin
y_min, y_max = np.min(df['non']) - margin, np.max(df['non']) + margin
xrange = np.arange(x_min, x_max, mesh_size)
yrange = np.arange(y_min, y_max, mesh_size)
xx, yy = np.meshgrid(xrange, yrange)

Z = majority_voting(np.c_[xx.ravel(), yy.ravel()],array=True) #[:, 1]
Z = Z.reshape(xx.shape)


# Plot the figure
fig = go.Figure(data=[
    go.Scatter( x = df['epithelioid'], y =df['non'], mode='markers', 
               marker_color=[color_label[x] for x in df['label']], marker_size = 25
    )
])


fig.add_trace(
    go.Contour(
        x=xrange,
        y=yrange,
        z=Z,
        colorscale=['#d7ebef','#b84f47']
    )
  )


fig.show()

### Measuring accuracy

In [80]:
from sklearn.metrics import accuracy_score
y_true = df_int['label']
y_pred = majority_voting(df_int)

majority_accuracy=accuracy_score(y_true, y_pred)
print(majority_accuracy)

0.6666666666666666


In [83]:
df_int['label']

0    0
1    1
2    1
3    0
4    1
5    1
Name: label, dtype: int64

# SVM

In [88]:
grid_result.best_params_

{'C': 0.001, 'kernel': 'linear'}

In [102]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn.svm import SVC

classifier=SVC()

gsc = GridSearchCV(
        estimator=classifier,
        param_grid={
            'C': [0.001, 0.01, 0.1, 1, 10, 100,1000],'gamma': [0.001, 0.01, 0.1, 1, 10, 100,1000],
            'kernel':['linear','rbf']

        },
        cv=2,  verbose=0, n_jobs=-1)
grid_result = gsc.fit(df_int[['epithelioid','non']], df_int['label'])
best_params = grid_result.best_params_
best_score=grid_result.best_score_
best_svc = SVC(C=best_params["C"])
best_svc.fit(df_int[['epithelioid','non']], df_int['label'])
print(best_params)
print(best_score)

{'C': 0.001, 'gamma': 0.001, 'kernel': 'linear'}
0.6666666666666666


In [103]:
y_pred = best_svc.predict(df[['epithelioid','non']])
accuracy_score(y_true, y_pred)

0.6666666666666666

In [118]:
from matplotlib.colors import ListedColormap
def plot_decision_boundaries(df,clf):
  mesh_size =  200 #.02
  margin = 2500 #0.25

  color_label={'epithelioid':'#EC8172','non':'#8993f8'}

  # Create a mesh grid on which we will run our model
  x_min, x_max = np.min(df['epithelioid']) - margin, np.max(df['epithelioid']) + margin
  y_min, y_max = np.min(df['non']) - margin, np.max(df['non']) + margin
  xrange = np.arange(x_min, x_max, mesh_size)
  yrange = np.arange(y_min, y_max, mesh_size)
  xx, yy = np.meshgrid(xrange, yrange)

  Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) #[:, 1]
  Z = Z.reshape(xx.shape)


  # Plot the figure
  fig = go.Figure(data=[
      go.Scatter( x = df['epithelioid'], y =df['non'], mode='markers', 
                marker_color=[color_label[x] for x in df['label']], marker_size = 25
      )
  ])


  fig.add_trace(
      go.Contour(
          x=xrange,
          y=yrange,
          z=Z,
          colorscale=['#d7ebef','#b84f47']
      )
    )
  
  fig.show()

In [122]:
plot_decision_boundaries(df,best_svc)