In [1]:
import pandas as pd
import numpy as np
from functools import reduce

In [2]:
# Read the pipe-delimited kMRI_FNIH_SQ_MOAKS_BICL00 export into a DataFrame.
moaks_fnih_sq00 = pd.read_csv(
    '/home/anastasis/EMC_Thesis/OAI Data/OAICompleteData_ASCII/kMRI_FNIH_SQ_MOAKS_BICL00.txt',
    sep='|',
)

In [3]:
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called on its own; wrapping it in print() would emit a stray "None".
moaks_fnih_sq00.info()
# Show the third-from-last column to see the raw "<code>: <label>" encoding.
print(moaks_fnih_sq00.iloc[:, -3])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Columns: 122 entries, ID to V00MTCMNTS
dtypes: float64(1), int64(17), object(104)
memory usage: 572.0+ KB
None 
 0      1: Yes
1      1: Yes
2      1: Yes
3       0: No
4       0: No
        ...  
595    1: Yes
596     0: No
597     0: No
598     0: No
599     0: No
Name: V00MPOPCYS, Length: 600, dtype: object


# NOW WE DELETE ALL THE COMMENTS OF THE INITIAL DATAFRAME AND CONVERT THE VALUES TO NUMBERS

In [4]:
# Step 1: keep everything up to and including the first ':' of each string
# and discard the descriptive text after it (e.g. '1: Yes' -> '1:').
moaks_without_commments = moaks_fnih_sq00.replace(r'(\:).*$', r'\1', regex=True)
# Step 2: delete the leftover '.:' and ':' markers, and the letter 'R'.
# The patterns are raw strings because '\.' and '\:' are invalid escape
# sequences in an ordinary string literal (newer Python versions warn on them).
# Note that the 'R' pattern is applied to every string cell of the frame,
# not only to the reader column.
moaks_without_commments_bullets = moaks_without_commments.replace(
    {r'\.\:': '', ':': '', 'R': ''}, regex=True)
# Step 3: drop the two free-text columns (comments / technical comments).
moaks_without_commments_bullets = moaks_without_commments_bullets.drop(columns=['V00MCMNTS','V00MTCMNTS'])
print(moaks_without_commments_bullets.READPRJ.value_counts())

22    600
Name: READPRJ, dtype: int64


In [5]:
# Convert every column to a numeric dtype (floats are downcast where possible),
# then store the ID and SIDE key columns as integers.
moaks_without_c_b_numeric = moaks_without_commments_bullets.apply(pd.to_numeric, downcast='float')
moaks_without_c_b_numeric[['ID','SIDE']] = moaks_without_c_b_numeric[['ID','SIDE']].astype('int')
# info() prints its own summary and returns None; wrapping it in print()
# would add a stray "None" line to the output.
moaks_without_c_b_numeric.info()
print('Number of NaN values are : ',moaks_without_c_b_numeric.isna().sum().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Columns: 120 entries, ID to V00MPOPCYS
dtypes: float32(118), int64(2)
memory usage: 286.1 KB
None
Number of NaN values are :  947


In [6]:
# Per-column NaN counts of the numeric frame, as a plain list in column order.
nan_counts_per_column = moaks_without_c_b_numeric.isna().sum().to_list()
print(f'Number of NaN values in the initial dataframe: {nan_counts_per_column}')

Number of NaN values in the initial dataframe: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 113, 113, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 600, 0]


In [7]:
# Replace every NaN with the sentinel -1.0, then confirm that none remain.
moaks_without_c_b_nan_numeric = moaks_without_c_b_numeric.fillna(value=-1.0)
moaks_without_c_b_nan_numeric.info()
remaining_nan_total = moaks_without_c_b_nan_numeric.isna().sum().sum()
print(f'Number of NaN values in the Dataframe: {remaining_nan_total}')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Columns: 120 entries, ID to V00MPOPCYS
dtypes: float32(118), int64(2)
memory usage: 286.1 KB
Number of NaN values in the Dataframe: 0


In [8]:
# The cleaned, fully numeric MOAKS table is exposed under a shorter name
# (same object, no copy is made) and written to CSV, index included.
moaks_grades = moaks_without_c_b_nan_numeric
# info() prints by itself and returns None, so it is not wrapped in print().
moaks_grades.info()
moaks_grades.to_csv('moaks_fnih_sq00_initial_numeric_df.csv')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Columns: 120 entries, ID to V00MPOPCYS
dtypes: float32(118), int64(2)
memory usage: 286.1 KB
None


# NOW WE LOAD THE RADIOGRAPH INFORMATION DATABASE IN ORDER TO ACQUIRE THE XR OSTEOPHYTES

In [9]:
# Read the pipe-delimited kxr_sq_bu00 export, keep only the rows whose
# READPRJ equals 15, and narrow them to the join keys plus the X-ray columns
# used further down.
kxr_sq_bu00 = pd.read_csv(
    '/home/anastasis/EMC_Thesis/OAI Data/OAICompleteData_ASCII/kxr_sq_bu00.txt',
    sep='|',
)
is_project_15 = kxr_sq_bu00['READPRJ'] == 15
KL_prj15 = kxr_sq_bu00[is_project_15].copy()
xr_reading_columns = ['ID', 'SIDE', 'V00XROSFL', 'V00XROSFM', 'V00XROSTL', 'V00XROSTM', 'V00XRKL']
KL_prj15_osteophytes = KL_prj15.loc[:, xr_reading_columns]


In [10]:
# Combine the MOAKS grades with the X-ray readings, keeping only the
# (ID, SIDE) pairs present in both tables; NaNs in the merged frame become 0.0.
common_fnih_kxr_00 = moaks_grades.merge(
    KL_prj15_osteophytes, how='inner', on=['ID', 'SIDE']
).fillna(0.0)
# Per-column NaN counts of the merged frame (displayed as the cell output).
common_fnih_kxr_00.isna().sum()

ID           0
SIDE         0
VERSION      0
READPRJ      0
V00READER    0
            ..
V00XROSFL    0
V00XROSFM    0
V00XROSTL    0
V00XROSTM    0
V00XRKL      0
Length: 125, dtype: int64

In [54]:
# Exclude the rows with V00XRKL == 0 that nevertheless have a non-zero value
# in at least one of the four X-ray osteophyte columns.
# NOTE(review): this cell carries execution count 54 while its neighbours are
# 10 and 11 -- confirm that a top-to-bottom run reproduces the outputs below.
xr_osteophyte_columns = ['V00XROSFL', 'V00XROSFM', 'V00XROSTL', 'V00XROSTM']
has_xr_osteophyte = common_fnih_kxr_00[xr_osteophyte_columns].ne(0).any(axis=1)
contradictory_rows = common_fnih_kxr_00.index[
    common_fnih_kxr_00['V00XRKL'].eq(0) & has_xr_osteophyte
]
common_fnih_kxr_00 = common_fnih_kxr_00.drop(index=contradictory_rows)
common_fnih_kxr_00

Unnamed: 0,ID,SIDE,VERSION,READPRJ,V00READER,V00MCMPM,V00MCMPL,V00MCMFMA,V00MCMFLA,V00MCMFMP,...,V00MPPBUR,V00MSYIC,V00MEFFWK,V00MITBSIG,V00MPOPCYS,V00XROSFL,V00XROSFM,V00XROSTL,V00XROSTM,V00XRKL
0,9001695,1,0.1,22.0,2.0,2.0,0.0,1.0,0.0,1.0,...,0.0,2.0,2.0,-1.0,1.0,3.0,3.0,2.0,3.0,2.0
1,9002116,2,0.1,22.0,2.0,2.0,0.0,2.0,1.0,2.0,...,0.0,1.0,2.0,-1.0,1.0,1.0,0.0,1.0,2.0,3.0
2,9002430,1,0.1,22.0,2.0,2.0,0.0,2.2,0.0,2.0,...,0.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,1.0,2.0
3,9002817,1,0.1,22.0,2.0,2.0,1.0,0.0,0.0,2.0,...,0.0,2.0,1.0,-1.0,0.0,0.0,2.0,1.0,1.0,3.0
4,9003316,1,0.1,22.0,1.0,0.0,3.0,0.0,2.2,0.0,...,0.0,0.0,1.0,-1.0,0.0,1.0,0.0,0.0,0.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,9993833,2,0.1,22.0,1.0,0.0,2.0,2.2,2.2,0.0,...,0.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,1.0,3.0
596,9994408,1,0.1,22.0,1.0,3.3,2.0,2.2,0.0,0.0,...,0.0,1.0,2.0,-1.0,0.0,0.0,1.0,1.0,1.0,3.0
597,9995338,2,0.1,22.0,1.0,0.0,2.2,0.0,0.0,0.0,...,0.0,1.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,2.0
598,9996098,1,0.1,22.0,1.0,3.2,3.0,2.2,2.2,2.0,...,0.0,1.0,1.0,-1.0,0.0,1.0,2.0,0.0,1.0,3.0


In [55]:
# Display the rows whose V00XRKL equals 0.
common_fnih_kxr_00.loc[common_fnih_kxr_00['V00XRKL'].eq(0)]

Unnamed: 0,ID,SIDE,VERSION,READPRJ,V00READER,V00MCMPM,V00MCMPL,V00MCMFMA,V00MCMFLA,V00MCMFMP,...,V00MPPBUR,V00MSYIC,V00MEFFWK,V00MITBSIG,V00MPOPCYS,V00XROSFL,V00XROSFM,V00XROSTL,V00XROSTM,V00XRKL


# NOW WE FIND THE SUBJECTS THAT FULFILL THE OA FEATURES FOR THE WHOLE KNEE JOINT

In [11]:
# FIRST WE CONSTRUCT A NEW DATAFRAME WITH ALL THE OA FEATURES AS COLUMNS
# 'AnyTCL' is a helper column: 1.0 marks the subjects with at least one
# thickness-cartilage-loss variable >= 1.0 and 0.0 marks the subjects that
# do NOT have any thickness cartilage loss.
#
# reindex() copies ID and SIDE (keeping their integer dtype and the row index)
# and creates every other column as float64 NaN in a single step. Building an
# empty object-dtype frame and assigning NaN through .iloc afterwards leaves
# the resulting column dtype dependent on the installed pandas version.
oa_feature_columns = ['ID', 'SIDE', 'XR_Osteophytes', 'FullTCL', 'AnyTCL',
                      'PartialTCL', 'BML', 'Meniscus_Degradation']
moaks_OA_features = common_fnih_kxr_00[['ID', 'SIDE']].reindex(columns=oa_feature_columns)

print('Type of ID and SIDE columns {} and type of the rest of the columns {}'
    .format(type(moaks_OA_features.iloc[0,0]),type(moaks_OA_features.iloc[0,2])))


Type of ID and SIDE columns <class 'numpy.int64'> and type of the rest of the columns <class 'numpy.float64'>


In [12]:
# Display the feature table: ID and SIDE are filled, feature columns are still NaN.
moaks_OA_features

Unnamed: 0,ID,SIDE,XR_Osteophytes,FullTCL,AnyTCL,PartialTCL,BML,Meniscus_Degradation
0,9001695,1,,,,,,
1,9002116,2,,,,,,
2,9002430,1,,,,,,
3,9002817,1,,,,,,
4,9003316,1,,,,,,
...,...,...,...,...,...,...,...,...
595,9993833,2,,,,,,
596,9994408,1,,,,,,
597,9995338,2,,,,,,
598,9996098,1,,,,,,


In [13]:
# FullTCL = 1.0 for every subject with at least one cartilage variable >= 3.0.
# NOTE(review): the displayed grades include values such as 2.2, 3.2 and 3.3,
# which look like composite codes -- confirm that a plain ">= 3.0" threshold is
# the intended definition of full-thickness loss.
cartilage_columns = [
    'V00MCMFLA', 'V00MCMFLP', 'V00MCMFLC', 'V00MCMTLA', 'V00MCMTLP', 'V00MCMTLC',
    'V00MCMFMA', 'V00MCMFMP', 'V00MCMFMC', 'V00MCMTMA', 'V00MCMTMP', 'V00MCMTMC',
]
has_full_tcl = common_fnih_kxr_00[cartilage_columns].ge(3.0).any(axis=1)
moaks_OA_features.loc[has_full_tcl, 'FullTCL'] = 1.0

In [14]:
# Count the rows flagged with FullTCL == 1.0.
full_tcl_count = (moaks_OA_features['FullTCL'] == 1.0).sum()
print(f'Number of subjects WITH Full CTL : {full_tcl_count}')


Number of subjects WITH Full CTL : 69


In [15]:
# FullTCL = 0.0 where every cartilage variable lies in [0.0, 3.0).
# Rows holding a negative value (the -1.0 sentinel) and no value >= 3.0
# match neither mask and therefore stay NaN.
cartilage_columns = [
    'V00MCMFLA', 'V00MCMFLP', 'V00MCMFLC', 'V00MCMTLA', 'V00MCMTLP', 'V00MCMTLC',
    'V00MCMFMA', 'V00MCMFMP', 'V00MCMFMC', 'V00MCMTMA', 'V00MCMTMP', 'V00MCMTMC',
]
cartilage_grades = common_fnih_kxr_00[cartilage_columns]
no_full_tcl = (cartilage_grades.ge(0.0) & cartilage_grades.lt(3.0)).all(axis=1)
moaks_OA_features.loc[no_full_tcl, 'FullTCL'] = 0.0
print(moaks_OA_features['FullTCL'].value_counts())


0.0    531
1.0     69
Name: FullTCL, dtype: int64


In [16]:
# AnyTCL = 1.0 for every subject with at least one cartilage variable >= 1.0,
# i.e. with either Full or Partial Thickness Cartilage Loss.
cartilage_columns = [
    'V00MCMFLA', 'V00MCMFLP', 'V00MCMFLC', 'V00MCMTLA', 'V00MCMTLP', 'V00MCMTLC',
    'V00MCMFMA', 'V00MCMFMP', 'V00MCMFMC', 'V00MCMTMA', 'V00MCMTMP', 'V00MCMTMC',
]
has_any_tcl = common_fnih_kxr_00[cartilage_columns].ge(1.0).any(axis=1)
moaks_OA_features.loc[has_any_tcl, 'AnyTCL'] = 1.0



In [17]:
# AnyTCL = 0.0 where every cartilage variable lies in [0.0, 1.0),
# i.e. the subject has no Thickness Cartilage Loss at all.
cartilage_columns = [
    'V00MCMFLA', 'V00MCMFLP', 'V00MCMFLC', 'V00MCMTLA', 'V00MCMTLP', 'V00MCMTLC',
    'V00MCMFMA', 'V00MCMFMP', 'V00MCMFMC', 'V00MCMTMA', 'V00MCMTMP', 'V00MCMTMC',
]
cartilage_grades = common_fnih_kxr_00[cartilage_columns]
no_tcl_at_all = (cartilage_grades.ge(0.0) & cartilage_grades.lt(1.0)).all(axis=1)
moaks_OA_features.loc[no_tcl_at_all, 'AnyTCL'] = 0.0
without_tcl_count = (moaks_OA_features['AnyTCL'] == 0.0).sum()
print(f'Number of Subjects WITHOUT Any Cartilage Thickness Loss: {without_tcl_count}')


Number of Subjects WITHOUT Any Cartilage Thickness Loss: 31


In [18]:
# Distribution of the AnyTCL flag.
any_tcl_distribution = moaks_OA_features['AnyTCL'].value_counts()
print(any_tcl_distribution)

1.0    569
0.0     31
Name: AnyTCL, dtype: int64


In [19]:
# PartialTCL starts as a copy of the AnyTCL values and is reset to 0.0 on the
# rows where FullTCL is 1.0. A PartialTCL of 1.0 therefore means: at least one
# cartilage variable is >= 1.0 and none of them is >= 3.0.
moaks_OA_features['PartialTCL'] = moaks_OA_features['AnyTCL']
is_full_tcl = moaks_OA_features['FullTCL'] == 1
moaks_OA_features.loc[is_full_tcl, 'PartialTCL'] = 0.0
partial_tcl_count = (moaks_OA_features['PartialTCL'] == 1.0).sum()
print(f'Number of Subjects with Partial Thickness Cartilage Loss: {partial_tcl_count}')

Number of Subjects with Partial Thickness Cartilage Loss: 500


In [20]:
# # NOW WE FILL THE DEFINITE OSTEOPHYTES COLUMN

# moaks_OA_features.loc[((moaks_grades['V00MOSFLA'] >= 2.0) | (moaks_grades['V00MOSFLP'] >= 2.0) |
#                 (moaks_grades['V00MOSFLC'] >= 2.0) | (moaks_grades['V00MOSTL'] >= 2.0) |
#                 (moaks_grades['V00MOSFMA'] >= 2.0) | (moaks_grades['V00MOSFMP'] >= 2.0) |
#                 (moaks_grades['V00MOSFMC'] >= 2.0) | (moaks_grades['V00MOSTM'] >= 2.0)),'Definite_Osteophytes'] = 1.0

# moaks_OA_features.loc[((moaks_grades['V00MOSFLA'] < 2.0) & (moaks_grades['V00MOSFLA'] >= 0.0) & 
#                 (moaks_grades['V00MOSFLP'] < 2.0) & (moaks_grades['V00MOSFLP'] >= 0.0) &
#                 (moaks_grades['V00MOSFLC'] < 2.0) & (moaks_grades['V00MOSFLC'] >= 0.0) & 
#                 (moaks_grades['V00MOSTL'] < 2.0) & (moaks_grades['V00MOSTL'] >= 0.0) &
#                 (moaks_grades['V00MOSFMA'] < 2.0) & (moaks_grades['V00MOSFMA'] >= 0.0) & 
#                 (moaks_grades['V00MOSFMP'] < 2.0) & (moaks_grades['V00MOSFMP'] >= 0.0) &
#                 (moaks_grades['V00MOSFMC'] < 2.0) & (moaks_grades['V00MOSFMC'] >= 0.0) & 
#                 (moaks_grades['V00MOSTM'] < 2.0) & (moaks_grades['V00MOSTM'] >= 0.0)),'Definite_Osteophytes'] = 0.0

# print('With Definite Osteophytes->1.0, Without->0.0 \n{}'.format(moaks_OA_features.Definite_Osteophytes.value_counts(dropna=False)))


# # print('Number of Subjects WITH Definite Osteophytes: {}'.
# #         format((moaks_OA_features['Definite_Osteophytes']==1.0).sum()))

# # print('Number of Subjects WITHOUT Definite Osteophytes: {}'.
# #         format((moaks_OA_features['Definite_Osteophytes']==0.0).sum()))


In [21]:
## NOW WE FILL THE XRAY OSTEOPHYTES COLUMN
xr_osteophyte_columns = ['V00XROSFL', 'V00XROSFM', 'V00XROSTL', 'V00XROSTM']
xr_grades = common_fnih_kxr_00[xr_osteophyte_columns]

# 1.0 -> at least one of the four X-ray osteophyte columns is >= 1.0
has_xr_osteophyte = xr_grades.ge(1.0).any(axis=1)
moaks_OA_features.loc[has_xr_osteophyte, 'XR_Osteophytes'] = 1.0

# 0.0 -> all four columns lie in [0.0, 1.0)
no_xr_osteophyte = (xr_grades.ge(0.0) & xr_grades.lt(1.0)).all(axis=1)
moaks_OA_features.loc[no_xr_osteophyte, 'XR_Osteophytes'] = 0.0

xr_distribution = moaks_OA_features['XR_Osteophytes'].value_counts(dropna=False)
print(f'With Xray Osteophytes->1.0, Without->0.0 \n{xr_distribution}')

With Xray Osteophytes->1.0, Without->0.0 
1.0    546
0.0     54
Name: XR_Osteophytes, dtype: int64


In [22]:
# print(moaks_OA_features.loc[moaks_OA_features.XR_Osteophytes.isna()].index)
# print(common_fnih_kxr_00.iloc[moaks_OA_features.loc[moaks_OA_features.XR_Osteophytes.isna()].index,-5:])

In [23]:
# NOW WE FILL THE BML COLUMN
# NOTE(review): unlike the cartilage column list, no '...FLA' / '...FMA'
# columns appear here -- confirm that this is intentional.
bml_columns = [
    'V00MBMSFLC', 'V00MBMSFLP', 'V00MBMSTLA', 'V00MBMSTLC', 'V00MBMSTLP',
    'V00MBMSFMC', 'V00MBMSFMP', 'V00MBMSTMA', 'V00MBMSTMC', 'V00MBMSTMP',
]
bml_grades = common_fnih_kxr_00[bml_columns]

# 1.0 -> at least one BML column is >= 1.0
has_bml = bml_grades.ge(1.0).any(axis=1)
moaks_OA_features.loc[has_bml, 'BML'] = 1.0

# 0.0 -> every BML column lies in [0.0, 1.0); rows matching neither mask stay NaN
no_bml = (bml_grades.ge(0.0) & bml_grades.lt(1.0)).all(axis=1)
moaks_OA_features.loc[no_bml, 'BML'] = 0.0

bml_distribution = moaks_OA_features['BML'].value_counts(dropna=False)
print(f'With BML->1.0, Without->0.0 \n{bml_distribution}')


# print('Number of Subjects WITH Bone Marrow Lesions: {}'.
#         format((moaks_OA_features['BML']==1.0).sum()))

# print('Number of Subjects WITHOUT Bone Marrow Lesions: {}'.
#         format((moaks_OA_features['BML']==0.0).sum()))

With BML->1.0, Without->0.0 
1.0    346
0.0    253
NaN      1
Name: BML, dtype: int64


In [24]:
# AND NOW WE FILL THE MENISCUS DEGRADATION COLUMN
# Two groups of columns are used, each with its own cut-off.
cutoff_two_columns = [
    'V00MMTLA', 'V00MMTLB', 'V00MMTLP', 'V00MMRTL',
    'V00MMTMA', 'V00MMTMB', 'V00MMTMP', 'V00MMRTM',
]
cutoff_one_columns = ['V00MMXLL', 'V00MMXLA', 'V00MMXMM', 'V00MMXMA']
cutoff_two_grades = common_fnih_kxr_00[cutoff_two_columns]
cutoff_one_grades = common_fnih_kxr_00[cutoff_one_columns]

# 1.0 -> any first-group column >= 2.0 or any second-group column >= 1.0
has_degradation = (
    cutoff_two_grades.ge(2.0).any(axis=1) | cutoff_one_grades.ge(1.0).any(axis=1)
)
moaks_OA_features.loc[has_degradation, 'Meniscus_Degradation'] = 1.0

# 0.0 -> every first-group column in [0.0, 2.0) and every second-group column in [0.0, 1.0)
no_degradation = (
    (cutoff_two_grades.ge(0.0) & cutoff_two_grades.lt(2.0)).all(axis=1)
    & (cutoff_one_grades.ge(0.0) & cutoff_one_grades.lt(1.0)).all(axis=1)
)
moaks_OA_features.loc[no_degradation, 'Meniscus_Degradation'] = 0.0

meniscus_distribution = moaks_OA_features['Meniscus_Degradation'].value_counts(dropna=False)
print(f'With Meniscal Degradation->1.0, Without->0.0 \n{meniscus_distribution}')


# print('Number of Subjects WITH Meniscus Degradation: {}'.
#         format((moaks_OA_features['Meniscus_Degradation']==1.0).sum()))

# print('Number of Subjects WITHOUT Meniscus Degradation: {}'.
#         format((moaks_OA_features['Meniscus_Degradation']==0.0).sum()))

With Meniscal Degradation->1.0, Without->0.0 
1.0    494
0.0    106
Name: Meniscus_Degradation, dtype: int64


In [25]:
# NaN count of every feature-table column, in column order.
feature_nan_counts = moaks_OA_features.isna().sum().to_list()
print(f'Number of NaN values per column: {feature_nan_counts}')

Number of NaN values per column: [0, 0, 0, 0, 0, 0, 1, 0]


In [26]:
# Display the feature table after the feature columns have been filled.
moaks_OA_features

Unnamed: 0,ID,SIDE,XR_Osteophytes,FullTCL,AnyTCL,PartialTCL,BML,Meniscus_Degradation
0,9001695,1,1.0,0.0,1.0,1.0,1.0,1.0
1,9002116,2,1.0,0.0,1.0,1.0,1.0,1.0
2,9002430,1,1.0,0.0,1.0,1.0,1.0,1.0
3,9002817,1,1.0,0.0,1.0,1.0,1.0,1.0
4,9003316,1,1.0,0.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...
595,9993833,2,1.0,0.0,1.0,1.0,1.0,1.0
596,9994408,1,1.0,0.0,1.0,1.0,1.0,1.0
597,9995338,2,1.0,0.0,1.0,1.0,1.0,0.0
598,9996098,1,1.0,0.0,1.0,1.0,1.0,1.0


In [27]:
# Persist the feature table to CSV in the working directory (the row index is written too).
moaks_OA_features.to_csv('moaks_fnih_sq00_OA_features.csv')

# NOW WE CONSTRUCT THE DF THAT CONTAINS THE OA CRITERIA AS COLUMNS

In [28]:
# One row per (ID, SIDE) and one column per OA criterion.
# reindex() copies ID and SIDE (keeping their integer dtype and the row index)
# and creates every criterion column as float64 NaN in a single step. Building
# an empty object-dtype frame and assigning NaN through .iloc afterwards leaves
# the resulting column dtype dependent on the installed pandas version.
oa_criteria_columns = ['ID', 'SIDE',
                       'Osteo_AND_FullTCL',
                       'Osteo_AND_PartialTCL_AND_BML',
                       'Osteo_AND_PartialTCL_AND_Meniscus_Degradation',
                       'Osteo_AND_BML_AND_Meniscus_Degradation',
                       'FullTCL_AND_BML_AND_Meniscus_Degradation']
moaks_OA_criteria = common_fnih_kxr_00[['ID', 'SIDE']].reindex(columns=oa_criteria_columns)
moaks_OA_criteria

Unnamed: 0,ID,SIDE,Osteo_AND_FullTCL,Osteo_AND_PartialTCL_AND_BML,Osteo_AND_PartialTCL_AND_Meniscus_Degradation,Osteo_AND_BML_AND_Meniscus_Degradation,FullTCL_AND_BML_AND_Meniscus_Degradation
0,9001695,1,,,,,
1,9002116,2,,,,,
2,9002430,1,,,,,
3,9002817,1,,,,,
4,9003316,1,,,,,
...,...,...,...,...,...,...,...
595,9993833,2,,,,,
596,9994408,1,,,,,
597,9995338,2,,,,,
598,9996098,1,,,,,


In [29]:
# Criterion: XR osteophytes AND full-thickness cartilage loss.
# Rows where either feature is not exactly 1.0 (NaN included) are set to 0.0.
osteo_and_full = (
    moaks_OA_features['XR_Osteophytes'].eq(1.0)
    & moaks_OA_features['FullTCL'].eq(1.0)
)
moaks_OA_criteria.loc[osteo_and_full, 'Osteo_AND_FullTCL'] = 1.0
moaks_OA_criteria.loc[~osteo_and_full, 'Osteo_AND_FullTCL'] = 0.0

osteo_full_distribution = moaks_OA_criteria['Osteo_AND_FullTCL'].value_counts(dropna=False)
print(f'With XR Osteophytes AND FullTCL->1.0, Without->0.0: \n{osteo_full_distribution}')

With XR Osteophytes AND FullTCL->1.0, Without->0.0: 
0.0    532
1.0     68
Name: Osteo_AND_FullTCL, dtype: int64


In [30]:
# Criterion: XR osteophytes AND partial-thickness cartilage loss AND BML.
# Rows where any of the three features is not exactly 1.0 (NaN included) are set to 0.0.
osteo_partial_bml = (
    moaks_OA_features['XR_Osteophytes'].eq(1.0)
    & moaks_OA_features['PartialTCL'].eq(1.0)
    & moaks_OA_features['BML'].eq(1.0)
)
moaks_OA_criteria.loc[osteo_partial_bml, 'Osteo_AND_PartialTCL_AND_BML'] = 1.0
moaks_OA_criteria.loc[~osteo_partial_bml, 'Osteo_AND_PartialTCL_AND_BML'] = 0.0

osteo_partial_bml_distribution = moaks_OA_criteria['Osteo_AND_PartialTCL_AND_BML'].value_counts(dropna=False)
print(f'With XR Osteophytes AND PartialTCL AND BML->1.0, Without->0.0 \n{osteo_partial_bml_distribution}')

With XR Osteophytes AND PartialTCL AND BML->1.0, Without->0.0 
0.0    333
1.0    267
Name: Osteo_AND_PartialTCL_AND_BML, dtype: int64


In [31]:
# Criterion: XR osteophytes AND partial-thickness cartilage loss AND meniscus degradation.
# Rows where any of the three features is not exactly 1.0 (NaN included) are set to 0.0.
osteo_partial_meniscus = (
    moaks_OA_features['XR_Osteophytes'].eq(1.0)
    & moaks_OA_features['PartialTCL'].eq(1.0)
    & moaks_OA_features['Meniscus_Degradation'].eq(1.0)
)
moaks_OA_criteria.loc[osteo_partial_meniscus, 'Osteo_AND_PartialTCL_AND_Meniscus_Degradation'] = 1.0
moaks_OA_criteria.loc[~osteo_partial_meniscus, 'Osteo_AND_PartialTCL_AND_Meniscus_Degradation'] = 0.0

osteo_partial_meniscus_distribution = (
    moaks_OA_criteria['Osteo_AND_PartialTCL_AND_Meniscus_Degradation'].value_counts(dropna=False)
)
print(f'With Osteophytes AND Partial TCL AND Meniscus_Degradation->1.0, Without->0.0: \n{osteo_partial_meniscus_distribution}')

With Osteophytes AND Partial TCL AND Meniscus_Degradation->1.0, Without->0.0: 
1.0    381
0.0    219
Name: Osteo_AND_PartialTCL_AND_Meniscus_Degradation, dtype: int64


In [32]:
# Criterion: XR osteophytes AND BML AND meniscus degradation.
# Rows where any of the three features is not exactly 1.0 (NaN included) are set to 0.0.
osteo_bml_meniscus = (
    moaks_OA_features['XR_Osteophytes'].eq(1.0)
    & moaks_OA_features['BML'].eq(1.0)
    & moaks_OA_features['Meniscus_Degradation'].eq(1.0)
)
moaks_OA_criteria.loc[osteo_bml_meniscus, 'Osteo_AND_BML_AND_Meniscus_Degradation'] = 1.0
moaks_OA_criteria.loc[~osteo_bml_meniscus, 'Osteo_AND_BML_AND_Meniscus_Degradation'] = 0.0

osteo_bml_meniscus_distribution = (
    moaks_OA_criteria['Osteo_AND_BML_AND_Meniscus_Degradation'].value_counts(dropna=False)
)
print(f'With Osteophytes AND BML AND Meniscus_Degradation->1.0, Without->0.0: \n{osteo_bml_meniscus_distribution}')

With Osteophytes AND BML AND Meniscus_Degradation->1.0, Without->0.0: 
0.0    306
1.0    294
Name: Osteo_AND_BML_AND_Meniscus_Degradation, dtype: int64


In [33]:
# Criterion: full-thickness cartilage loss AND BML AND meniscus degradation.
# Rows where any of the three features is not exactly 1.0 (NaN included) are set to 0.0.
full_bml_meniscus = (
    moaks_OA_features['FullTCL'].eq(1.0)
    & moaks_OA_features['BML'].eq(1.0)
    & moaks_OA_features['Meniscus_Degradation'].eq(1.0)
)
moaks_OA_criteria.loc[full_bml_meniscus, 'FullTCL_AND_BML_AND_Meniscus_Degradation'] = 1.0
moaks_OA_criteria.loc[~full_bml_meniscus, 'FullTCL_AND_BML_AND_Meniscus_Degradation'] = 0.0

full_bml_meniscus_distribution = (
    moaks_OA_criteria['FullTCL_AND_BML_AND_Meniscus_Degradation'].value_counts(dropna=False)
)
print(f'With FullTCL AND BML AND Meniscus_Degradation->1.0, Without->0.0: \n{full_bml_meniscus_distribution}')


With FullTCL AND BML AND Meniscus_Degradation->1.0, Without->0.0: 
0.0    548
1.0     52
Name: FullTCL_AND_BML_AND_Meniscus_Degradation, dtype: int64


In [34]:
# isna().sum() counts the missing values per column, so the label says exactly
# that; the earlier wording ("subjects without OA") described a different quantity.
print('Number of NaN values for each criterion column is: \n{}'.format(moaks_OA_criteria.isna().sum()))

Number of subjects without OA for each criterion is: 
ID                                               0
SIDE                                             0
Osteo_AND_FullTCL                                0
Osteo_AND_PartialTCL_AND_BML                     0
Osteo_AND_PartialTCL_AND_Meniscus_Degradation    0
Osteo_AND_BML_AND_Meniscus_Degradation           0
FullTCL_AND_BML_AND_Meniscus_Degradation         0
dtype: int64


In [35]:
# Any NaN left in the criteria table is replaced by the sentinel -1.0.
moaks_OA_criteria = moaks_OA_criteria.fillna(value=-1.0)

In [36]:
# Persist the criteria table to CSV (the row index is written too).
# NOTE(review): the 'moaks_OA' column is only added in a later cell, so this
# file will not contain it when the notebook is run top to bottom -- confirm intended.
moaks_OA_criteria.to_csv('moaks_fnih_sq00_OA_criteria.csv')

In [37]:
def _criterion_positives(criteria, criterion):
    """Return the ID, SIDE and `criterion` columns of the rows where `criterion` equals 1.0."""
    return criteria.loc[criteria[criterion] == 1.0, ['ID', 'SIDE', criterion]]


# One sub-table per criterion, each holding only the rows that satisfy it.
moaks_osteo_FullTCL = _criterion_positives(moaks_OA_criteria, 'Osteo_AND_FullTCL')
moaks_osteo_PartialTCL_bml = _criterion_positives(moaks_OA_criteria, 'Osteo_AND_PartialTCL_AND_BML')
moaks_osteo_PartialTCL_meniscus = _criterion_positives(
    moaks_OA_criteria, 'Osteo_AND_PartialTCL_AND_Meniscus_Degradation')
moaks_osteo_bml_meniscus = _criterion_positives(moaks_OA_criteria, 'Osteo_AND_BML_AND_Meniscus_Degradation')
moaks_FullTCL_bml_meniscus = _criterion_positives(moaks_OA_criteria, 'FullTCL_AND_BML_AND_Meniscus_Degradation')

In [38]:
# NOW WE FIND THOSE WHO SATISFY AT LEAST ONE OF THE OA CRITERIA
# 'moaks_OA' starts as NaN, becomes 1.0 on the rows where any criterion column
# equals 1.0, and the closing fillna turns every NaN left in the frame into 0.0.
oa_criterion_columns = [
    'Osteo_AND_FullTCL',
    'Osteo_AND_PartialTCL_AND_BML',
    'Osteo_AND_PartialTCL_AND_Meniscus_Degradation',
    'Osteo_AND_BML_AND_Meniscus_Degradation',
    'FullTCL_AND_BML_AND_Meniscus_Degradation',
]
moaks_OA_criteria['moaks_OA'] = np.nan
meets_any_criterion = moaks_OA_criteria[oa_criterion_columns].eq(1.0).any(axis=1)
moaks_OA_criteria.loc[meets_any_criterion, 'moaks_OA'] = 1.0
moaks_OA_criteria.fillna(0.0, inplace=True)
moaks_oa_positive_count = (moaks_OA_criteria['moaks_OA'] == 1.0).sum()
print(f'Number of subjects that satisfy at least one OA criterion is : {moaks_oa_positive_count}')

Number of subjects that satisfy at least one OA criterion is : 478


In [39]:
# Distribution of the overall flag: 1.0 = at least one criterion satisfied, 0.0 = none.
moaks_OA_criteria['moaks_OA'].value_counts()

1.0    478
0.0    122
Name: moaks_OA, dtype: int64

# NOW WE CREATE THE DATAFRAME CONTAINING INFORMATION REGARDING 
# 1.MOAKS FEATURES 
# 2.MOAKS CRITERIA
# 3.MOAKS GRADES
# 4.KL GRADES
# 5.MOAKS-KL GRADES CROSSTABLE

In [40]:
# Load the table with the KL grades and inner-join it to the MOAKS criteria
# on (ID, SIDE), so only knees present in both tables are kept.
kl_id_side_grade = pd.read_csv('/home/anastasis/EMC_Thesis/My_codes/KL_readings/KL_SQ00_ID_SIDE_GRADE.csv')
moaks_kl_and_criteria = pd.merge(moaks_OA_criteria,kl_id_side_grade,on=['ID','SIDE'],how='inner')
# info() prints its own summary and returns None; wrapping it in print()
# would add a stray "None" line to the output.
moaks_kl_and_criteria.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 600 entries, 0 to 599
Data columns (total 9 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   ID                                             600 non-null    int64  
 1   SIDE                                           600 non-null    int64  
 2   Osteo_AND_FullTCL                              600 non-null    float64
 3   Osteo_AND_PartialTCL_AND_BML                   600 non-null    float64
 4   Osteo_AND_PartialTCL_AND_Meniscus_Degradation  600 non-null    float64
 5   Osteo_AND_BML_AND_Meniscus_Degradation         600 non-null    float64
 6   FullTCL_AND_BML_AND_Meniscus_Degradation       600 non-null    float64
 7   moaks_OA                                       600 non-null    float64
 8   V00XRKL                                        600 non-null    float64
dtypes: float64(7), int64(2)
memory usage: 46.9 KB
None


In [41]:
# How many rows carry each V00XRKL value in the merged table.
kl_grade_distribution = moaks_kl_and_criteria['V00XRKL'].value_counts()
print(f'Number of different KL grades in the moaks_kl_and_criteria dataframe:\n{kl_grade_distribution}')


Number of different KL grades in the moaks_kl_and_criteria dataframe:
2.0    306
3.0    219
1.0     75
Name: V00XRKL, dtype: int64


In [42]:
# Persist the criteria + KL grade table to CSV (the row index is written too).
# NOTE(review): the file name says "features" but this frame holds the criteria
# columns, 'moaks_OA' and V00XRKL -- confirm the name is intended.
moaks_kl_and_criteria.to_csv('moaks_fnih_sq00_kl_grades_and_features.csv')


In [43]:
# Outer-join the feature table with the criteria + KL table on (ID, SIDE),
# keeping the rows of both sides.
moaks_KL_OA_all_info = moaks_OA_features.merge(
    moaks_kl_and_criteria, how='outer', on=['ID', 'SIDE']
)

In [44]:
# Summarise the columns, non-null counts and dtypes of the combined table.
moaks_KL_OA_all_info.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 600 entries, 0 to 599
Data columns (total 15 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   ID                                             600 non-null    int64  
 1   SIDE                                           600 non-null    int64  
 2   XR_Osteophytes                                 600 non-null    float64
 3   FullTCL                                        600 non-null    float64
 4   AnyTCL                                         600 non-null    float64
 5   PartialTCL                                     600 non-null    float64
 6   BML                                            599 non-null    float64
 7   Meniscus_Degradation                           600 non-null    float64
 8   Osteo_AND_FullTCL                              600 non-null    float64
 9   Osteo_AND_PartialTCL_AND_BML                   600 non

In [45]:
# Persist the combined features + criteria + KL table to CSV (the row index is written too).
moaks_KL_OA_all_info.to_csv('moaks_fnih_sq00_all_info.csv')

In [46]:
# Narrow views of the combined table: the overall MOAKS flag on its own, and
# the flag next to the KL grade.
key_and_flag_columns = ['ID', 'SIDE', 'moaks_OA']
all_moaks_grades_sq00 = moaks_KL_OA_all_info[key_and_flag_columns]
all_moaks_vs_kl_grades_sq00 = moaks_KL_OA_all_info[key_and_flag_columns + ['V00XRKL']]

In [47]:
# Row counts for each value of the overall MOAKS flag.
moaks_negative_total = (all_moaks_grades_sq00['moaks_OA'] == 0.0).sum()
moaks_positive_total = (all_moaks_grades_sq00['moaks_OA'] == 1.0).sum()
print(f'Sum of FNIH_SQ00 subjects that have MOAKS = 0.0: {moaks_negative_total}')
print(f'Sum of FNIH_SQ00 subjects that have MOAKS = 1.0: {moaks_positive_total}')

Sum of FNIH_SQ00 subjects that have MOAKS = 0.0: 122
Sum of FNIH_SQ00 subjects that have MOAKS = 1.0: 478


In [48]:
# Keep only the rows whose moaks_OA equals 1.0, then print that subset together
# with the number of non-null moaks_OA entries it contains.
is_moaks_one = all_moaks_vs_kl_grades_sq00['moaks_OA'].eq(1.0)
moaks_fnih_sq00_only_moaks_one = all_moaks_vs_kl_grades_sq00.loc[is_moaks_one]
print(
    moaks_fnih_sq00_only_moaks_one,
    '\n',
    'number of subjects with moaks grade = 1.0: ',
    moaks_fnih_sq00_only_moaks_one['moaks_OA'].count(),
)

          ID  SIDE  moaks_OA  V00XRKL
0    9001695     1       1.0      2.0
1    9002116     2       1.0      3.0
2    9002430     1       1.0      2.0
3    9002817     1       1.0      3.0
4    9003316     1       1.0      2.0
..       ...   ...       ...      ...
595  9993833     2       1.0      3.0
596  9994408     1       1.0      3.0
597  9995338     2       1.0      2.0
598  9996098     1       1.0      3.0
599  9997381     1       1.0      2.0

[478 rows x 4 columns] 
 number of subjects with moaks grade = 1.0:  478


In [49]:
# Keep only the rows whose moaks_OA equals 0.0, then print that subset together
# with the number of non-null moaks_OA entries it contains.
is_moaks_zero = all_moaks_vs_kl_grades_sq00['moaks_OA'].eq(0.0)
moaks_fnih_sq00_only_moaks_zero = all_moaks_vs_kl_grades_sq00.loc[is_moaks_zero]
print(
    moaks_fnih_sq00_only_moaks_zero,
    '\n',
    'number of subjects with moaks grade = 0.0: ',
    moaks_fnih_sq00_only_moaks_zero['moaks_OA'].count(),
)

          ID  SIDE  moaks_OA  V00XRKL
5    9003380     1       0.0      1.0
7    9004175     1       0.0      2.0
16   9015798     1       0.0      2.0
20   9021102     2       0.0      2.0
21   9022789     1       0.0      2.0
..       ...   ...       ...      ...
561  9925594     1       0.0      1.0
566  9936451     1       0.0      2.0
583  9969009     2       0.0      2.0
584  9976207     2       0.0      1.0
586  9981798     1       0.0      1.0

[122 rows x 4 columns] 
 number of subjects with moaks grade = 0.0:  122


In [50]:
# Show the ID / SIDE / moaks_OA / V00XRKL table as this cell's output
# (a bare last expression is rendered by the notebook).
all_moaks_vs_kl_grades_sq00

Unnamed: 0,ID,SIDE,moaks_OA,V00XRKL
0,9001695,1,1.0,2.0
1,9002116,2,1.0,3.0
2,9002430,1,1.0,2.0
3,9002817,1,1.0,3.0
4,9003316,1,1.0,2.0
...,...,...,...,...
595,9993833,2,1.0,3.0
596,9994408,1,1.0,3.0
597,9995338,2,1.0,2.0
598,9996098,1,1.0,3.0


In [51]:
# Write each of the derived tables to its own CSV file. The (table, file name)
# pairs are processed in order; paths are relative to the working directory.
moaks_csv_exports = [
    (all_moaks_vs_kl_grades_sq00, 'moaks_fnih_sq00_moaks_vs_kl.csv'),
    (moaks_fnih_sq00_only_moaks_one, 'moaks_fnih_sq00_only_moaks_one.csv'),
    (moaks_fnih_sq00_only_moaks_zero, 'moaks_fnih_sq00_only_moaks_zero.csv'),
    (all_moaks_grades_sq00, 'moaks_fnih_sq00_moaks_grades.csv'),
]
for export_frame, export_csv_name in moaks_csv_exports:
    export_frame.to_csv(export_csv_name)

In [52]:
# Contingency table of moaks_OA (rows) against V00XRKL (columns); margins=True
# appends row/column totals under the label 'Total'. The table is saved to CSV
# and then displayed as the cell output.
moaks_OA_KL_crosstab = pd.crosstab(
    index=all_moaks_vs_kl_grades_sq00['moaks_OA'],
    columns=all_moaks_vs_kl_grades_sq00['V00XRKL'],
    margins=True,
    margins_name='Total',
)
moaks_OA_KL_crosstab.to_csv('moaks_fnih_sq00_OA_KL_crosstab.csv')
moaks_OA_KL_crosstab

V00XRKL,1.0,2.0,3.0,Total
moaks_OA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,56,65,1,122
1.0,19,241,218,478
Total,75,306,219,600


In [53]:
# Select, per feature, the rows of moaks_OA_features whose value is exactly 0.0,
# save each selection to its own CSV file and print how many rows it holds.
# A comparison with NaN is False, so rows with a missing value are never selected.
feature_is_zero = moaks_OA_features[
    ['XR_Osteophytes', 'FullTCL', 'PartialTCL', 'AnyTCL', 'BML', 'Meniscus_Degradation']
].eq(0.0)

moaks_OA_osteo_zero = moaks_OA_features.loc[feature_is_zero['XR_Osteophytes']]
# The TCL selection requires all three TCL columns to be zero at once.
moaks_OA_TCL_zero = moaks_OA_features.loc[
    feature_is_zero[['FullTCL', 'PartialTCL', 'AnyTCL']].all(axis=1)
]
moaks_OA_bml_zero = moaks_OA_features.loc[feature_is_zero['BML']]
moaks_OA_meniscus_zero = moaks_OA_features.loc[feature_is_zero['Meniscus_Degradation']]

zero_feature_reports = [
    (moaks_OA_osteo_zero, 'moaks_fnih_sq00_Zero_Osteo.csv',
     'Number of subjects without definite osteophytes in FNIH_SQ00 is : {}'),
    (moaks_OA_TCL_zero, 'moaks_fnih_sq00_Zero_TCL.csv',
     'Number of subjects without TCL in FNIH_SQ00 is : {}'),
    (moaks_OA_bml_zero, 'moaks_fnih_sq00_Zero_BML.csv',
     'Number of subjects without BML in FNIH_SQ00 is : {}'),
    (moaks_OA_meniscus_zero, 'moaks_fnih_sq00_Zero_Meniscus.csv',
     'Number of subjects without Meniscus Degradation in FNIH_SQ00 is : {}'),
]
for zero_subset, zero_csv_name, zero_report in zero_feature_reports:
    zero_subset.to_csv(zero_csv_name)
    print(zero_report.format(len(zero_subset)))


Number of subjects without definite osteophytes in FNIH_SQ00 is : 54
Number of subjects without TCL in FNIH_SQ00 is : 31
Number of subjects without BML in FNIH_SQ00 is : 253
Number of subjects without Meniscus Degradation in FNIH_SQ00 is : 106
