In [1]:
import pandas as pd
import numpy as np
from functools import reduce

In [2]:
# Read the pipe-delimited MOAKS readings export into a DataFrame.
moaks_fnih_sq01 = pd.read_csv(
    '/home/anastasis/EMC_Thesis/OAI Data/OAICompleteData_ASCII/kMRI_FNIH_SQ_MOAKS_BICL01.txt',
    sep="|",
)

In [3]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it is
# called on its own; wrapping it in print() would additionally emit the text "None".
moaks_fnih_sq01.info()
# Show the third-from-last column to see the raw 'code: label' value format.
print(moaks_fnih_sq01.iloc[:, -3])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Columns: 122 entries, ID to V01MTCMNTS
dtypes: float64(16), int64(2), object(104)
memory usage: 572.0+ KB
None 
 0      1: Yes
1      1: Yes
2      1: Yes
3       0: No
4       0: No
        ...  
595    1: Yes
596     0: No
597    1: Yes
598     0: No
599     0: No
Name: V01MPOPCYS, Length: 600, dtype: object


# NOW WE DELETE ALL THE COMMENTS OF THE INITIAL DATAFRAME AND CONVERT THE VALUES TO NUMBERS

In [4]:
# Step 1: drop the free-text label that follows the colon in coded values
# (e.g. '1: Yes' becomes '1:'); the colon itself is kept for step 2.
moaks_without_commments = moaks_fnih_sq01.replace(r'(\:).*$', r'\1', regex=True)
# Step 2: delete the '.:' and ':' markers and every 'R' character (meant for the
# reader column) by replacing them with the empty string. Raw strings are used so
# the regex backslashes are not parsed as (invalid) Python escape sequences.
# NOTE(review): the 'R' pattern is applied to every string column, not only the
# reader column -- confirm no other column relies on that letter.
# Step 3: drop the two free-text comment columns from the dataset.
moaks_without_commments_bullets = (
    moaks_without_commments
    .replace({r'\.\:': '', ':': '', 'R': ''}, regex=True)
    .drop(columns=['V01MCMNTS', 'V01MTCMNTS'])
)
print(moaks_without_commments_bullets.READPRJ.value_counts())

22    600
Name: READPRJ, dtype: int64


In [5]:
# Convert every column to a numeric dtype; downcast='float' stores the result as
# float32 where possible. Values that cannot be parsed would raise here.
moaks_without_c_b_numeric = moaks_without_commments_bullets.apply(pd.to_numeric, downcast='float')
# ID and SIDE are identifiers, so they are converted to integers directly from the
# cleaned values instead of being cast back from the float32 columns above
# (float32 represents integers exactly only up to 2**24).
moaks_without_c_b_numeric[['ID', 'SIDE']] = (
    moaks_without_commments_bullets[['ID', 'SIDE']].apply(pd.to_numeric).astype('int')
)
# info() prints its own summary and returns None, so it is not wrapped in print().
moaks_without_c_b_numeric.info()
print('Number of NaN values are : ',moaks_without_c_b_numeric.isna().sum().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Columns: 120 entries, ID to V01MPOPCYS
dtypes: float32(118), int64(2)
memory usage: 286.1 KB
None
Number of NaN values are :  2994


In [6]:
# Count the missing values of each column, listed in column order.
nan_counts_per_column = moaks_without_c_b_numeric.isna().sum().to_list()
print('Number of NaN values in the initial dataframe: {}'.format(nan_counts_per_column))

Number of NaN values in the initial dataframe: [0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 20, 18, 18, 18, 18, 19, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 130, 130, 130, 20, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 600, 18]


In [7]:
# Replace every missing value with the sentinel -1.0 so that no NaN remains;
# later cells exclude these rows through their '>= 0.0' conditions.
moaks_without_c_b_nan_numeric = moaks_without_c_b_numeric.fillna(value=-1.0)
moaks_without_c_b_nan_numeric.info()
remaining_nan = moaks_without_c_b_nan_numeric.isna().sum().sum()
print('Number of NaN values in the Dataframe: {}'.format(remaining_nan))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Columns: 120 entries, ID to V01MPOPCYS
dtypes: float32(118), int64(2)
memory usage: 286.1 KB
Number of NaN values in the Dataframe: 0


In [8]:
# moaks_grades is the cleaned, fully numeric version of moaks_fnih_sq01.
# This binds a second name to the same DataFrame object; no copy is made.
moaks_grades = moaks_without_c_b_nan_numeric
# info() prints its own summary and returns None, so it is not wrapped in print().
moaks_grades.info()
# Persist the cleaned table (the index is written as the first CSV column).
moaks_grades.to_csv('moaks_fnih_sq01_initial_numeric_df.csv')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Columns: 120 entries, ID to V01MPOPCYS
dtypes: float32(118), int64(2)
memory usage: 286.1 KB
None


# NOW WE LOAD THE RADIOGRAPH INFORMATION DATABASE IN ORDER TO ACQUIRE THE XR OSTEOPHYTES

In [9]:
# Read the pipe-delimited radiograph readings table.
kxr_sq_bu01 = pd.read_csv(
    '/home/anastasis/EMC_Thesis/OAI Data/OAICompleteData_ASCII/kxr_sq_bu01.txt',
    sep="|",
)
# Keep only the rows whose 'readprj' equals 15, as an independent copy.
KL_prj15 = kxr_sq_bu01.loc[kxr_sq_bu01['readprj'].eq(15)].copy()
# From those rows keep the knee identifiers, the four V01XROS* columns and V01XRKL.
KL_prj15_osteophytes = KL_prj15[
    ['ID', 'SIDE', 'V01XROSFL', 'V01XROSFM', 'V01XROSTL', 'V01XROSTM', 'V01XRKL']
]


In [10]:
# Build the table holding both the MOAKS grades and the radiograph columns:
# inner-join on (ID, SIDE), then replace any remaining NaN with 0.0.
# NOTE(review): missing MOAKS values were encoded as -1.0 earlier, whereas NaN coming
# from the radiograph columns becomes 0.0 here and cannot be told apart from a real
# 0 score afterwards -- confirm this is intended.
common_fnih_kxr_01 = (
    moaks_grades
    .merge(KL_prj15_osteophytes, on=['ID', 'SIDE'], how='inner')
    .fillna(0.0)
)
# Sanity check: per-column NaN counts of the merged table.
common_fnih_kxr_01.isna().sum()

ID           0
SIDE         0
VERSION      0
READPRJ      0
V01READER    0
            ..
V01XROSFL    0
V01XROSFM    0
V01XROSTL    0
V01XROSTM    0
V01XRKL      0
Length: 125, dtype: int64

In [55]:
# Distribution of the V01XRKL values in the merged table (this is the cell's
# displayed result; a value of 0 would show up here as its own row).
common_fnih_kxr_01.V01XRKL.value_counts()

2.0    296
3.0    224
1.0     59
4.0      5
Name: V01XRKL, dtype: int64

# NOW WE FIND THE SUBJECTS THAT FULFILL THE OA FEATURES FOR THE WHOLE KNEE JOINT

In [11]:
# FIRST WE CONSTRUCT A NEW DATAFRAME WITH ALL THE OA FEATURES AS COLUMNS
# 'AnyTCL' is a helper column: 1.0 marks the subjects with at least one
# thickness-cartilage-loss variable >= 1.0, and 0.0 marks the subjects that
# DO NOT have any thickness cartilage loss.

# ID and SIDE (integers) are taken from the merged table; reindexing to the full
# column list appends the six feature columns, all filled with NaN.
moaks_OA_features = common_fnih_kxr_01[['ID', 'SIDE']].reindex(
    columns=['ID', 'SIDE', 'XR_Osteophytes', 'FullTCL', 'AnyTCL',
             'PartialTCL', 'BML', 'Meniscus_Degradation'])

print('Type of ID and SIDE columns {} and type of the rest of the columns {}'
    .format(type(moaks_OA_features.iloc[0,0]),type(moaks_OA_features.iloc[0,2])))


Type of ID and SIDE columns <class 'numpy.int64'> and type of the rest of the columns <class 'numpy.float64'>


In [12]:
# Display the feature table: ID and SIDE are filled in, while the feature columns
# are still empty (NaN) at this point.
moaks_OA_features

Unnamed: 0,ID,SIDE,XR_Osteophytes,FullTCL,AnyTCL,PartialTCL,BML,Meniscus_Degradation
0,9001695,1,,,,,,
1,9002116,2,,,,,,
2,9002430,1,,,,,,
3,9002817,1,,,,,,
4,9003316,1,,,,,,
...,...,...,...,...,...,...,...,...
579,9993833,2,,,,,,
580,9994408,1,,,,,,
581,9995338,2,,,,,,
582,9996098,1,,,,,,


In [13]:
# FullTCL is set to 1.0 for every row in which at least one of the twelve
# cartilage-thickness-loss columns has a value of 3.0 or more.
tcl_columns = ['V01MCMFLA', 'V01MCMFLP', 'V01MCMFLC', 'V01MCMTLA',
               'V01MCMTLP', 'V01MCMTLC', 'V01MCMFMA', 'V01MCMFMP',
               'V01MCMFMC', 'V01MCMTMA', 'V01MCMTMP', 'V01MCMTMC']
any_at_least_three = common_fnih_kxr_01[tcl_columns].ge(3.0).any(axis=1)
moaks_OA_features.loc[any_at_least_three, 'FullTCL'] = 1.0

In [14]:
# Number of rows flagged with FullTCL == 1.0.
n_full_tcl = moaks_OA_features['FullTCL'].eq(1.0).sum()
print('Number of subjects WITH Full CTL : {}'.format(n_full_tcl))


Number of subjects WITH Full CTL : 66


In [15]:
# FullTCL is set to 0.0 only where ALL twelve cartilage-thickness-loss columns lie
# in the range [0.0, 3.0). A row with any negative value (the -1.0 missing-value
# sentinel) and no value >= 3.0 therefore keeps NaN.
tcl_columns = ['V01MCMFLA', 'V01MCMFLP', 'V01MCMFLC', 'V01MCMTLA',
               'V01MCMTLP', 'V01MCMTLC', 'V01MCMFMA', 'V01MCMFMP',
               'V01MCMFMC', 'V01MCMTMA', 'V01MCMTMP', 'V01MCMTMC']
tcl_values = common_fnih_kxr_01[tcl_columns]
all_below_three = (tcl_values.ge(0.0) & tcl_values.lt(3.0)).all(axis=1)
moaks_OA_features.loc[all_below_three, 'FullTCL'] = 0.0
print(moaks_OA_features['FullTCL'].value_counts())


0.0    508
1.0     66
Name: FullTCL, dtype: int64


In [16]:
# AnyTCL is derived from the same cartilage-thickness-loss columns: if at least one
# of them is 1.0 or more, the knee has either full or partial thickness cartilage
# loss and AnyTCL is set to 1.0.
tcl_columns = ['V01MCMFLA', 'V01MCMFLP', 'V01MCMFLC', 'V01MCMTLA',
               'V01MCMTLP', 'V01MCMTLC', 'V01MCMFMA', 'V01MCMFMP',
               'V01MCMFMC', 'V01MCMTMA', 'V01MCMTMP', 'V01MCMTMC']
any_at_least_one = common_fnih_kxr_01[tcl_columns].ge(1.0).any(axis=1)
moaks_OA_features.loc[any_at_least_one, 'AnyTCL'] = 1.0



In [17]:
# AnyTCL is set to 0.0 only where ALL twelve cartilage-thickness-loss columns lie in
# the range [0.0, 1.0), i.e. the knee shows no thickness cartilage loss. Rows that
# contain a negative value and no value >= 1.0 keep NaN.
tcl_columns = ['V01MCMFLA', 'V01MCMFLP', 'V01MCMFLC', 'V01MCMTLA',
               'V01MCMTLP', 'V01MCMTLC', 'V01MCMFMA', 'V01MCMFMP',
               'V01MCMFMC', 'V01MCMTMA', 'V01MCMTMP', 'V01MCMTMC']
tcl_values = common_fnih_kxr_01[tcl_columns]
all_below_one = (tcl_values.ge(0.0) & tcl_values.lt(1.0)).all(axis=1)
moaks_OA_features.loc[all_below_one, 'AnyTCL'] = 0.0
n_without_tcl = moaks_OA_features['AnyTCL'].eq(0.0).sum()
print('Number of Subjects WITHOUT Any Cartilage Thickness Loss: {}'.
        format(n_without_tcl))


Number of Subjects WITHOUT Any Cartilage Thickness Loss: 32


In [18]:
# Tally of the AnyTCL flag (value_counts leaves NaN rows out by default).
print(moaks_OA_features['AnyTCL'].value_counts())

1.0    547
0.0     32
Name: AnyTCL, dtype: int64


In [19]:
# PartialTCL starts as a copy of AnyTCL and is forced to 0.0 wherever FullTCL is 1.0.
# A PartialTCL of 1.0 therefore means: some cartilage-loss variable is >= 1.0 but
# none reaches 3.0. Rows whose AnyTCL is NaN stay NaN.
has_full_loss = moaks_OA_features['FullTCL'].eq(1)
moaks_OA_features['PartialTCL'] = moaks_OA_features['AnyTCL'].mask(has_full_loss, 0.0)
n_partial_tcl = moaks_OA_features['PartialTCL'].eq(1.0).sum()
print('Number of Subjects with Partial Thickness Cartilage Loss: {}'.
        format(n_partial_tcl))

Number of Subjects with Partial Thickness Cartilage Loss: 481


In [20]:
# # NOW WE FILL THE DEFINITE OSTEOPHYTES COLUMN

# moaks_OA_features.loc[((moaks_grades['V01MOSFLA'] >= 2.0) | (moaks_grades['V01MOSFLP'] >= 2.0) |
#                 (moaks_grades['V01MOSFLC'] >= 2.0) | (moaks_grades['V01MOSTL'] >= 2.0) |
#                 (moaks_grades['V01MOSFMA'] >= 2.0) | (moaks_grades['V01MOSFMP'] >= 2.0) |
#                 (moaks_grades['V01MOSFMC'] >= 2.0) | (moaks_grades['V01MOSTM'] >= 2.0)),'Definite_Osteophytes'] = 1.0

# moaks_OA_features.loc[((moaks_grades['V01MOSFLA'] < 2.0) & (moaks_grades['V01MOSFLA'] >= 0.0) & 
#                 (moaks_grades['V01MOSFLP'] < 2.0) & (moaks_grades['V01MOSFLP'] >= 0.0) &
#                 (moaks_grades['V01MOSFLC'] < 2.0) & (moaks_grades['V01MOSFLC'] >= 0.0) & 
#                 (moaks_grades['V01MOSTL'] < 2.0) & (moaks_grades['V01MOSTL'] >= 0.0) &
#                 (moaks_grades['V01MOSFMA'] < 2.0) & (moaks_grades['V01MOSFMA'] >= 0.0) & 
#                 (moaks_grades['V01MOSFMP'] < 2.0) & (moaks_grades['V01MOSFMP'] >= 0.0) &
#                 (moaks_grades['V01MOSFMC'] < 2.0) & (moaks_grades['V01MOSFMC'] >= 0.0) & 
#                 (moaks_grades['V01MOSTM'] < 2.0) & (moaks_grades['V01MOSTM'] >= 0.0)),'Definite_Osteophytes'] = 0.0

# print('With Definite Osteophytes->1.0, Without->0.0 \n{}'.format(moaks_OA_features.Definite_Osteophytes.value_counts(dropna=False)))


# # print('Number of Subjects WITH Definite Osteophytes: {}'.
# #         format((moaks_OA_features['Definite_Osteophytes']==1.0).sum()))

# # print('Number of Subjects WITHOUT Definite Osteophytes: {}'.
# #         format((moaks_OA_features['Definite_Osteophytes']==0.0).sum()))


In [21]:
## NOW WE FILL THE XRAY OSTEOPHYTES COLUMN
# 1.0 when at least one of the four V01XROS* columns is >= 1.0;
# 0.0 when all four lie in the range [0.0, 1.0).
xr_osteophyte_columns = ['V01XROSFL', 'V01XROSFM', 'V01XROSTL', 'V01XROSTM']
xr_values = common_fnih_kxr_01[xr_osteophyte_columns]

any_osteophyte = xr_values.ge(1.0).any(axis=1)
moaks_OA_features.loc[any_osteophyte, 'XR_Osteophytes'] = 1.0

no_osteophyte = (xr_values.ge(0.0) & xr_values.lt(1.0)).all(axis=1)
moaks_OA_features.loc[no_osteophyte, 'XR_Osteophytes'] = 0.0

print('With Xray Osteophytes->1.0, Without->0.0 \n{}'.format(moaks_OA_features['XR_Osteophytes'].value_counts(dropna=False)))

With Xray Osteophytes->1.0, Without->0.0 
1.0    538
0.0     46
Name: XR_Osteophytes, dtype: int64


In [22]:
# print(moaks_OA_features.loc[moaks_OA_features.XR_Osteophytes.isna()].index)
# print(common_fnih_kxr_01.iloc[moaks_OA_features.loc[moaks_OA_features.XR_Osteophytes.isna()].index,-5:])

In [23]:
# NOW WE FILL THE BML COLUMN
# 1.0 when at least one of the ten V01MBMS* columns below is >= 1.0;
# 0.0 when all ten lie in the range [0.0, 1.0); otherwise the value stays NaN.
# NOTE(review): unlike the cartilage cells, no column with the 'FLA' / 'FMA' suffix
# is part of this list -- confirm that this is intentional.
bml_columns = ['V01MBMSFLC', 'V01MBMSFLP', 'V01MBMSTLA', 'V01MBMSTLC',
               'V01MBMSTLP', 'V01MBMSFMC', 'V01MBMSFMP', 'V01MBMSTMA',
               'V01MBMSTMC', 'V01MBMSTMP']
bml_values = common_fnih_kxr_01[bml_columns]

any_bml = bml_values.ge(1.0).any(axis=1)
moaks_OA_features.loc[any_bml, 'BML'] = 1.0

no_bml = (bml_values.ge(0.0) & bml_values.lt(1.0)).all(axis=1)
moaks_OA_features.loc[no_bml, 'BML'] = 0.0

print('With BML->1.0, Without->0.0 \n{}'.format(moaks_OA_features['BML'].value_counts(dropna=False)))


# print('Number of Subjects WITH Bone Marrow Lesions: {}'.
#         format((moaks_OA_features['BML']==1.0).sum()))

# print('Number of Subjects WITHOUT Bone Marrow Lesions: {}'.
#         format((moaks_OA_features['BML']==0.0).sum()))

With BML->1.0, Without->0.0 
1.0    278
0.0    264
NaN     42
Name: BML, dtype: int64


In [24]:
# AND NOW WE FILL THE MENISCUS DEGRADATION COLUMN
# Two groups of columns with different cut-offs are combined:
#   * eight columns that count as positive from 2.0 upwards,
#   * four columns that count as positive from 1.0 upwards.
# 1.0 when any column reaches its cut-off; 0.0 when every column lies between 0.0
# (inclusive) and its cut-off (exclusive); otherwise the value stays NaN.
cutoff_two_columns = ['V01MMTLA', 'V01MMTLB', 'V01MMTLP', 'V01MMRTL',
                      'V01MMTMA', 'V01MMTMB', 'V01MMTMP', 'V01MMRTM']
cutoff_one_columns = ['V01MMXLL', 'V01MMXLA', 'V01MMXMM', 'V01MMXMA']
cutoff_two_values = common_fnih_kxr_01[cutoff_two_columns]
cutoff_one_values = common_fnih_kxr_01[cutoff_one_columns]

is_degraded = (cutoff_two_values.ge(2.0).any(axis=1)
               | cutoff_one_values.ge(1.0).any(axis=1))
moaks_OA_features.loc[is_degraded, 'Meniscus_Degradation'] = 1.0

is_not_degraded = ((cutoff_two_values.ge(0.0) & cutoff_two_values.lt(2.0)).all(axis=1)
                   & (cutoff_one_values.ge(0.0) & cutoff_one_values.lt(1.0)).all(axis=1))
moaks_OA_features.loc[is_not_degraded, 'Meniscus_Degradation'] = 0.0

print('With Meniscal Degradation->1.0, Without->0.0 \n{}'.format(moaks_OA_features['Meniscus_Degradation'].value_counts(dropna=False)))


# print('Number of Subjects WITH Meniscus Degradation: {}'.
#         format((moaks_OA_features['Meniscus_Degradation']==1.0).sum()))

# print('Number of Subjects WITHOUT Meniscus Degradation: {}'.
#         format((moaks_OA_features['Meniscus_Degradation']==0.0).sum()))

With Meniscal Degradation->1.0, Without->0.0 
1.0    481
0.0     98
NaN      5
Name: Meniscus_Degradation, dtype: int64


In [25]:
# Missing-value count of every feature-table column, in column order.
feature_nan_counts = moaks_OA_features.isna().sum().to_list()
print('Number of NaN values per column: {}'.format(feature_nan_counts))

Number of NaN values per column: [0, 0, 0, 10, 5, 5, 42, 5]


In [26]:
# Display the feature table now that the feature columns have been filled.
moaks_OA_features

Unnamed: 0,ID,SIDE,XR_Osteophytes,FullTCL,AnyTCL,PartialTCL,BML,Meniscus_Degradation
0,9001695,1,1.0,,1.0,1.0,1.0,1.0
1,9002116,2,1.0,0.0,1.0,1.0,1.0,1.0
2,9002430,1,1.0,0.0,1.0,1.0,0.0,1.0
3,9002817,1,1.0,0.0,1.0,1.0,1.0,1.0
4,9003316,1,1.0,0.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...
579,9993833,2,1.0,0.0,1.0,1.0,1.0,1.0
580,9994408,1,1.0,0.0,1.0,1.0,1.0,1.0
581,9995338,2,1.0,0.0,1.0,1.0,0.0,0.0
582,9996098,1,1.0,0.0,1.0,1.0,0.0,1.0


In [27]:
# Save the feature table to CSV (the index is written as the first column).
moaks_OA_features.to_csv(path_or_buf='moaks_fnih_sq01_OA_features.csv')

# NOW WE CONSTRUCT THE DF THAT CONTAINS THE OA CRITERIA AS COLUMNS

In [28]:
# Criteria table: ID and SIDE (integers) come from the merged table; reindexing to
# the full column list appends the five criterion columns, all filled with NaN.
moaks_OA_criteria = common_fnih_kxr_01[['ID', 'SIDE']].reindex(
    columns=['ID', 'SIDE',
             'Osteo_AND_FullTCL',
             'Osteo_AND_PartialTCL_AND_BML',
             'Osteo_AND_PartialTCL_AND_Meniscus_Degradation',
             'Osteo_AND_BML_AND_Meniscus_Degradation',
             'FullTCL_AND_BML_AND_Meniscus_Degradation'])
moaks_OA_criteria

Unnamed: 0,ID,SIDE,Osteo_AND_FullTCL,Osteo_AND_PartialTCL_AND_BML,Osteo_AND_PartialTCL_AND_Meniscus_Degradation,Osteo_AND_BML_AND_Meniscus_Degradation,FullTCL_AND_BML_AND_Meniscus_Degradation
0,9001695,1,,,,,
1,9002116,2,,,,,
2,9002430,1,,,,,
3,9002817,1,,,,,
4,9003316,1,,,,,
...,...,...,...,...,...,...,...
579,9993833,2,,,,,
580,9994408,1,,,,,
581,9995338,2,,,,,
582,9996098,1,,,,,


In [29]:
# Criterion: XR osteophytes AND full-thickness cartilage loss.
# 1.0 when both features equal 1.0, otherwise 0.0 -- a NaN feature therefore
# counts as "criterion not met".
osteo_and_full = moaks_OA_features[['XR_Osteophytes', 'FullTCL']].eq(1.0).all(axis=1)
moaks_OA_criteria['Osteo_AND_FullTCL'] = osteo_and_full.astype(float)

print('With XR Osteophytes AND FullTCL->1.0, Without->0.0: \n{}'.
        format(moaks_OA_criteria['Osteo_AND_FullTCL'].value_counts(dropna=False)))

With XR Osteophytes AND FullTCL->1.0, Without->0.0: 
0.0    519
1.0     65
Name: Osteo_AND_FullTCL, dtype: int64


In [30]:
# Criterion: XR osteophytes AND partial-thickness cartilage loss AND BML.
# 1.0 when all three features equal 1.0, otherwise 0.0 (NaN features count as unmet).
osteo_partial_bml = (
    moaks_OA_features[['XR_Osteophytes', 'PartialTCL', 'BML']].eq(1.0).all(axis=1)
)
moaks_OA_criteria['Osteo_AND_PartialTCL_AND_BML'] = osteo_partial_bml.astype(float)
print('With XR Osteophytes AND PartialTCL AND BML->1.0, Without->0.0 \n{}'.
        format(moaks_OA_criteria['Osteo_AND_PartialTCL_AND_BML'].value_counts(dropna=False)))

With XR Osteophytes AND PartialTCL AND BML->1.0, Without->0.0 
0.0    362
1.0    222
Name: Osteo_AND_PartialTCL_AND_BML, dtype: int64


In [31]:
# Criterion: XR osteophytes AND partial-thickness cartilage loss AND meniscus degradation.
# 1.0 when all three features equal 1.0, otherwise 0.0 (NaN features count as unmet).
osteo_partial_meniscus = (
    moaks_OA_features[['XR_Osteophytes', 'PartialTCL', 'Meniscus_Degradation']]
    .eq(1.0)
    .all(axis=1)
)
moaks_OA_criteria['Osteo_AND_PartialTCL_AND_Meniscus_Degradation'] = osteo_partial_meniscus.astype(float)
print('With Osteophytes AND Partial TCL AND Meniscus_Degradation->1.0, Without->0.0: \n{}'.
        format(moaks_OA_criteria['Osteo_AND_PartialTCL_AND_Meniscus_Degradation'].value_counts(dropna=False)))

With Osteophytes AND Partial TCL AND Meniscus_Degradation->1.0, Without->0.0: 
1.0    376
0.0    208
Name: Osteo_AND_PartialTCL_AND_Meniscus_Degradation, dtype: int64


In [32]:
# Criterion: XR osteophytes AND BML AND meniscus degradation.
# 1.0 when all three features equal 1.0, otherwise 0.0 (NaN features count as unmet).
osteo_bml_meniscus = (
    moaks_OA_features[['XR_Osteophytes', 'BML', 'Meniscus_Degradation']]
    .eq(1.0)
    .all(axis=1)
)
moaks_OA_criteria['Osteo_AND_BML_AND_Meniscus_Degradation'] = osteo_bml_meniscus.astype(float)
print('With Osteophytes AND BML AND Meniscus_Degradation->1.0, Without->0.0: \n{}'.
        format(moaks_OA_criteria['Osteo_AND_BML_AND_Meniscus_Degradation'].value_counts(dropna=False)))

With Osteophytes AND BML AND Meniscus_Degradation->1.0, Without->0.0: 
0.0    337
1.0    247
Name: Osteo_AND_BML_AND_Meniscus_Degradation, dtype: int64


In [33]:
# Criterion: full-thickness cartilage loss AND BML AND meniscus degradation.
# 1.0 when all three features equal 1.0, otherwise 0.0 (NaN features count as unmet).
full_bml_meniscus = (
    moaks_OA_features[['FullTCL', 'BML', 'Meniscus_Degradation']]
    .eq(1.0)
    .all(axis=1)
)
moaks_OA_criteria['FullTCL_AND_BML_AND_Meniscus_Degradation'] = full_bml_meniscus.astype(float)
print('With FullTCL AND BML AND Meniscus_Degradation->1.0, Without->0.0: \n{}'.
        format(moaks_OA_criteria['FullTCL_AND_BML_AND_Meniscus_Degradation'].value_counts(dropna=False)))


With FullTCL AND BML AND Meniscus_Degradation->1.0, Without->0.0: 
0.0    540
1.0     44
Name: FullTCL_AND_BML_AND_Meniscus_Degradation, dtype: int64


In [34]:
# isna().sum() counts the missing (NaN) entries of each column, so the label says
# exactly that; it is not a count of subjects without OA.
print('Number of NaN values for each criterion is: \n{}'.format(moaks_OA_criteria.isna().sum()))

Number of subjects without OA for each criterion is: 
ID                                               0
SIDE                                             0
Osteo_AND_FullTCL                                0
Osteo_AND_PartialTCL_AND_BML                     0
Osteo_AND_PartialTCL_AND_Meniscus_Degradation    0
Osteo_AND_BML_AND_Meniscus_Degradation           0
FullTCL_AND_BML_AND_Meniscus_Degradation         0
dtype: int64


In [35]:
# Encode any NaN left in the criteria table with the sentinel -1.0.
moaks_OA_criteria = moaks_OA_criteria.fillna(value=-1.0)

In [36]:
# Save the criteria table to CSV (the index is written as the first column).
moaks_OA_criteria.to_csv(path_or_buf='moaks_fnih_sq01_OA_criteria.csv')

In [37]:
# One sub-table per criterion: the rows where that criterion equals 1.0, reduced to
# ID, SIDE and the criterion column itself.
moaks_osteo_FullTCL = moaks_OA_criteria.loc[
    moaks_OA_criteria['Osteo_AND_FullTCL'].eq(1.0),
    ['ID', 'SIDE', 'Osteo_AND_FullTCL'],
]

moaks_osteo_PartialTCL_bml = moaks_OA_criteria.loc[
    moaks_OA_criteria['Osteo_AND_PartialTCL_AND_BML'].eq(1.0),
    ['ID', 'SIDE', 'Osteo_AND_PartialTCL_AND_BML'],
]

moaks_osteo_PartialTCL_meniscus = moaks_OA_criteria.loc[
    moaks_OA_criteria['Osteo_AND_PartialTCL_AND_Meniscus_Degradation'].eq(1.0),
    ['ID', 'SIDE', 'Osteo_AND_PartialTCL_AND_Meniscus_Degradation'],
]

moaks_osteo_bml_meniscus = moaks_OA_criteria.loc[
    moaks_OA_criteria['Osteo_AND_BML_AND_Meniscus_Degradation'].eq(1.0),
    ['ID', 'SIDE', 'Osteo_AND_BML_AND_Meniscus_Degradation'],
]

moaks_FullTCL_bml_meniscus = moaks_OA_criteria.loc[
    moaks_OA_criteria['FullTCL_AND_BML_AND_Meniscus_Degradation'].eq(1.0),
    ['ID', 'SIDE', 'FullTCL_AND_BML_AND_Meniscus_Degradation'],
]

In [38]:
# NOW WE FIND THOSE WHO SATISFY AT LEAST ONE OF THE OA CRITERIA
# moaks_OA is 1.0 when any of the five criterion columns equals 1.0, else 0.0.
criterion_columns = ['Osteo_AND_FullTCL',
                     'Osteo_AND_PartialTCL_AND_BML',
                     'Osteo_AND_PartialTCL_AND_Meniscus_Degradation',
                     'Osteo_AND_BML_AND_Meniscus_Degradation',
                     'FullTCL_AND_BML_AND_Meniscus_Degradation']
meets_any_criterion = moaks_OA_criteria[criterion_columns].eq(1.0).any(axis=1)
moaks_OA_criteria['moaks_OA'] = meets_any_criterion.astype(float)
# Any NaN still present anywhere in the table is replaced with 0.0 as well.
moaks_OA_criteria.fillna(0.0, inplace=True)
n_with_oa = moaks_OA_criteria['moaks_OA'].eq(1.0).sum()
print('Number of subjects that satisfy at least one OA criterion is : {}'.format(n_with_oa))

Number of subjects that satisfy at least one OA criterion is : 466


In [39]:
# Tally of the overall moaks_OA flag.
moaks_OA_criteria.moaks_OA.value_counts()

1.0    466
0.0    118
Name: moaks_OA, dtype: int64

# NOW WE CREATE THE DATAFRAME CONTAINING INFORMATION REGARDING 
# 1.MOAKS FEATURES 
# 2.MOAKS CRITERIA
# 3.MOAKS GRADES
# 4.KL GRADES
# 5.MOAKS-KL GRADES CROSSTABLE

In [40]:
# Load the table with the KL grade per (ID, SIDE) and inner-join it onto the
# MOAKS criteria table.
kl_id_side_grade = pd.read_csv('/home/anastasis/EMC_Thesis/My_codes/KL_readings/KL_SQ01_ID_SIDE_GRADE.csv')
moaks_kl_and_criteria = pd.merge(moaks_OA_criteria, kl_id_side_grade, on=['ID', 'SIDE'], how='inner')
# info() prints its own summary and returns None, so it is not wrapped in print().
moaks_kl_and_criteria.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 584 entries, 0 to 583
Data columns (total 9 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   ID                                             584 non-null    int64  
 1   SIDE                                           584 non-null    int64  
 2   Osteo_AND_FullTCL                              584 non-null    float64
 3   Osteo_AND_PartialTCL_AND_BML                   584 non-null    float64
 4   Osteo_AND_PartialTCL_AND_Meniscus_Degradation  584 non-null    float64
 5   Osteo_AND_BML_AND_Meniscus_Degradation         584 non-null    float64
 6   FullTCL_AND_BML_AND_Meniscus_Degradation       584 non-null    float64
 7   moaks_OA                                       584 non-null    float64
 8   V01XRKL                                        584 non-null    float64
dtypes: float64(7), int64(2)
memory usage: 45.6 KB
None


In [41]:
# Frequency of each V01XRKL value in the merged criteria/KL table.
kl_grade_counts = moaks_kl_and_criteria['V01XRKL'].value_counts()
print('Number of different KL grades in the moaks_kl_and_criteria dataframe:\n{}'
        .format(kl_grade_counts))


Number of different KL grades in the moaks_kl_and_criteria dataframe:
2.0    296
3.0    224
1.0     59
4.0      5
Name: V01XRKL, dtype: int64


In [42]:
# Save the criteria + KL grade table to CSV (the index is written as the first column).
moaks_kl_and_criteria.to_csv(path_or_buf='moaks_fnih_sq01_kl_grades_and_features.csv')


In [43]:
# Outer-join the feature table with the criteria/KL table on (ID, SIDE) so that
# features, criteria, the overall flag and the KL grade sit in one frame.
moaks_KL_OA_all_info = moaks_OA_features.merge(
    moaks_kl_and_criteria,
    on=['ID', 'SIDE'],
    how='outer',
)

In [44]:
# Print the column summary (dtypes and non-null counts) of the combined table.
moaks_KL_OA_all_info.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 584 entries, 0 to 583
Data columns (total 15 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   ID                                             584 non-null    int64  
 1   SIDE                                           584 non-null    int64  
 2   XR_Osteophytes                                 584 non-null    float64
 3   FullTCL                                        574 non-null    float64
 4   AnyTCL                                         579 non-null    float64
 5   PartialTCL                                     579 non-null    float64
 6   BML                                            542 non-null    float64
 7   Meniscus_Degradation                           579 non-null    float64
 8   Osteo_AND_FullTCL                              584 non-null    float64
 9   Osteo_AND_PartialTCL_AND_BML                   584 non

In [45]:
# Save the combined table to CSV (the index is written as the first column).
moaks_KL_OA_all_info.to_csv(path_or_buf='moaks_fnih_sq01_all_info.csv')

In [46]:
# Narrow views of the combined table: the overall flag alone, and the flag
# together with the KL grade.
all_moaks_grades_sq01 = moaks_KL_OA_all_info[['ID', 'SIDE', 'moaks_OA']]
all_moaks_vs_kl_grades_sq01 = moaks_KL_OA_all_info[['ID', 'SIDE', 'moaks_OA', 'V01XRKL']]

In [47]:
# Row counts for each value of the overall moaks_OA flag.
n_moaks_zero = all_moaks_grades_sq01['moaks_OA'].eq(0.0).sum()
n_moaks_one = all_moaks_grades_sq01['moaks_OA'].eq(1.0).sum()
print('Sum of FNIH_SQ01 subjects that have MOAKS = 0.0: {}'.format(n_moaks_zero))
print('Sum of FNIH_SQ01 subjects that have MOAKS = 1.0: {}'.format(n_moaks_one))

Sum of FNIH_SQ01 subjects that have MOAKS = 0.0: 118
Sum of FNIH_SQ01 subjects that have MOAKS = 1.0: 466


In [48]:
# Rows whose overall moaks_OA flag is 1.0, shown together with their count.
is_moaks_one = all_moaks_vs_kl_grades_sq01['moaks_OA'].eq(1.0)
moaks_fnih_sq01_only_moaks_one = all_moaks_vs_kl_grades_sq01.loc[is_moaks_one]
print(moaks_fnih_sq01_only_moaks_one,'\n','number of subjects with moaks grade = 1.0: ',moaks_fnih_sq01_only_moaks_one['moaks_OA'].count())

          ID  SIDE  moaks_OA  V01XRKL
0    9001695     1       1.0      2.0
1    9002116     2       1.0      3.0
2    9002430     1       1.0      2.0
3    9002817     1       1.0      3.0
4    9003316     1       1.0      2.0
..       ...   ...       ...      ...
578  9993650     2       1.0      4.0
579  9993833     2       1.0      3.0
580  9994408     1       1.0      3.0
582  9996098     1       1.0      3.0
583  9997381     1       1.0      2.0

[466 rows x 4 columns] 
 number of subjects with moaks grade = 1.0:  466


In [49]:
# Rows whose overall moaks_OA flag is 0.0, shown together with their count.
is_moaks_zero = all_moaks_vs_kl_grades_sq01['moaks_OA'].eq(0.0)
moaks_fnih_sq01_only_moaks_zero = all_moaks_vs_kl_grades_sq01.loc[is_moaks_zero]
print(moaks_fnih_sq01_only_moaks_zero,'\n','number of subjects with moaks grade = 0.0: ',moaks_fnih_sq01_only_moaks_zero['moaks_OA'].count())

          ID  SIDE  moaks_OA  V01XRKL
5    9003380     1       0.0      1.0
7    9004175     1       0.0      2.0
16   9015798     1       0.0      2.0
20   9021102     2       0.0      2.0
21   9022789     1       0.0      2.0
..       ...   ...       ...      ...
549  9936312     2       0.0      2.0
550  9936451     1       0.0      2.0
567  9969009     2       0.0      2.0
570  9981798     1       0.0      1.0
581  9995338     2       0.0      2.0

[118 rows x 4 columns] 
 number of subjects with moaks grade = 0.0:  118


In [50]:
# Display the table pairing the overall moaks_OA flag with the V01XRKL grade.
all_moaks_vs_kl_grades_sq01

Unnamed: 0,ID,SIDE,moaks_OA,V01XRKL
0,9001695,1,1.0,2.0
1,9002116,2,1.0,3.0
2,9002430,1,1.0,2.0
3,9002817,1,1.0,3.0
4,9003316,1,1.0,2.0
...,...,...,...,...
579,9993833,2,1.0,3.0
580,9994408,1,1.0,3.0
581,9995338,2,0.0,2.0
582,9996098,1,1.0,3.0


In [51]:
# Save each result table to its own CSV file, in the order listed
# (the index is written as the first column of every file).
for _frame, _csv_name in (
    (all_moaks_vs_kl_grades_sq01, 'moaks_fnih_sq01_moaks_vs_kl.csv'),
    (moaks_fnih_sq01_only_moaks_one, 'moaks_fnih_sq01_only_moaks_one.csv'),
    (moaks_fnih_sq01_only_moaks_zero, 'moaks_fnih_sq01_only_moaks_zero.csv'),
    (all_moaks_grades_sq01, 'moaks_fnih_sq01_moaks_grades.csv'),
):
    _frame.to_csv(_csv_name)

In [52]:
# Cross-tabulate the overall moaks_OA flag (rows) against the V01XRKL grade
# (columns), with row/column totals labelled 'Total'; save it and display it.
moaks_OA_KL_crosstab = pd.crosstab(
    index=all_moaks_vs_kl_grades_sq01['moaks_OA'],
    columns=all_moaks_vs_kl_grades_sq01['V01XRKL'],
    margins=True,
    margins_name='Total',
)
moaks_OA_KL_crosstab.to_csv('moaks_fnih_sq01_OA_KL_crosstab.csv')
moaks_OA_KL_crosstab

V01XRKL,1.0,2.0,3.0,4.0,Total
moaks_OA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,48,68,2,0,118
1.0,11,228,222,5,466
Total,59,296,224,5,584


In [53]:
# NOW WE SAVE THE SUBJECTS WITH ZERO FEATURES IN ORDER TO INVESTIGATE THE INCIDENCE OA
# For each feature: select the rows where it is exactly 0.0 (NaN rows are excluded),
# write them to CSV and report how many there are.

# XR osteophytes absent.
moaks_OA_osteo_zero = moaks_OA_features.loc[moaks_OA_features['XR_Osteophytes'].eq(0.0)]
moaks_OA_osteo_zero.to_csv('moaks_fnih_sq01_Zero_Osteo.csv')
print('Number of subjects without definite osteophytes in FNIH_SQ01 is : {}'.format(len(moaks_OA_osteo_zero)))

# Neither full nor partial thickness cartilage loss.
no_tcl = moaks_OA_features['FullTCL'].eq(0.0) & moaks_OA_features['PartialTCL'].eq(0.0)
moaks_OA_TCL_zero = moaks_OA_features.loc[no_tcl]
moaks_OA_TCL_zero.to_csv('moaks_fnih_sq01_Zero_TCL.csv')
print('Number of subjects without TCL in FNIH_SQ01 is : {}'.format(len(moaks_OA_TCL_zero)))

# BML absent.
moaks_OA_bml_zero = moaks_OA_features.loc[moaks_OA_features['BML'].eq(0.0)]
moaks_OA_bml_zero.to_csv('moaks_fnih_sq01_Zero_BML.csv')
print('Number of subjects without BML in FNIH_SQ01 is : {}'.format(len(moaks_OA_bml_zero)))

# Meniscus degradation absent.
moaks_OA_meniscus_zero = moaks_OA_features.loc[moaks_OA_features['Meniscus_Degradation'].eq(0.0)]
moaks_OA_meniscus_zero.to_csv('moaks_fnih_sq01_Zero_Meniscus.csv')
print('Number of subjects without Meniscus Degradation in FNIH_SQ01 is : {}'.format(len(moaks_OA_meniscus_zero)))


Number of subjects without definite osteophytes in FNIH_SQ01 is : 46
Number of subjects without TCL in FNIH_SQ01 is : 32
Number of subjects without BML in FNIH_SQ01 is : 264
Number of subjects without Meniscus Degradation in FNIH_SQ01 is : 98
