### Import packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

### Import dataset

In [2]:
df = pd.read_csv('plid.csv')
df.head()

Unnamed: 0,Timestamp,Id,Age,Sex,Occupation,Low back pain,Low back pain with Sciatica,Bowel Bladder Involvement,Straight Leg Raising Test,Femoral Stretching Test,...,Operative Findings,Type of Operation,Annulus,Pre operative ODI,Post operative ODI,Pre operative NRS back pain,Post operative NRS back pain,Surgery outcome according to Macnab criteria,Pre operative NRS leg pain,Post operative NRS leg pain
0,2025/08/02 7:29:18 pm GMT+6,1201,36-40,Male,Manual worker,Yes,Left,No,Restricted,Negative,...,Lateral,Laminotomy,Intact,,,,,,,
1,2025/08/02 7:30:34 pm GMT+6,1202,41-45,Male,Sedentary worker,Yes,Right,No,Restricted,Negative,...,Central Disk,Laminotomy,Intact,77.0,0.0,6.0,0.0,E,6.0,0.0
2,2025/08/02 7:42:41 pm GMT+6,1203,36-40,Female,Sedentary worker,Yes,Both,Yes,Restricted,Negative,...,Extruded Disc,Laminotomy,Ruptured,,,,,,,
3,2025/08/02 7:44:01 pm GMT+6,1204,36-40,Male,Manual worker,Yes,Both,No,Restricted,Negative,...,Central Disk,Laminotomy,Intact,88.0,64.0,10.0,7.0,P,8.0,6.0
4,2025/08/02 7:45:27 pm GMT+6,1205,56-60,Female,Sedentary worker,Yes,Right,No,Restricted,Negative,...,Lateral,Laminotomy;Unilateral fenestration & disectomy,Ruptured,,,,,,,


### analyze data

In [3]:
#check mising values
df.isnull().sum()

Timestamp                                         0
Id                                                0
Age                                               0
Sex                                               0
Occupation                                       10
Low back pain                                     9
Low back pain with Sciatica                       3
Bowel Bladder Involvement                        10
Straight Leg Raising Test                         9
Femoral Stretching Test                          24
Sensory Involvement                               6
Motor involvement                                 6
Knee Jerk                                        40
Ankle Jerk                                       38
Level of Disc Prolapse                            3
Operative Findings                               11
Type of Operation                                 7
Annulus                                          37
Pre operative ODI                               271
Post operati

In [8]:
#check data types
df.dtypes

Timestamp                                        object
Id                                                int64
Age                                              object
Sex                                              object
Occupation                                       object
Low back pain                                    object
Low back pain with Sciatica                      object
Bowel Bladder Involvement                        object
Straight Leg Raising Test                        object
Femoral Stretching Test                          object
Sensory Involvement                              object
Motor involvement                                object
Knee Jerk                                        object
Ankle Jerk                                       object
Level of Disc Prolapse                           object
Operative Findings                               object
Type of Operation                                object
Annulus                                         

In [10]:
df.shape

(349, 25)

In [13]:
df['Knee Jerk'].unique()

array(['Intact', 'Absent', nan], dtype=object)

In [5]:
df['Surgery outcome according to Macnab criteria'].count()

np.int64(78)

In [6]:
df['Pre operative ODI'].count()

np.int64(78)

In [7]:
df['Post operative ODI'].count()

np.int64(78)

In [8]:
df['Pre operative NRS back pain'].count()

np.int64(78)

In [9]:
df['Post operative NRS back pain'].count()

np.int64(78)

In [11]:
df['Pre operative NRS leg pain'].count()

np.int64(61)

In [10]:
df['Post operative NRS leg pain'].count()

np.int64(61)

### Imputation Phase

In [14]:
# Clone the dataframe
df_copy = df.copy()
# drop post operative columns
x1 = df_copy.drop(columns=['Post operative ODI', 'Post operative NRS back pain', 'Post operative NRS leg pain', 'Surgery outcome according to Macnab criteria'])
x1.columns

Index(['Timestamp', 'Id', 'Age', 'Sex', 'Occupation', 'Low back pain',
       'Low back pain with Sciatica', 'Bowel Bladder Involvement',
       'Straight Leg Raising Test', 'Femoral Stretching Test',
       'Sensory Involvement', 'Motor involvement', 'Knee Jerk', 'Ankle Jerk',
       'Level of Disc Prolapse', 'Operative Findings', 'Type of Operation',
       'Annulus', 'Pre operative ODI', 'Pre operative NRS back pain',
       'Pre operative NRS leg pain'],
      dtype='object')

In [None]:
x1['Knee Jerk'] = x1['Knee Jerk'].fillna('Unknown')
x1['Ankle Jerk'] = x1['Ankle Jerk'].fillna('Unkwnown')


In [22]:
cat_cols = [
    'Occupation', 'Low back pain', 'Low back pain with Sciatica',
    'Bowel Bladder Involvement', 'Straight Leg Raising Test',
    'Femoral Stretching Test', 'Sensory Involvement', 'Motor involvement',
    'Knee Jerk', 'Ankle Jerk', 'Level of Disc Prolapse', 'Operative Findings',
    'Type of Operation', 'Annulus'
]

for col in cat_cols:
    print(f"{col}: {x1[col].unique()}")

Occupation: ['Manual worker' 'Sedentary worker' nan 'Housewife']
Low back pain: ['Yes' 'No' nan]
Low back pain with Sciatica: ['Left' 'Right' 'Both' nan]
Bowel Bladder Involvement: ['No' 'Yes' nan]
Straight Leg Raising Test: ['Restricted' nan 'Not Restricted']
Femoral Stretching Test: ['Negative' nan 'Positive']
Sensory Involvement: ['Involved' 'Not Involved' nan]
Motor involvement: ['Involved' 'Not involved' nan]
Knee Jerk: ['Intact' 'Absent' 'Unknown']
Ankle Jerk: ['Intact' 'Absent' 'Unkwnown']
Level of Disc Prolapse: ['L4/5' 'L5/S1' 'More than one level' 'L4/5;L5/S1' 'L3/4;L4/5'
 'L4/5;L5/S1;More than one level' 'L5/S1;More than one level'
 'L3/4;L4/5;L5/S1;More than one level' 'L3/4' 'L3/4;L5/S1' 'L2/3' nan
 'L1/2;L5/S1']
Operative Findings: ['Lateral' 'Central Disk' 'Extruded Disc' 'Paramedian Disc'
 'Sequestrated Disc' nan 'Lateral;Extruded Disc' 'Hard Disc'
 'Paramedian Disc;Extruded Disc' 'Central Disk;Extruded Disc'
 'Central Disk;Hard Disc' 'Central Disk;Sequestrated Disc'
 '