In [1]:
#########################
# CODE WITH MANUAL IMPLEMENTATION OF FLD(Code From Scratch) 2 FACTOR(S)
# Classification Type : Fisher Linear Discriminant Analysis (FLD)
# Using only Two factors BMD vs Fractures(1/0)
# DATASET :"Bone Mineral Density(bmd.csv)"
#########################

In [2]:
#########################
# Importing Basic Libraries
#########################
import pandas as pd
import numpy as np
import math

In [3]:
#########################
# Loading dataset
#########################
# Location of the Bone Mineral Density CSV (absolute path on the author's machine).
bmd_csv_path = "E:/Codes/Jupyter/Datasets/bmd.csv"

# Read the raw table and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=bmd_csv_path)
df.head(n=5)

Unnamed: 0,id,age,sex,fracture,weight_kg,height_cm,medication,waiting_time,bmd
0,469,57.052768,F,no fracture,64.0,155.5,Anticonvulsant,18,0.8793
1,8724,75.741225,F,no fracture,78.0,162.0,No medication,56,0.7946
2,6736,70.7789,M,no fracture,73.0,170.5,No medication,10,0.9067
3,24180,78.247175,F,no fracture,60.0,148.0,No medication,14,0.7112
4,17072,54.191877,M,no fracture,55.0,161.0,No medication,20,0.7909


In [4]:
################
# Cleaning Data
###############

from sklearn.preprocessing import LabelEncoder

# creating instance of labelencoder
labelencoder = LabelEncoder()

# Replace each categorical column with integer codes, in place.
# LabelEncoder numbers the classes in sorted label order, so in 'fracture'
# the label "fracture" becomes 0 and "no fracture" becomes 1 -- the opposite
# of the usual "1 means positive" convention.
# The encoder is refitted for every column; after the loop it holds the
# classes of 'sex' only.
for column in ('fracture', 'medication', 'sex'):
    df[column] = labelencoder.fit_transform(df[column])

# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because 'id' has already been removed.
df = df.drop(columns=['id'], errors='ignore')
df

Unnamed: 0,age,sex,fracture,weight_kg,height_cm,medication,waiting_time,bmd
0,57.052768,0,1,64.0,155.5,0,18,0.8793
1,75.741225,0,1,78.0,162.0,2,56,0.7946
2,70.778900,1,1,73.0,170.5,2,10,0.9067
3,78.247175,0,1,60.0,148.0,2,14,0.7112
4,54.191877,1,1,55.0,161.0,2,20,0.7909
...,...,...,...,...,...,...,...,...
164,77.982543,1,0,74.0,164.0,2,49,0.7941
165,50.285303,0,0,59.0,161.0,2,6,0.7971
166,46.359721,1,0,67.0,169.0,2,10,0.8037
167,54.788368,1,0,70.0,166.0,2,14,0.8072


In [5]:
##########################
# Mean & Sigma Calculations
##########################

# BMD readings per outcome group as 1-D arrays.
# 'fracture' holds LabelEncoder codes: 0 == "fracture", 1 == "no fracture".
frac = df.loc[df['fracture'] == 0, 'bmd'].to_numpy()
no_frac = df.loc[df['fracture'] == 1, 'bmd'].to_numpy()

# Class means, rounded to 6 decimals.
frac_mean = round(np.mean(frac), 6)
no_frac_mean = round(np.mean(no_frac), 6)

# Pooled within-class sum of squared deviations.  Working on 1-D arrays makes
# this a true scalar; the previous (n, 1) arrays produced a one-element array,
# and math.sqrt() on that relies on an array-to-scalar conversion that NumPy
# has deprecated.
count = np.sum((frac - frac_mean)**2) + np.sum((no_frac - no_frac_mean)**2)

# Shared standard deviation: divide by N - 2 because two class means were estimated.
sigma = math.sqrt(count / (len(df) - 2))
sigma = round(sigma, 6)

In [6]:
# Show the fracture-group mean, the no-fracture-group mean and the pooled sigma.
print(frac_mean,no_frac_mean,sigma)

0.623308 0.850245 0.130539


In [7]:
#########################
# Calculating Probability of Fractures
# And Assigning values to a dataset
#########################
# Prior probability of each outcome: group size divided by the number of rows.
prob_frac = len(frac) / len(df)
prob_no_frac = len(no_frac) / len(df)

# Column name -> single-element list, giving a one-row table of priors.
data = {
    'Fracture': [prob_frac],
    'No fracture': [prob_no_frac],
}

# Create DataFrame
df_prob = pd.DataFrame(data)
df_prob

Unnamed: 0,Fracture,No fracture
0,0.295858,0.704142


In [8]:
# Worked example: evaluate both discriminant scores for a single BMD reading.
bmd_example = 0.54

# Pull the priors out as plain floats.  Passing a one-element Series straight
# to math.log() depends on a Series-to-float conversion that pandas has deprecated.
prior_frac = float(df_prob["Fracture"].iloc[0])
prior_no_frac = float(df_prob["No fracture"].iloc[0])

#for fracture
score_frac = bmd_example*frac_mean/sigma**2 - frac_mean**2/(2*sigma**2) + math.log(prior_frac)

#for no fracture
score_no_frac = bmd_example*no_frac_mean/sigma**2 - no_frac_mean**2/(2*sigma**2) + math.log(prior_no_frac)

# A notebook cell displays only its last expression, so the no-fracture score
# is what appears below; the fracture score is kept in `score_frac` instead of
# being computed and thrown away.
score_no_frac

5.381123386450154

In [9]:
#########################
# Function to predict fractures/no fractures 
# based on BMD values input by the user
#########################
def test_func(bmd):
    Fracture = (bmd*frac_mean/sigma**2 - frac_mean**2/(2*sigma**2) + math.log(df_prob["Fracture"]))
    No_Fracture = (bmd*no_frac_mean/sigma**2 - no_frac_mean**2/(2*sigma**2) + math.log(df_prob["No fracture"]))
    if Fracture > No_Fracture:
        print("FRACTURE probably")
    else:
        print("NO FRACTURE probably")

In [11]:
#########################
# Input Function
#########################
# Ask for a BMD reading, convert the typed text to a float, then classify it.
bmd_text = input("Enter bmd: ")
Num = float(bmd_text)
test_func(Num)

Enter bmd: 0.1
FRACTURE probably
