In [1]:
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.pipeline import Pipeline
import numpy as np
import pandas as pd

## AIgorithm.
The main application has three objectives:
- Increase the response time to victims in accidents.
- Increase access to fast health service.
- Determine the best hospital to transport patient.


The algorithm is a simple one that is very easy to implement and based on the KNN system of prediction. Given some parameters of the different hospital in the vicinity of the accident, it attempts to provide victim with the best hospital choice by calculating the value of a distance metric (default=euclidean) between the victim(at origin 0) and these hospitals, taking into special consideration to space availability, inverse speed, and ETA. These features will be calculated and provided by the google maps api.
It then sorts this distance scalar value and returns the top hospitals on the list (These hospitals have the least distance to the victim's location). It is totally unsupervised with the initialization of weights (could be probably made supervised by determining the best feature weights somehow). 


The same can also be applied to an emergency service/body dataset. In this case, the model will make use of the location of the emergency department itself and assume that there is at least 1 free emergency vehicle in this facility.

In [49]:
class FeatureTransformer(TransformerMixin):
    ''' This transformer calculates the inverse speed given a dataset and calculates the probability of 
    no space by subtracting the probability of space availability from 1'''
    def fit(self,X,y=None):
        return self

    def transform(self, X):
        # Copy data
        Xt = X.copy()
        # Calculate the inverse speed
        Xt['Inverse_speed'] =  X['ETA (min)']/X['distance (km)']
        #Xt['mortality_rate(Accidents)'] = Xt['mortality_rate(Accidents)'] *100
        # Transform the space availability feature to probability of no space (subtract from 1)
        Xt['space available(prob.)']   =  (1 - Xt['space available(prob.)'])
        return Xt


class Hosp_Emergency_Transformer(TransformerMixin):
    '''This transformer recommends the best hospital by calculating the distance value (default= euclidean)
       between all hospitals in that region and the victim (assuming he is at position 0). It sorts out this
       distance value and returns the list of hospitals in ascending order (The first holds the least value).
    '''
    
    distance_map = {
        'euclidean': 2,
        'manhattan': 1
    }
    def __init__(self, weights=None, distance='euclidean'):
        #Initialize params
        self.weights  = weights
        self.distance = distance
        self.min_max_scaler = MinMaxScaler()

    def fit(self,X,y=None):
        return self

    def transform(self,X):
        # copy data
        Xt = X.copy()
        # Extract values from dataset
        hospital_vector = Xt.iloc[:,3:].values
        # Normalize data
        #scaled_vector =  self.min_max_scaler.fit_transform(hospital_vector)
        # Calculate the distance scalar value of the hospitals with respect to victims location
        Xt['scalar'] = self.get_distance(hospital_vector) #scaled_vector)
        # Sort hospitals based on the distance scalar quantity.
        Xt.sort_values('scalar',inplace=True)
        self.mean_error(Xt)
        return Xt
    
    def mean_error(self,X):
        eta = X['ETA (min)'] 
        eta_diff = []
        for ind, value in enumerate(eta):
            next_ind = ind + 1
            if next_ind < len(eta):
                eta_diff.append(value - eta[next_ind] )
            else:
                break
        self.error = sum(eta_diff) / len(eta_diff)
        #print(self.error)
            
        
        
    def get_distance(self,data):
        # Create list to store scalar distance value
        scalar_data = []
        # Iterate through each row in the dataset
        for row in data:
            # Initialize distance
            distance = 0.0
            # Go through each feature value in each row
            for feat_index in range(len(row)):
                if self.weights:
                    # if weight was given, use weight to calculate the distance
                    distance += np.power((row[feat_index] * self.weights[feat_index]), self.distance_map[self.distance])
                else:
                    # if no weight, use the values of the features themselves.
                    distance += np.power(row[feat_index], self.distance_map[self.distance])
            # Calculate the square root of distance calculated (euclidean distance)
            scalar_distance = np.sqrt(distance)
            # Append the value to the scalar_data list.
            scalar_data.append(scalar_distance)
        # conver to numpy array and return array (reshaped to a 2-dimensions)
        scalar_data = np.array(scalar_data)
        #print(scalar_data)
        return scalar_data.reshape(-1,1)
    
def best_choices(data, top=3):
    return data.iloc[:top]

## Test Algorithm with data.
Let's synthesize data and test the algorithm with and without weights.

In [50]:
# Columns for dataset.
columns= ['Name','region','contact','distance (km)','ETA (min)','mortality_rate(Accidents)','space available(prob.)']
# Weights to be used for algorithm (pays more attention to ETA, space availability, inverse speed)
# synthetic data
test_data = [['Randle','Surulere','09093453256',10, 30, 0.3, 0.9 ],
            ['St Nicholas','Odan','09093452648',10, 35, 0.6, 0.5 ],
            ['General Hospital','Odan','09093376456',15, 30, 0.3, 0.9 ],
            ['LUTH','Idi-araba','09093297456',11, 32, 0.3, 0.8 ],
            ['LASUTH','ikeja','080234536456',8, 25, 0.65, 0.2 ],
            ['FMC','Ebute-meta','090124536456',15, 23, 0.44, 0.63 ],
            ['Reddington','Lagos Island','090734720456',40, 45, 0.3, 0.98 ],
            ['Verdic','Lagos Island','090534536234',25, 30, 0.22, 0.76 ],
            ['God own','ojodu','07034536456',30, 40, 0.5, 0.39 ],
            ['Military Hospital','Ikorodu','08093236456',5, 10, 0.2, 0.83 ],
            ['Airforce hospital','Ikeja','08093453256',8, 15, 0.85, 0.05 ],
            ['Blue cross','Ogba','08054536480',11, 39, 0.3, 0.66 ],
            ['Solid rock','Grammar School','09092766976',15, 30, 0.43, 0.32 ],
            ['County Hospital','Ogba','08044536456',25, 50, 0.4, 0.7 ]]

test_data = pd.DataFrame(test_data, columns=columns)
test_data.head()

Unnamed: 0,Name,region,contact,distance (km),ETA (min),mortality_rate(Accidents),space available(prob.)
0,Randle,Surulere,9093453256,10,30,0.3,0.9
1,St Nicholas,Odan,9093452648,10,35,0.6,0.5
2,General Hospital,Odan,9093376456,15,30,0.3,0.9
3,LUTH,Idi-araba,9093297456,11,32,0.3,0.8
4,LASUTH,ikeja,80234536456,8,25,0.65,0.2


## Test Algorithm on synthetic data without weights.
let's apply algorithm on the synthesized data and check out the result. First, we transform data initially to generate the 'inverse_speed' and 'Probaility of no space' features dynamically. I commented out the MinMaxScaler because it made the performance of the algorithm unstable.

In [51]:
feat_transformer = FeatureTransformer()
transformed_data = feat_transformer.fit_transform(test_data)
transformed_data.head()


Unnamed: 0,Name,region,contact,distance (km),ETA (min),mortality_rate(Accidents),space available(prob.),Inverse_speed
0,Randle,Surulere,9093453256,10,30,0.3,0.1,3.0
1,St Nicholas,Odan,9093452648,10,35,0.6,0.5,3.5
2,General Hospital,Odan,9093376456,15,30,0.3,0.1,2.0
3,LUTH,Idi-araba,9093297456,11,32,0.3,0.2,2.909091
4,LASUTH,ikeja,80234536456,8,25,0.65,0.8,3.125


In [52]:
# Determine the top 6 best hospitals using the algorithm without weight initialization.
hosp_transformer = Hosp_Emergency_Transformer()
best_hosp_data = hosp_transformer.fit_transform(transformed_data)
best_choices(best_hosp_data, top=6)

Unnamed: 0,Name,region,contact,distance (km),ETA (min),mortality_rate(Accidents),space available(prob.),Inverse_speed,scalar
9,Military Hospital,Ikorodu,8093236456,5,10,0.2,0.17,2.0,11.360849
10,Airforce hospital,Ikeja,8093453256,8,15,0.85,0.95,1.875,17.150528
4,LASUTH,ikeja,80234536456,8,25,0.65,0.8,3.125,26.454265
5,FMC,Ebute-meta,90124536456,15,23,0.44,0.37,1.533333,27.507846
0,Randle,Surulere,9093453256,10,30,0.3,0.1,3.0,31.766334
2,General Hospital,Odan,9093376456,15,30,0.3,0.1,2.0,33.602083


We see that the algorithm does fairly well. This simple model recommended military hospital as the best hospital with an ETA of 10 min, inverse_speed of 2 and space availability probability of 0.17. However for the 4th and 5th, we see that because the lower space availability probability, it rated LASUTH (ETA-25 mins, dist- 15km) over FMC Ebute-meta (ETA - 23 mins, dist-15). This is because it considered the distance more in this case despite the difference in mortality rate and space availability (Features were not scaled so it led to comparisons at different scales.

Also, by weighting these features, will make the algorithm give more attention to some features in cases where it might be needed to consider more parameters. Here, we will deliberately pay more attention to the ETA, inverse_speed, space availability probability. We will do this by reducin the weights for this feature. Let's see how the algorithm performs when trying to recommend the best hospital. 

In [53]:
print(f'The mean of the sum of difference of ETA - {hosp_transformer.error}')

The mean of the sum of difference of ETA - -1.1538461538461537


The negative value for the mean of the sum of difference of ETa here signifies that the model does quite well. This value is calculated using the sorted data. It calculates the sum of the difference between the ETA values for each row in the order they have been sorted and then, finds the mean. The more negative this value, the better the model should be because it means that each ETA value was less than the next.

## Test algorithm on synthetic data with weights.
We do this by reducing the weights of this important parameters.

In [54]:
# Create transformer, however assign weight parameter: ETA, space availability, inverse_speed
weights = [1, 0.6, 1, 1.5, 1.2]
weighted_hosp_transformer = Hosp_Emergency_Transformer(weights)
weighted_best_hosp = weighted_hosp_transformer.fit_transform(transformed_data)
best_choices(weighted_best_hosp, top=6)

Unnamed: 0,Name,region,contact,distance (km),ETA (min),mortality_rate(Accidents),space available(prob.),Inverse_speed,scalar
9,Military Hospital,Ikorodu,8093236456,5,10,0.2,0.17,2.0,8.177104
10,Airforce hospital,Ikeja,8093453256,8,15,0.85,0.95,1.875,12.361862
4,LASUTH,ikeja,80234536456,8,25,0.65,0.8,3.125,17.462102
5,FMC,Ebute-meta,90124536456,15,23,0.44,0.37,1.533333,20.477481
0,Randle,Surulere,9093453256,10,30,0.3,0.1,3.0,20.906279
3,LUTH,Idi-araba,9093297456,11,32,0.3,0.2,2.909091,22.4055


In this case, we do not see any change in the weights sufficient enough to alter the decisions of the algorithm.

In [55]:
print(f'The mean of the sum of difference of ETA - {weighted_hosp_transformer.error}')

The mean of the sum of difference of ETA - -1.1538461538461537


## Using the manhattan distance for calculation.

In [56]:

weighted_hosp_transformer = Hosp_Emergency_Transformer(weights,distance='manhattan')
weighted_best_hosp = weighted_hosp_transformer.fit_transform(transformed_data)
best_choices(weighted_best_hosp,top=6)

Unnamed: 0,Name,region,contact,distance (km),ETA (min),mortality_rate(Accidents),space available(prob.),Inverse_speed,scalar
9,Military Hospital,Ikorodu,8093236456,5,10,0.2,0.17,2.0,3.722231
10,Airforce hospital,Ikeja,8093453256,8,15,0.85,0.95,1.875,4.639504
4,LASUTH,ikeja,80234536456,8,25,0.65,0.8,3.125,5.347897
5,FMC,Ebute-meta,90124536456,15,23,0.44,0.37,1.533333,5.6245
0,Randle,Surulere,9093453256,10,30,0.3,0.1,3.0,5.661272
3,LUTH,Idi-araba,9093297456,11,32,0.3,0.2,2.909091,5.855844


## Test algorithm on synthetic data without StandardScaler.

In [57]:

weighted_hosp_transformer = Hosp_Emergency_Transformer()
weighted_best_hosp = weighted_hosp_transformer.fit_transform(transformed_data)
best_choices(weighted_best_hosp,top=6)

Unnamed: 0,Name,region,contact,distance (km),ETA (min),mortality_rate(Accidents),space available(prob.),Inverse_speed,scalar
9,Military Hospital,Ikorodu,8093236456,5,10,0.2,0.17,2.0,11.360849
10,Airforce hospital,Ikeja,8093453256,8,15,0.85,0.95,1.875,17.150528
4,LASUTH,ikeja,80234536456,8,25,0.65,0.8,3.125,26.454265
5,FMC,Ebute-meta,90124536456,15,23,0.44,0.37,1.533333,27.507846
0,Randle,Surulere,9093453256,10,30,0.3,0.1,3.0,31.766334
2,General Hospital,Odan,9093376456,15,30,0.3,0.1,2.0,33.602083


Again, unscaled data does better than the scaled data.