Creates a dummy dataset of patient data, including randomly generated location info for clinics and patients.

In [0]:
import math, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [0]:
# Path of the CSV file holding the base patient dataset; it is read with
# pd.read_csv and used as the default `file` argument of generate_patients.
FILENAME="data.csv"

Generate dummy clinics

In [0]:
# Size of the map as (max_x, max_y). Locations are sampled as integer
# coordinates with 0 <= x <= max_x - 1 and 0 <= y <= max_y - 1.
MAP_DIM = (1000, 1000)

In [28]:
# Heuristic clinic count: square root of the longer map side, truncated
# to an integer by int().
NUM_CLINICS = int(np.sqrt(max(MAP_DIM[0], MAP_DIM[1])))
NUM_CLINICS  # bare expression so the notebook displays the value

31

In [0]:
def generate_clinics(num=NUM_CLINICS, max_x=MAP_DIM[0], max_y=MAP_DIM[1],
                     saveto="clinics.csv"):
    '''
    generates clinic location data by sampling distinct integer grid points
    from a 2d uniform distribution over [0, max_x - 1] x [0, max_y - 1]

    Args:
    num: number of clinics
    max_x, max_y: dimensions of map
    saveto: output csv file name; pass "" or None to skip writing the file

    Returns:
    list of `num` distinct (x, y) tuples, in the order they were drawn

    Raises:
    ValueError: if `num` is negative or larger than the number of distinct
        grid cells (rejection sampling could otherwise never terminate)
    '''
    # Guard against impossible requests up front: with more clinics than
    # cells the rejection loop below would spin forever.
    capacity = max(max_x, 0) * max(max_y, 0)
    if num < 0 or num > capacity:
        raise ValueError(
            "cannot place %d distinct clinics on a %d x %d map"
            % (num, max_x, max_y))

    # `clinics` keeps draw order for the caller; `taken` mirrors it as a set
    # so the duplicate check is O(1) instead of a list scan.
    clinics = []
    taken = set()
    while len(clinics) < num:
        # x is drawn before y, one pair per attempt, so a seeded `random`
        # yields the same clinics as the previous implementation.
        x = random.randint(0, max_x - 1)
        y = random.randint(0, max_y - 1)
        clinic = (x, y)
        if clinic in taken:
            continue  # collision: redraw
        taken.add(clinic)
        clinics.append(clinic)

    if saveto:
        pd.DataFrame(clinics).to_csv(saveto)
    return clinics

In [0]:
# Sample the clinic locations with the default settings. As a side effect
# this also writes them to "clinics.csv" (the default `saveto`).
clinics = generate_clinics()

Generate dummy patients

---



In [0]:
# Load the base patient dataset from FILENAME into a DataFrame.
data = pd.read_csv(FILENAME)

In [69]:
# Display the (rows, columns) shape of the loaded dataset.
data.shape

(294, 14)

In [0]:
def generate_patients(clinics = clinics, file=FILENAME, max_x=MAP_DIM[0],
                    max_y=MAP_DIM[1], compute_dists=True, concact=True,
                    saveto="patients.csv"):
    '''
    generates patient data by reading dummy data set and randomly sampling from
    uniform for location

    One integer location (x, y) is drawn per row of the dataset, with
    0 <= x <= max_x - 1 and 0 <= y <= max_y - 1. The location is turned into
    feature columns named 0, 1, 2, ... which are appended to the dataset.

    Args:
    clinics: list of clinics as (x,y) coordinates
    file: filename of dataset
    max_x, max_y: dimensions of map
    compute_dists: if True compute distances between client and all clinics as
        features (one column per clinic, Euclidean distance); otherwise use
        (x,y) as two columns
    concact: whether to return data as concacted data frame
        or separately (data, sampled dists)
    saveto: output csv filename; pass "" or None to skip writing the file

    Returns:
    the concatenated DataFrame if `concact` is True, otherwise the tuple
    (data, dist) of the original dataset and the location-feature DataFrame
    '''
    data = pd.read_csv(file)
    num_patients = data.shape[0]

    # one feature column per clinic (distances) or two columns (x and y)
    num_features = len(clinics) if compute_dists else 2
    dist = {i: [] for i in range(num_features)}

    for _ in range(num_patients):
        x = random.randint(0, max_x - 1)
        y = random.randint(0, max_y - 1)

        if compute_dists:
            # compute distance to all the available clinics as features
            for i, (c_x, c_y) in enumerate(clinics):
                distance = np.linalg.norm([x - c_x, y - c_y])
                dist[i].append(distance)
        else:
            # use location coordinates as features; append so every patient
            # gets a row (assigning the scalar would discard earlier rows
            # and make the DataFrame construction below fail)
            dist[0].append(x)
            dist[1].append(y)

    dist = pd.DataFrame(dist)
    concacted = pd.concat([data, dist], axis=1)

    if saveto:
        concacted.to_csv(saveto)

    return concacted if concact else (data, dist)
    

In [66]:
# Run with the defaults: distance-to-clinic features are appended to the
# dataset, the result is written to "patients.csv" and displayed below.
generate_patients()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0,151.343979,246.489351,593.043000,350.035712,469.323982,172.861216,318.692642,351.923287,463.320623,282.639346,403.316253,216.009259,493.790441,288.979238,455.896918,509.318172,187.275199,232.639206,647.963734,556.367684,358.577746,370.696641,618.546684,375.728625,524.690385,486.728877,437.599132,521.359761,527.163163,214.133136,63.560994
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0,547.074035,412.043687,208.396257,134.536240,122.409150,568.958698,714.737714,614.391569,505.395884,628.600032,539.304181,612.053102,145.003448,431.353683,190.905736,903.779840,393.066152,172.754161,728.660415,457.108302,409.518009,118.359621,494.417839,770.675678,243.287895,724.325894,824.796945,691.960259,470.023404,348.206835,367.185239
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0,575.647462,507.940941,272.060655,84.118963,236.372587,599.307934,744.356098,573.069804,625.521383,617.778277,467.115617,640.684790,253.665134,533.428533,63.820060,921.308852,478.122369,250.319795,849.617561,337.146852,524.317652,61.684682,621.421757,791.545956,362.305120,822.172731,832.438586,801.027465,596.518231,441.678616,387.253147
3,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0,338.338588,384.376118,472.069910,164.149322,375.000000,362.592057,504.385765,350.656527,570.406872,372.651312,300.695860,401.808412,400.181209,422.711486,255.162693,675.343616,333.685481,184.100516,783.472399,358.113111,457.200175,188.621314,657.560644,547.095970,473.144798,675.325847,585.232432,687.501273,592.483755,324.599754,153.551294
4,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0,176.504957,502.063741,856.638197,563.620440,741.636029,177.341479,198.338095,194.064422,716.854937,43.566042,354.153921,163.856645,766.891127,541.074856,646.025541,294.716813,448.005580,506.616226,864.783210,623.682612,624.769558,588.102882,897.062428,191.637679,808.666186,630.967511,192.566872,707.407238,799.180831,489.326067,288.321002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289,52,1,4,160,331,0,0,94,1,2.5,?,?,?,1,484.546179,572.458732,525.549237,211.992924,468.782466,508.355191,636.381175,357.328980,751.073898,447.219186,219.913619,541.074856,490.633264,610.099172,214.308189,779.400411,522.289192,349.091679,968.031508,171.093542,638.501370,229.368699,812.065268,663.272191,587.579782,862.422750,677.738150,876.042236,760.019079,511.414705,322.691803
290,54,0,3,130,294,0,1,100,1,0.0,2,?,?,1,738.628459,432.539016,725.744445,799.876240,635.491935,738.541807,804.121881,1028.805618,222.712820,930.197828,1069.916352,762.105636,635.609943,390.000000,892.415262,976.426136,491.769255,624.591066,93.338095,1127.483038,335.900283,799.280301,279.257945,876.548344,537.275535,375.310538,972.205740,257.388811,241.536747,476.707457,725.986914
291,56,1,4,155,342,1,0,150,1,3.0,2,?,?,1,489.524259,647.113591,656.920848,340.147027,596.858442,511.196635,619.556293,276.739950,842.119350,400.052497,108.724422,535.840461,619.356117,687.576905,343.198193,734.412691,591.919758,453.047459,1051.597832,186.601179,728.920435,359.050136,924.863774,632.007120,712.960728,914.636540,627.925155,943.375323,864.473250,591.898640,365.826462
292,58,0,2,180,393,0,0,110,1,1.0,2,?,7,1,105.621967,367.535032,698.736717,418.169822,581.737054,127.577427,253.842471,235.238177,585.806282,151.855194,328.671569,157.012738,606.906912,409.140563,510.722038,419.237403,309.588436,346.101141,756.861942,543.994485,485.185532,441.860838,749.234943,291.109945,648.284660,560.544378,332.385319,615.338118,656.246905,343.036441,128.222463
