In [1]:
import bisect
import csv
import math
import multiprocessing
import os
import re
import sys
import shutil

import pandas as pd
from tqdm.notebook import tqdm

from utils import clean_text, sanitize_text

# Misspelling Dataset (Clinspell) of MIMIC-III v1.4

This script provides the Clinspell dataset ([GitHub](https://github.com/clips/clinspell)) for MIMIC-III v1.4, converted from v1.3, the version that the Clinspell dataset originally used.
The update of the MIMIC-III dataset from v1.3 to v1.4 (see the MIMIC-III v1.4 release notes) changed the `NOTEEVENTS` table.
The `ROW_ID`s and the anonymized fields have changed, and so has the total number of notes.
As a result, we cannot use `ROW_ID`s to identify a row, and the character positions of the misspellings in the texts are not the same.

Below are the annotations of the Clinspell misspelling dataset, converted from line numbers in the raw file to row specifiers and character positions.
To find the row in the v1.4 table that corresponds to a v1.3 annotation, we use several fields (not `ROW_ID`) as a specifier to narrow the search down to a few candidate rows, and then choose the row in which the misspelling occurs closest to the character position given by the v1.3 annotation.

In [3]:
# Input: the NOTEEVENTS table of MIMIC-III v1.4
mimic_csv_fpath = '../data/mimic3/NOTEEVENTS.csv'

# Output directory, and the label-correction file used for evaluation
output_dir = '../data/mimic_clinspell/'
correction_fpath = '../data/mimic_clinspell/clinspell_corrections.csv'

# Pseudonymization: tool location plus the input/output scratch directories,
# both of which live directly under `temp_root`
mimic_tools_dpath = 'mimic-tools/'
temp_root = '/tmp/cim'
pseudo_in_dpath, pseudo_out_dpath = (
    os.path.join(temp_root, leaf) for leaf in ('temp', 'temp_pseudonym')
)

In [2]:
# The misspelling annotations, converted to (HADM_ID, CHARTDATE, CHARTTIME, STORETIME, CATEGORY) row specifiers.
# The original annotations were given as line numbers in the raw file.
# Note that these annotations still refer to MIMIC-III v1.3, the version Clinspell used.

# (ROW_ID, HADM_ID, CHARTDATE, CHARTTIME, STORETIME, CATEGORY, char location, misspelling, correction)
clinspell_annotations = [(52849, 124522.0, '2142-03-24', float('nan'), float('nan'), 'Discharge summary', 23442, 'carediolgy', 'cardiology'),
                         (758716, 168886.0, '2130-07-26', '2130-07-26 13:37:00', float('nan'), 'Radiology', 1153, 'lugns', 'lungs'),
                         (1094190, 191645.0, '2117-09-10', '2117-09-10 17:00:00', float('nan'), 'Radiology', 783, 'lugns', 'lungs'),
                         (1330298, 132825.0, '2191-03-15', '2191-03-15 12:45:00', '2191-03-15 12:45:00', 'Nursing/other', 94, 'lugns', 'lungs'),
                         (1374973, 164191.0, '2159-12-19', '2159-12-19 11:45:00', '2159-12-19 11:45:00', 'Nursing/other', 82, 'lugns', 'lungs'),
                         (1733506, 173779.0, '2191-02-17', '2191-02-17 14:09:00', '2191-02-17 14:10:00', 'Nursing/other', 58, 'lugns', 'lungs'),
                         (14760, 115121.0, '2167-02-03', float('nan'), float('nan'), 'Discharge summary', 4743, 'ecchinocytes', 'echinocytes'),
                         (27431, 183816.0, '2174-06-07', float('nan'), float('nan'), 'Discharge summary', 10302, 'procuedure', 'procedure'),
                         (860639, float('nan'), '2142-04-19', '2142-04-19 07:18:00', float('nan'), 'Radiology', 2191, 'procuedure', 'procedure'),
                         (384642, 118625.0, '2158-03-01', '2158-03-01 16:07:00', '2158-03-01 16:07:08', 'Respiratory ', 990, 'avening', 'evening'),
                         (1273235, 103307.0, '2128-04-15', '2128-04-15 17:37:00', '2128-04-15 17:57:00', 'Nursing/other', 312, 'avening', 'evening'),
                         (2025486, 152298.0, '2136-11-17', '2136-11-17 05:36:00', '2136-11-17 05:49:00', 'Nursing/other', 261, 'avening', 'evening'),
                         (4904, 134410.0, '2119-10-03', float('nan'), float('nan'), 'Discharge summary', 10266, 'enteracept', 'etanercept'),
                         (320846, 134410.0, '2119-09-27', '2119-09-27 07:50:00', '2119-09-27 07:52:32', 'Physician ', 6119, 'enteracept', 'etanercept'),
                         (514487, 134410.0, '2119-09-26', '2119-09-26 19:34:00', '2119-09-26 19:35:50', 'Physician ', 1582, 'enteracept', 'etanercept'),
                         (459764, 134250.0, '2130-12-20', '2130-12-20 07:13:00', '2130-12-20 16:14:40', 'Physician ', 7440, 'hepaotology', 'hepatology'),
                         (354465, 174215.0, '2152-12-01', '2152-12-01 04:28:00', '2152-12-01 04:47:20', 'Physician ', 3538, 'precipirtouskly', 'precipitously'),
                         (46579, 152925.0, '2105-12-26', float('nan'), float('nan'), 'Discharge summary', 6089, 'trachiotomy', 'tracheotomy'),
                         (1466870, 195641.0, '2133-04-21', '2133-04-21 04:52:00', '2133-04-21 05:15:00', 'Nursing/other', 1320, 'trachiotomy', 'tracheotomy'),
                         (1338745, 125523.0, '2164-05-20', '2164-05-20 14:20:00', '2164-05-20 17:57:00', 'Nursing/other', 137, 'responsvie', 'responsive'),
                         (1530963, 152853.0, '2166-04-11', '2166-04-11 03:34:00', '2166-04-11 03:40:00', 'Nursing/other', 164, 'responsvie', 'responsive'),
                         (1655119, 158616.0, '2121-04-21', '2121-04-21 18:06:00', '2121-04-21 18:33:00', 'Nursing/other', 1132, 'responsvie', 'responsive'),
                         (1737121, 117042.0, '2119-07-16', '2119-07-16 12:05:00', '2119-07-16 12:24:00', 'Nursing/other', 269, 'responsvie', 'responsive'),
                         (1926758, 117058.0, '2103-12-16', '2103-12-16 10:19:00', '2103-12-16 10:24:00', 'Nursing/other', 264, 'responsvie', 'responsive'),
                         (1229318, 152718.0, '2126-05-08', '2126-05-08 19:50:00', float('nan'), 'Radiology', 285, 'intubwted', 'intubed'),
                         (707888, 119095.0, '2117-12-09', '2117-12-09 10:12:00', '2117-12-09 10:12:15', 'Physician ', 293, 'sunglottic', 'subglottic'),
                         (1198188, 106361.0, '2165-08-09', '2165-08-09 22:22:00', float('nan'), 'Radiology', 1593, 'laternatively', 'alternatively'),
                         (919309, float('nan'), '2123-07-31', '2123-07-31 15:14:00', float('nan'), 'Radiology', 389, 'pylenonephritis', 'pyelonephritis'),
                         (1053293, 106250.0, '2103-12-11', '2103-12-11 10:38:00', float('nan'), 'Radiology', 373, 'pylenonephritis', 'pyelonephritis'),
                         (1254100, float('nan'), '2203-12-10', '2203-12-10 10:52:00', float('nan'), 'Radiology', 143, 'pylenonephritis', 'pyelonephritis'),
                         (1924065, 101254.0, '2135-09-01', '2135-09-01 16:45:00', '2135-09-01 17:42:00', 'Nursing/other', 281, 'pylenonephritis', 'pyelonephritis'),
                         (1924065, 101254.0, '2135-09-01', '2135-09-01 16:45:00', '2135-09-01 17:42:00', 'Nursing/other', 1423, 'pylenonephritis', 'pyelonephritis'),
                         (1395, 182657.0, '2176-10-15', float('nan'), float('nan'), 'Discharge summary', 4499, 'increaseds', 'increases'),
                         (1499458, 182482.0, '2163-12-03', '2163-12-03 12:47:00', '2163-12-03 12:50:00', 'Nursing/other', 434, 'increaseds', 'increases'),
                         (814547, float('nan'), '2135-03-07', '2135-03-07 11:14:00', float('nan'), 'Radiology', 1392, 'qudrant', 'quadrant'),
                         (965759, float('nan'), '2126-07-17', '2126-07-17 23:11:00', float('nan'), 'Radiology', 1178, 'qudrant', 'quadrant'),
                         (1126330, 100492.0, '2193-03-15', '2193-03-15 19:23:00', float('nan'), 'Radiology', 721, 'qudrant', 'quadrant'),
                         (1276763, 139128.0, '2168-02-06', '2168-02-06 05:06:00', '2168-02-06 05:16:00', 'Nursing/other', 144, 'noteed', 'noted'),
                         (1266951, 109968.0, '2151-03-26', '2151-03-26 17:42:00', '2151-03-26 18:11:00', 'Nursing/other', 1042, 'noteed', 'noted'),
                         (1286515, 119984.0, '2118-09-22', '2118-09-22 05:16:00', '2118-09-22 05:38:00', 'Nursing/other', 206, 'noteed', 'noted'),
                         (1746698, 114624.0, '2185-02-09', '2185-02-09 14:32:00', '2185-02-09 14:48:00', 'Nursing/other', 166, 'noteed', 'noted'),
                         (2040862, 184161.0, '2110-09-06', '2110-09-06 18:22:00', '2110-09-06 18:27:00', 'Nursing/other', 399, 'noteed', 'noted'),
                         (1915683, 185982.0, '2166-12-21', '2166-12-21 06:05:00', '2166-12-21 06:17:00', 'Nursing/other', 430, 'specil', 'special'),
                         (1045934, 115553.0, '2112-11-30', '2112-11-30 16:59:00', float('nan'), 'Radiology', 1681, 'distince', 'distinct'),
                         (2016866, 146982.0, '2170-09-20', '2170-09-20 06:22:00', '2170-09-20 06:59:00', 'Nursing/other', 714, 'distince', 'distinct'),
                         (36168, 148092.0, '2189-11-08', float('nan'), float('nan'), 'Discharge summary', 7276, 'hypokiinesis', 'hypokinesis'),
                         (93423, float('nan'), '2114-05-29', float('nan'), float('nan'), 'Echo', 3682, 'hypokiinesis', 'hypokinesis'),
                         (26815, 198323.0, '2147-03-29', float('nan'), float('nan'), 'Discharge summary', 4647, 'hypercholesteromia', 'hypercholesteremia'),
                         (38436, 100349.0, '2114-07-11', float('nan'), float('nan'), 'Discharge summary', 392, 'hypercholesteromia', 'hypercholesteremia'),
                         (38436, 100349.0, '2114-07-11', float('nan'), float('nan'), 'Discharge summary', 1411, 'hypercholesteromia', 'hypercholesteremia'),
                         (38436, 100349.0, '2114-07-11', float('nan'), float('nan'), 'Discharge summary', 8828, 'hypercholesteromia', 'hypercholesteremia'),
                         (1531310, 159960.0, '2170-06-17', '2170-06-17 01:06:00', '2170-06-17 01:11:00', 'Nursing/other', 235, 'hypercholesteromia', 'hypercholesteremia'),
                         (1447519, 148209.0, '2181-02-14', '2181-02-14 18:37:00', '2181-02-14 18:53:00', 'Nursing/other', 1358, 'supportvie', 'supportive'),
                         (1098072, float('nan'), '2154-09-18', '2154-09-18 12:29:00', float('nan'), 'Radiology', 253, 'spenomegaly', 'splenomegaly'),
                         (1099312, 109134.0, '2115-09-18', '2115-09-18 19:47:00', float('nan'), 'Radiology', 422, 'spenomegaly', 'splenomegaly'),
                         (1196269, 192726.0, '2186-07-22', '2186-07-22 19:12:00', float('nan'), 'Radiology', 176, 'spenomegaly', 'splenomegaly'),
                         (1687689, 175063.0, '2188-06-11', '2188-06-11 16:30:00', '2188-06-11 17:06:00', 'Nursing/other', 1072, 'spenomegaly', 'splenomegaly'),
                         (1992331, 164869.0, '2105-06-09', '2105-06-09 03:39:00', '2105-06-09 04:08:00', 'Nursing/other', 322, 'spenomegaly', 'splenomegaly'),
                         (404147, 144591.0, '2111-07-04', '2111-07-04 15:13:00', '2111-07-04 18:03:53', 'Nursing', 2001, 'antiarrrhythmics', 'antiarrhythmics'),
                         (724301, 144591.0, '2111-07-04', '2111-07-04 15:13:00', '2111-07-04 15:13:05', 'Nursing', 2001, 'antiarrrhythmics', 'antiarrhythmics'),
                         (39232, 187585.0, '2201-01-14', float('nan'), float('nan'), 'Discharge summary', 2697, 'reporteldy', 'reportedly'),
                         (1369170, 122536.0, '2141-08-04', '2141-08-04 14:07:00', '2141-08-04 14:11:00', 'Nursing/other', 398, 'reporteldy', 'reportedly'),
                         (1747007, 114624.0, '2185-04-11', '2185-04-11 16:10:00', '2185-04-11 16:12:00', 'Nursing/other', 319, 'reporteldy', 'reportedly'),
                         (1851065, 176883.0, '2104-09-13', '2104-09-13 09:55:00', '2104-09-13 09:57:00', 'Nursing/other', 202, 'reporteldy', 'reportedly'),
                         (1935557, 190426.0, '2170-08-09', '2170-08-09 15:39:00', '2170-08-09 16:04:00', 'Nursing/other', 316, 'reporteldy', 'reportedly'),
                         (54826, 158983.0, '2135-06-07', float('nan'), float('nan'), 'Discharge summary', 739, 'myalgais', 'myalgias'),
                         (54199, 144913.0, '2198-01-02', float('nan'), float('nan'), 'Discharge summary', 12775, 'imapenem', 'imipenem'),
                         (1261417, 199994.0, '2188-07-08', '2188-07-08 14:42:00', '2188-07-08 15:05:00', 'Nursing/other', 1395, 'imapenem', 'imipenem'),
                         (1286948, 163196.0, '2110-08-25', '2110-08-25 05:17:00', '2110-08-25 06:01:00', 'Nursing/other', 1441, 'imapenem', 'imipenem'),
                         (1471411, 105525.0, '2105-05-20', '2105-05-20 05:51:00', '2105-05-20 06:15:00', 'Nursing/other', 1220, 'placemedt', 'placement'),
                         (614332, 103146.0, '2114-05-05', '2114-05-05 14:39:00', '2114-05-05 14:39:51', 'Physician ', 444, 'transducion', 'transduction'),
                         (1177643, 183945.0, '2139-03-30', '2139-03-30 17:01:00', float('nan'), 'Radiology', 653, 'supralilar', 'suprahilar'),
                         (25111, 168082.0, '2161-11-25', float('nan'), float('nan'), 'Discharge summary', 2170, 'neoptlams', 'neoplasm'),
                         (40389, 159650.0, '2109-11-19', float('nan'), float('nan'), 'Discharge summary', 2962, 'umcomplicated', 'uncomplicated'),
                         (1375643, 199272.0, '2110-01-14', '2110-01-14 15:28:00', '2110-01-14 15:37:00', 'Nursing/other', 333, 'umcomplicated', 'uncomplicated'),
                         (1366954, 134663.0, '2156-07-24', '2156-07-24 06:59:00', '2156-07-24 07:09:00', 'Nursing/other', 266, 'umcomplicated', 'uncomplicated'),
                         (1408417, 147926.0, '2157-06-16', '2157-06-16 03:38:00', '2157-06-16 04:01:00', 'Nursing/other', 238, 'umcomplicated', 'uncomplicated'),
                         (1777682, 126990.0, '2152-01-16', '2152-01-16 01:49:00', '2152-01-16 02:11:00', 'Nursing/other', 470, 'umcomplicated', 'uncomplicated'),
                         (27777, 157619.0, '2192-08-04', float('nan'), float('nan'), 'Discharge summary', 1629, 'arimdex', 'arimidex'),
                         (27778, 154742.0, '2192-09-07', float('nan'), float('nan'), 'Discharge summary', 2673, 'arimdex', 'arimidex'),
                         (50437, 122231.0, '2134-12-11', float('nan'), float('nan'), 'Discharge summary', 8906, 'arimdex', 'arimidex'),
                         (540432, 122231.0, '2134-12-08', '2134-12-08 06:57:00', '2134-12-08 12:44:28', 'Physician ', 6731, 'arimdex', 'arimidex'),
                         (653044, 157619.0, '2192-08-02', '2192-08-02 01:31:00', '2192-08-02 01:31:10', 'Physician ', 2285, 'arimdex', 'arimidex'),
                         (735209, 155415.0, '2163-07-26', '2163-07-26 10:37:00', float('nan'), 'Radiology', 2293, 'acrss', 'across'),
                         (1768442, 153413.0, '2192-05-05', '2192-05-05 05:20:00', '2192-05-05 05:44:00', 'Nursing/other', 952, 'acetazolamind', 'acetazolamide'),
                         (237, 155131.0, '2131-12-29', float('nan'), float('nan'), 'Discharge summary', 1954, 'roccuronium', 'rocuronium'),
                         (331764, 124439.0, '2136-11-02', '2136-11-01 06:03:00', '2136-11-02 17:14:45', 'Physician ', 6310, 'sless', 'less'),
                         (469781, 170312.0, '2111-03-28', '2111-03-27 21:33:00', '2111-03-28 04:06:03', 'Nursing', 650, 'aroound', 'around'),
                         (715984, 137825.0, '2121-04-15', '2121-04-15 09:55:00', '2121-04-15 09:54:52', 'Physician ', 473, 'aroound', 'around'),
                         (1303962, 185228.0, '2113-05-25', '2113-05-25 19:12:00', '2113-05-25 19:20:00', 'Nursing/other', 34, 'aroound', 'around'),
                         (1536312, 155067.0, '2170-07-07', '2170-07-07 19:30:00', '2170-07-07 19:41:00', 'Nursing/other', 562, 'aroound', 'around'),
                         (1592939, 180621.0, '2191-03-26', '2191-03-26 05:50:00', '2191-03-26 06:05:00', 'Nursing/other', 963, 'aroound', 'around'),
                         (422569, 164088.0, '2160-07-16', '2160-07-16 07:19:00', '2160-07-16 10:47:15', 'Physician ', 6930, 'concerntrate', 'concentrate'),
                         (332210, 119575.0, '2131-10-03', '2131-10-03 06:19:00', '2131-10-03 06:19:47', 'Physician ', 5219, 'demarginization', 'demargination'),
                         (529031, 119575.0, '2131-10-02', '2131-10-02 06:00:00', '2131-10-02 19:07:17', 'Physician ', 6838, 'demarginization', 'demargination'),
                         (767994, 123813.0, '2196-12-02', '2196-12-02 21:11:00', float('nan'), 'Radiology', 1136, 'hpatic', 'hepatic'),
                         (808810, float('nan'), '2110-01-28', '2110-01-28 22:13:00', float('nan'), 'Radiology', 185, 'hpatic', 'hepatic'),
                         (1011840, 139995.0, '2164-05-24', '2164-05-24 09:57:00', float('nan'), 'Radiology', 1019, 'hpatic', 'hepatic'),
                         (756777, 121982.0, '2136-05-06', '2136-05-06 21:06:00', float('nan'), 'Radiology', 1480, 'retroobital', 'retroorbital'),
                         (1413374, 119194.0, '2158-08-31', '2158-08-31 05:04:00', '2158-08-31 05:07:00', 'Nursing/other', 384, 'ferinosl', 'fer-in-sol'),
                         (42460, 175506.0, '2146-01-27', float('nan'), float('nan'), 'Discharge summary', 3784, 'systoilic', 'systolic'),
                         (1476817, 115846.0, '2128-06-15', '2128-06-15 05:19:00', '2128-06-15 05:35:00', 'Nursing/other', 656, 'systoilic', 'systolic'),
                         (1606221, 133823.0, '2144-06-23', '2144-06-23 16:11:00', '2144-06-23 16:14:00', 'Nursing/other', 292, 'systoilic', 'systolic'),
                         (1842050, 137736.0, '2124-07-05', '2124-07-05 18:35:00', '2124-07-05 18:37:00', 'Nursing/other', 229, 'systoilic', 'systolic'),
                         (1126538, float('nan'), '2145-03-07', '2145-03-07 21:36:00', float('nan'), 'Radiology', 1490, 'hyyaline', 'hyaline'),
                         (32490, 108765.0, '2140-12-23', float('nan'), float('nan'), 'Discharge summary', 18601, 'ferriecit', 'ferrlecit'),
                         (1270576, 111487.0, '2131-10-28', '2131-10-28 17:24:00', '2131-10-28 17:58:00', 'Nursing/other', 1946, 'claoric', 'caloric'),
                         (1271769, 108981.0, '2191-01-18', '2191-01-18 18:17:00', '2191-01-18 18:38:00', 'Nursing/other', 895, 'claoric', 'caloric'),
                         (1474624, float('nan'), '2167-07-15', '2167-07-15 18:50:00', '2167-07-15 19:31:00', 'Nursing/other', 1332, 'claoric', 'caloric'),
                         (1775515, 190749.0, '2155-07-01', '2155-07-01 19:11:00', '2155-07-01 19:33:00', 'Nursing/other', 1440, 'claoric', 'caloric'),
                         (2057418, 134955.0, '2136-04-26', '2136-04-26 11:21:00', '2136-04-26 11:25:00', 'Nursing/other', 713, 'claoric', 'caloric'),
                         (591111, 140622.0, '2105-05-12', '2105-05-12 10:41:00', '2105-05-12 10:41:54', 'Nursing', 1488, 'ankld', 'ankle'),
                         (335603, 122926.0, '2135-09-10', '2135-09-10 06:18:00', '2135-09-10 06:19:07', 'Respiratory ', 1483, 'desature', 'desaturate'),
                         (1529335, 100797.0, '2116-06-19', '2116-06-19 17:25:00', '2116-06-19 17:49:00', 'Nursing/other', 1436, 'cyclorsporin', 'cyclosporin'),
                         (1621772, 180543.0, '2147-08-01', '2147-08-01 18:02:00', '2147-08-01 18:06:00', 'Nursing/other', 16, 'cyclorsporin', 'cyclosporin'),
                         (2017696, 149896.0, '2114-09-29', '2114-09-29 05:32:00', '2114-09-29 05:47:00', 'Nursing/other', 743, 'cyclorsporin', 'cyclosporin'),
                         (1031736, 175595.0, '2166-09-25', '2166-09-25 17:03:00', float('nan'), 'Radiology', 2375, 'rgoins', 'groins'),
                         (916746, float('nan'), '2188-07-28', '2188-07-28 07:53:00', float('nan'), 'Radiology', 818, 'bacteremis', 'bacteremia'),
                         (1037837, 199628.0, '2156-12-06', '2156-12-06 12:23:00', float('nan'), 'Radiology', 389, 'bacteremis', 'bacteremia'),
                         (535895, 164510.0, '2123-09-29', '2123-09-29 02:32:00', '2123-09-29 05:06:51', 'Nursing', 1268, 'sympots', 'symptoms'),
                         (772737, 181285.0, '2201-01-30', '2201-01-30 15:40:00', float('nan'), 'Radiology', 680, 'comminted', 'comminuted'),
                         (872013, float('nan'), '2132-08-10', '2132-08-10 21:17:00', float('nan'), 'Radiology', 777, 'comminted', 'comminuted'),
                         (9296, 106883.0, '2199-01-24', float('nan'), float('nan'), 'Discharge summary', 7966, 'signl', 'signal'),
                         (929998, float('nan'), '2140-10-15', '2140-10-15 12:36:00', float('nan'), 'Radiology', 3796, 'signl', 'signal'),
                         (1230851, float('nan'), '2121-03-31', '2121-03-31 23:58:00', float('nan'), 'Radiology', 2482, 'signl', 'signal'),
                         (1230851, float('nan'), '2121-03-31', '2121-03-31 23:58:00', float('nan'), 'Radiology', 4888, 'signl', 'signal'),
                         (1145972, 158178.0, '2157-08-31', '2157-08-31 11:45:00', float('nan'), 'Radiology', 5037, 'extcretion', 'excretion'),
                         (1251183, float('nan'), '2126-10-25', '2126-10-25 18:21:00', float('nan'), 'Radiology', 623, 'ssigns', 'signs'),
                         (3384, 160775.0, '2101-08-05', float('nan'), float('nan'), 'Discharge summary', 8626, 'incontinenece', 'incontinence'),
                         (6952, 128182.0, '2106-03-27', float('nan'), float('nan'), 'Discharge summary', 25145, 'incontinenece', 'incontinence'),
                         (48735, 165050.0, '2154-01-28', float('nan'), float('nan'), 'Discharge summary', 2820, 'incontinenece', 'incontinence'),
                         (428390, 173017.0, '2162-08-04', '2162-08-04 04:47:00', '2162-08-04 10:49:40', 'Physician ', 8068, 'incontinenece', 'incontinence'),
                         (1888906, 108222.0, '2176-11-19', '2176-11-19 06:02:00', '2176-11-19 06:27:00', 'Nursing/other', 1279, 'incontinenece', 'incontinence'),
                         (445638, 174373.0, '2181-10-21', '2181-10-21 05:08:00', '2181-10-21 05:08:19', 'Nursing', 521, 'vaspopressin', 'vasopressin'),
                         (504704, 146736.0, '2132-11-01', '2132-11-01 07:17:00', '2132-11-01 07:17:08', 'Physician ', 7976, 'vaspopressin', 'vasopressin'),
                         (1872651, 179315.0, '2122-02-16', '2122-02-16 05:45:00', '2122-02-16 06:05:00', 'Nursing/other', 1092, 'vaspopressin', 'vasopressin'),
                         (19368, 191546.0, '2122-05-12', float('nan'), float('nan'), 'Discharge summary', 5425, 'regurtitation', 'regurgitation'),
                         (35898, 166294.0, '2108-02-13', float('nan'), float('nan'), 'Discharge summary', 3026, 'regurtitation', 'regurgitation'),
                         (52341, 197428.0, '2109-06-14', float('nan'), float('nan'), 'Discharge summary', 745, 'regurtitation', 'regurgitation'),
                         (368, 111651.0, '2167-11-09', float('nan'), float('nan'), 'Discharge summary', 3194, 'thalasemmia', 'thalassemia'),
                         (891, 128823.0, '2179-08-17', float('nan'), float('nan'), 'Discharge summary', 4385, 'thalasemmia', 'thalassemia'),
                         (46023, 140818.0, '2123-06-28', float('nan'), float('nan'), 'Discharge summary', 9094, 'thalasemmia', 'thalassemia'),
                         (53479, 173425.0, '2192-07-23', float('nan'), float('nan'), 'Discharge summary', 1098, 'thalasemmia', 'thalassemia'),
                         (1296023, 189601.0, '2189-11-12', '2189-11-12 06:25:00', '2189-11-12 06:50:00', 'Nursing/other', 581, 'incrrease', 'increase'),
                         (1373439, 176404.0, '2137-12-18', '2137-12-18 15:33:00', '2137-12-18 15:46:00', 'Nursing/other', 518, 'incrrease', 'increase'),
                         (1480796, 166297.0, '2138-07-17', '2138-07-17 17:05:00', '2138-07-17 17:09:00', 'Nursing/other', 197, 'incrrease', 'increase'),
                         (1850836, 164974.0, '2122-01-29', '2122-01-29 13:39:00', '2122-01-29 13:52:00', 'Nursing/other', 849, 'incrrease', 'increase'),
                         (2022778, 187314.0, '2138-04-25', '2138-04-25 17:31:00', '2138-04-25 17:49:00', 'Nursing/other', 2018, 'incrrease', 'increase'),
                         (314023, 113412.0, '2181-06-21', '2181-06-21 09:05:00', '2181-06-21 09:05:46', 'Physician ', 4507, 'rtotated', 'rotated'),
                         (45222, 175452.0, '2198-12-26', float('nan'), float('nan'), 'Discharge summary', 15276, 'accordnace', 'accordance'),
                         (32381, 161527.0, '2113-03-04', float('nan'), float('nan'), 'Discharge summary', 4990, 'backgroun', 'background'),
                         (313183, 173513.0, '2167-02-14', '2167-02-14 03:26:00', '2167-02-14 07:26:54', 'Physician ', 8758, 'backgroun', 'background'),
                         (515045, 113437.0, '2119-07-05', '2119-07-05 06:24:00', '2119-07-05 11:55:02', 'Physician ', 4321, 'backgroun', 'background'),
                         (533839, 136560.0, '2184-09-25', '2184-09-25 00:32:00', '2184-09-25 03:27:58', 'Physician ', 6036, 'constusions', 'contusions'),
                         (1033230, 136560.0, '2184-09-24', '2184-09-24 18:01:00', float('nan'), 'Radiology', 618, 'constusions', 'contusions'),
                         (1249780, 145825.0, '2185-09-16', '2185-09-16 04:41:00', float('nan'), 'Radiology', 414, 'constusions', 'contusions'),
                         (23502, 118312.0, '2109-02-07', float('nan'), float('nan'), 'Discharge summary', 10303, 'iatrogentic', 'iatrogenic'),
                         (409075, 134801.0, '2151-07-07', '2151-07-07 06:52:00', '2151-07-07 07:15:37', 'Nursing', 469, 'iatrogentic', 'iatrogenic'),
                         (771334, 112421.0, '2131-01-12', '2131-01-12 13:11:00', float('nan'), 'Radiology', 368, 'iatrogentic', 'iatrogenic'),
                         (771519, 112421.0, '2131-01-15', '2131-01-15 10:26:00', float('nan'), 'Radiology', 397, 'iatrogentic', 'iatrogenic'),
                         (771385, 112421.0, '2131-01-13', '2131-01-13 07:44:00', float('nan'), 'Radiology', 349, 'iatrogentic', 'iatrogenic'),
                         (981143, 180595.0, '2128-11-02', '2128-11-02 17:22:00', float('nan'), 'Radiology', 639, 'hmeithorax', 'hemithorax'),
                         (557863, 153325.0, '2124-01-02', '2124-01-02 17:42:00', '2124-01-02 17:42:41', 'Respiratory ', 1683, 'requirs', 'requires'),
                         (1308636, 140347.0, '2187-11-23', '2187-11-23 06:33:00', '2187-11-23 06:56:00', 'Nursing/other', 1031, 'requirs', 'requires'),
                         (1314727, 125203.0, '2186-06-04', '2186-06-04 07:35:00', '2186-06-04 07:38:00', 'Nursing/other', 379, 'requirs', 'requires'),
                         (1476762, 175397.0, '2199-07-16', '2199-07-16 04:08:00', '2199-07-16 04:29:00', 'Nursing/other', 698, 'requirs', 'requires'),
                         (1780521, 165091.0, '2181-10-08', '2181-10-08 05:09:00', '2181-10-08 05:22:00', 'Nursing/other', 148, 'requirs', 'requires'),
                         (424784, 155246.0, '2176-07-21', '2176-07-21 17:57:00', '2176-07-21 17:57:58', 'Nursing', 967, 'chheks', 'cheeks'),
                         (1876347, 129945.0, '2168-07-06', '2168-07-06 17:31:00', '2168-07-06 17:57:00', 'Nursing/other', 1369, 'chheks', 'cheeks'),
                         (36126, 131921.0, '2113-12-22', float('nan'), float('nan'), 'Discharge summary', 18619, 'confuson', 'confusion'),
                         (1908454, 158950.0, '2127-02-08', '2127-02-08 05:07:00', '2127-02-08 05:20:00', 'Nursing/other', 194, 'confuson', 'confusion'),
                         (2016297, 109484.0, '2141-06-20', '2141-06-20 05:25:00', '2141-06-20 05:39:00', 'Nursing/other', 688, 'confuson', 'confusion'),
                         (757039, 103388.0, '2110-06-04', '2110-06-04 14:44:00', float('nan'), 'Radiology', 622, 'peunothorax', 'pneumothorax'),
                         (8613, 102717.0, '2167-04-27', float('nan'), float('nan'), 'Discharge summary', 6613, 'fibros', 'fibrosis'),
                         (956111, 102717.0, '2167-04-23', '2167-04-23 19:46:00', float('nan'), 'Radiology', 194, 'fibros', 'fibrosis'),
                         (1102499, float('nan'), '2161-12-08', '2161-12-08 07:47:00', float('nan'), 'Radiology', 245, 'fibros', 'fibrosis'),
                         (1303137, 102513.0, '2157-05-04', '2157-05-04 06:50:00', '2157-05-04 06:59:00', 'Nursing/other', 216, 'mininutes', 'minutes'),
                         (1681706, 156906.0, '2138-02-18', '2138-02-18 20:06:00', '2138-02-18 20:40:00', 'Nursing/other', 1165, 'mininutes', 'minutes'),
                         (1802224, 115315.0, '2184-10-14', '2184-10-14 13:55:00', '2184-10-14 14:07:00', 'Nursing/other', 277, 'mininutes', 'minutes'),
                         (2018730, 128774.0, '2165-09-07', '2165-09-07 03:23:00', '2165-09-07 04:02:00', 'Nursing/other', 2785, 'mininutes', 'minutes'),
                         (9011, 162557.0, '2110-05-02', float('nan'), float('nan'), 'Discharge summary', 8659, 'dehiscnece', 'dehiscence'),
                         (19527, 107211.0, '2144-03-29', float('nan'), float('nan'), 'Discharge summary', 6792, 'hydorcele', 'hydrocele'),
                         (1949499, 175162.0, '2138-10-31', '2138-10-31 15:59:00', '2138-10-31 16:52:00', 'Nursing/other', 965, 'hydorcele', 'hydrocele'),
                         (619723, 118470.0, '2117-07-15', '2117-07-15 06:57:00', '2117-07-15 06:57:16', 'Physician ', 374, 'aprropriately', 'appropriately'),
                         (1292711, 143908.0, '2191-06-30', '2191-06-30 14:43:00', '2191-06-30 15:04:00', 'Nursing/other', 761, 'aprropriately', 'appropriately'),
                         (1334461, 196821.0, '2170-02-19', '2170-02-19 13:49:00', '2170-02-19 13:53:00', 'Nursing/other', 44, 'aprropriately', 'appropriately'),
                         (1416538, 189304.0, '2193-09-18', '2193-09-18 18:06:00', '2193-09-18 18:23:00', 'Nursing/other', 547, 'aprropriately', 'appropriately'),
                         (1617032, 129840.0, '2157-07-28', '2157-07-28 15:20:00', '2157-07-28 15:32:00', 'Nursing/other', 685, 'aprropriately', 'appropriately'),
                         (4677, 198167.0, '2116-01-18', float('nan'), float('nan'), 'Discharge summary', 2498, 'psteroids', 'steroids'),
                         (472297, 145522.0, '2135-01-21', '2135-01-21 14:31:00', '2135-01-21 18:37:35', 'Nursing', 3244, 'prosatectomy', 'prostatectomy'),
                         (528215, 131907.0, '2130-09-27', '2130-09-27 06:08:00', '2130-09-27 11:15:08', 'Physician ', 3138, 'prosatectomy', 'prostatectomy'),
                         (54137, 167516.0, '2145-09-26', float('nan'), float('nan'), 'Discharge summary', 838, 'popletial', 'popliteal'),
                         (392119, 195111.0, '2154-04-23', '2154-04-23 11:32:00', '2154-04-23 11:32:59', 'Physician ', 6146, 'popletial', 'popliteal'),
                         (562611, 170681.0, '2105-01-16', '2105-01-16 04:48:00', '2105-01-16 06:08:01', 'Nursing', 193, 'popletial', 'popliteal'),
                         (1083823, float('nan'), '2116-06-22', '2116-06-22 16:25:00', float('nan'), 'Radiology', 1315, 'popletial', 'popliteal'),
                         (1220610, 144844.0, '2109-01-01', '2109-01-01 09:51:00', float('nan'), 'Radiology', 4686, 'popletial', 'popliteal'),
                         (1567993, 135786.0, '2136-12-17', '2136-12-17 17:52:00', '2136-12-17 18:59:00', 'Nursing/other', 45, 'intuabte', 'intubate'),
                         (1815141, 190127.0, '2108-06-10', '2108-06-10 16:43:00', '2108-06-10 16:54:00', 'Nursing/other', 442, 'intuabte', 'intubate'),
                         (1415025, 149510.0, '2129-07-13', '2129-07-13 01:48:00', '2129-07-13 02:21:00', 'Nursing/other', 1384, 'femerals', 'femoralis'),
                         (1698296, 170598.0, '2162-07-23', '2162-07-23 19:19:00', '2162-07-23 19:33:00', 'Nursing/other', 411, 'femerals', 'femoralis'),
                         (35947, 181768.0, '2148-12-13', float('nan'), float('nan'), 'Discharge summary', 8339, 'popletieal', 'popliteal'),
                         (1696469, 108097.0, '2200-05-28', '2200-05-28 18:02:00', '2200-05-28 18:28:00', 'Nursing/other', 767, 'sensarion', 'sensation'),
                         (437533, 153933.0, '2153-11-17', '2153-11-17 02:35:00', '2153-11-17 06:57:51', 'Nursing', 427, 'thryohyoid', 'thyrohyoid'),
                         (438887, 153933.0, '2153-11-15', '2153-11-15 21:40:00', '2153-11-15 21:40:19', 'Physician ', 948, 'thryohyoid', 'thyrohyoid'),
                         (439624, 153933.0, '2153-11-17', '2153-11-17 16:40:00', '2153-11-17 17:59:54', 'Nursing', 427, 'thryohyoid', 'thyrohyoid'),
                         (1415607, 140318.0, '2158-10-05', '2158-10-05 03:21:00', '2158-10-05 03:37:00', 'Nursing/other', 614, 'dicarded', 'discarded'),
                         (1457608, 163355.0, '2191-03-29', '2191-03-29 14:07:00', '2191-03-29 19:00:00', 'Nursing/other', 69, 'dicarded', 'discarded'),
                         (1601664, 121680.0, '2143-05-09', '2143-05-09 03:03:00', '2143-05-09 03:16:00', 'Nursing/other', 644, 'dicarded', 'discarded'),
                         (1636295, 185655.0, '2188-12-16', '2188-12-16 04:43:00', '2188-12-16 05:09:00', 'Nursing/other', 1142, 'dicarded', 'discarded'),
                         (1749740, 148318.0, '2117-07-21', '2117-07-21 02:30:00', '2117-07-21 02:40:00', 'Nursing/other', 568, 'dicarded', 'discarded'),
                         (367670, 105454.0, '2198-12-27', '2198-12-27 17:39:00', '2198-12-27 17:39:44', 'Nursing', 3474, 'palitave', 'palliative'),
                         (441513, 105911.0, '2128-11-04', '2128-11-04 00:09:00', '2128-11-04 00:09:55', 'Nursing', 287, 'mucoiud', 'mucoid'),
                         (1921938, 106386.0, '2133-06-19', '2133-06-19 05:50:00', '2133-06-19 06:20:00', 'Nursing/other', 785, 'drianable', 'drainable'),
                         (564096, 123444.0, '2137-12-10', '2137-12-10 06:37:00', '2137-12-10 12:19:52', 'Physician ', 72, 'respoding', 'responding'),
                         (1886268, 116217.0, '2191-07-11', '2191-07-11 21:09:00', '2191-07-11 21:34:00', 'Nursing/other', 1257, 'respoding', 'responding'),
                         (1897666, 113968.0, '2163-05-27', '2163-05-27 02:41:00', '2163-05-27 03:09:00', 'Nursing/other', 762, 'respoding', 'responding'),
                         (1953821, 123105.0, '2151-01-31', '2151-01-31 09:55:00', '2151-01-31 10:00:00', 'Nursing/other', 291, 'respoding', 'responding'),
                         (2003890, 119156.0, '2191-11-10', '2191-11-10 19:46:00', '2191-11-10 20:28:00', 'Nursing/other', 188, 'respoding', 'responding'),
                         (1009634, 156619.0, '2139-04-14', '2139-04-14 09:25:00', float('nan'), 'Radiology', 2276, 'invadses', 'invades'),
                         (669423, 194803.0, '2107-01-07', '2107-01-07 03:11:00', '2107-01-07 08:18:55', 'Physician ', 4483, 'resutlts', 'results'),
                         (314039, 185673.0, '2167-05-29', '2167-05-29 08:53:00', '2167-05-29 08:53:34', 'Physician ', 150, 'exctration', 'extraction'),
                         (17403, 155341.0, '2196-12-13', float('nan'), float('nan'), 'Discharge summary', 2418, 'agglutin', 'agglutinin'),
                         (17403, 155341.0, '2196-12-13', float('nan'), float('nan'), 'Discharge summary', 5498, 'agglutin', 'agglutinin'),
                         (1699420, 191228.0, '2186-06-22', '2186-06-22 15:08:00', '2186-06-22 15:27:00', 'Nursing/other', 2298, 'agglutin', 'agglutinin'),
                         (1712818, 126739.0, '2179-06-17', '2179-06-17 16:16:00', '2179-06-17 16:44:00', 'Nursing/other', 1269, 'vasoleine', 'vaseline'),
                         (36158, 191315.0, '2189-01-06', float('nan'), float('nan'), 'Discharge summary', 6479, 'eosoniphils', 'eosinophils'),
                         (324343, 115381.0, '2109-08-12', '2109-08-12 01:16:00', '2109-08-12 04:48:02', 'Nursing', 2403, 'eosoniphils', 'eosinophils'),
                         (1878689, 107531.0, '2197-08-20', '2197-08-20 18:20:00', '2197-08-20 18:39:00', 'Nursing/other', 1120, 'eosoniphils', 'eosinophils'),
                         (1437, 198342.0, '2171-11-06', float('nan'), float('nan'), 'Discharge summary', 6768, 'hopitilization', 'hospitalization'),
                         (1317550, 175661.0, '2120-09-19', '2120-09-19 05:11:00', '2120-09-19 05:33:00', 'Nursing/other', 813, 'nygstamus', 'nystagmus'),
                         (40767, 101872.0, '2164-06-22', float('nan'), float('nan'), 'Discharge summary', 11989, 'diurises', 'diuresis'),
                         (361520, 170526.0, '2115-02-15', '2115-02-15 18:52:00', '2115-02-15 18:52:49', 'Nursing', 274, 'diurises', 'diuresis'),
                         (536754, 133120.0, '2201-09-24', '2201-09-24 20:56:00', '2201-09-24 20:56:45', 'Nursing', 960, 'diurises', 'diuresis'),
                         (657692, 162017.0, '2199-10-06', '2199-10-06 04:56:00', '2199-10-06 04:56:16', 'Nursing', 653, 'diurises', 'diuresis'),
                         (329899, 149115.0, '2120-09-28', '2120-09-28 01:53:00', '2120-09-28 02:31:02', 'Nursing', 152, 'nutritio', 'nutrition'),
                         (1478358, 108050.0, '2128-08-28', '2128-08-28 19:40:00', '2128-08-28 19:45:00', 'Nursing/other', 147, 'nutritio', 'nutrition'),
                         (15617, 112075.0, '2105-08-20', float('nan'), float('nan'), 'Discharge summary', 8942, 'pevlic', 'pelvic'),
                         (42479, 126156.0, '2197-02-13', float('nan'), float('nan'), 'Discharge summary', 9563, 'pevlic', 'pelvic'),
                         (1039112, float('nan'), '2114-10-07', '2114-10-07 17:07:00', float('nan'), 'Radiology', 425, 'pevlic', 'pelvic'),
                         (1175000, 139246.0, '2196-03-16', '2196-03-16 22:04:00', float('nan'), 'Radiology', 912, 'pevlic', 'pelvic'),
                         (1229136, 176736.0, '2187-03-20', '2187-03-20 10:52:00', float('nan'), 'Radiology', 323, 'pevlic', 'pelvic'),
                         (37332, 108873.0, '2187-12-13', float('nan'), float('nan'), 'Discharge summary', 8277, 'syndomre', 'syndrome'),
                         (324716, 163180.0, '2165-10-05', '2165-10-05 08:18:00', '2165-10-05 08:18:47', 'Physician ', 7743, 'syndomre', 'syndrome'),
                         (510383, 148470.0, '2194-04-13', '2194-04-13 17:42:00', '2194-04-13 23:15:40', 'Physician ', 152, 'syndomre', 'syndrome'),
                         (515531, 165323.0, '2128-07-16', '2128-07-16 08:39:00', '2128-07-16 08:39:43', 'Physician ', 1524, 'syndomre', 'syndrome'),
                         (732252, 163180.0, '2165-10-06', '2165-10-06 06:57:00', '2165-10-06 06:58:29', 'Physician ', 8057, 'syndomre', 'syndrome'),
                         (1057983, 153143.0, '2148-01-04', '2148-01-04 16:32:00', float('nan'), 'Radiology', 313, 'tomorrowpt', 'tomorrow'),
                         (454016, 186618.0, '2138-11-23', '2138-11-23 12:20:00', '2138-11-23 12:20:32', 'Physician ', 5911, 'respiraoty', 'respiratory'),
                         (600821, 109844.0, '2194-02-27', '2194-02-27 16:30:00', '2194-02-27 16:30:09', 'Nutrition', 418, 'respiraoty', 'respiratory'),
                         (1544016, 149063.0, '2200-08-31', '2200-08-31 16:22:00', '2200-08-31 16:23:00', 'Nursing/other', 63, 'respiraoty', 'respiratory'),
                         (1865425, 135854.0, '2154-03-25', '2154-03-25 08:48:00', '2154-03-25 08:53:00', 'Nursing/other', 454, 'respiraoty', 'respiratory'),
                         (1920512, 193809.0, '2181-08-20', '2181-08-20 17:12:00', '2181-08-20 17:37:00', 'Nursing/other', 806, 'respiraoty', 'respiratory'),
                         (1413207, 108778.0, '2154-09-05', '2154-09-05 03:47:00', '2154-09-05 03:57:00', 'Nursing/other', 328, 'pupilarry', 'pupilary'),
                         (1443696, 175529.0, '2192-02-05', '2192-02-05 17:27:00', '2192-02-05 18:15:00', 'Nursing/other', 433, 'pupilarry', 'pupilary'),
                         (31540, 170100.0, '2174-09-01', float('nan'), float('nan'), 'Discharge summary', 2257, 'desceased', 'deceased'),
                         (40006, 143555.0, '2114-12-12', float('nan'), float('nan'), 'Discharge summary', 3253, 'desceased', 'deceased'),
                         (41502, 126602.0, '2196-04-27', float('nan'), float('nan'), 'Discharge summary', 2446, 'desceased', 'deceased'),
                         (1300467, 162212.0, '2128-04-22', '2128-04-22 06:38:00', '2128-04-22 07:17:00', 'Nursing/other', 493, 'rflex', 'reflex'),
                         (1607446, 188216.0, '2129-06-10', '2129-06-10 18:36:00', '2129-06-10 18:44:00', 'Nursing/other', 122, 'rflex', 'reflex'),
                         (4563, 170926.0, '2159-10-03', float('nan'), float('nan'), 'Discharge summary', 10476, 'miacalicin', 'miacalcic'),
                         (573035, 108863.0, '2178-12-30', '2178-12-30 15:23:00', '2178-12-30 15:23:54', 'Rehab Services', 799, 'sliglhtly', 'slightly'),
                         (1047150, 187216.0, '2141-11-14', '2141-11-14 19:36:00', float('nan'), 'Radiology', 687, 'sliglhtly', 'slightly'),
                         (331880, 176512.0, '2192-10-04', '2192-10-04 08:35:00', '2192-10-04 08:35:22', 'Physician ', 6834, 'fionger', 'finger'),
                         (10978, 131723.0, '2161-02-06', float('nan'), float('nan'), 'Discharge summary', 6861, 'prblm', 'problem'),
                         (40541, 152960.0, '2102-08-25', float('nan'), float('nan'), 'Discharge summary', 9901, 'prblm', 'problem'),
                         (39246, 170820.0, '2188-04-30', float('nan'), float('nan'), 'Discharge summary', 11033, 'prblm', 'problem'),
                         (43815, 135049.0, '2130-02-18', float('nan'), float('nan'), 'Discharge summary', 8943, 'glarigine', 'glargine'),
                         (1280751, 183092.0, '2125-12-16', '2125-12-16 13:20:00', '2125-12-16 13:35:00', 'Nursing/other', 756, 'glarigine', 'glargine'),
                         (813792, float('nan'), '2173-04-05', '2173-04-05 01:08:00', float('nan'), 'Radiology', 947, 'strcture', 'stricture'),
                         (41475, 128256.0, '2122-03-22', float('nan'), float('nan'), 'Discharge summary', 10610, 'atazanzvir', 'atazanavir'),
                         (434637, 132026.0, '2124-08-27', '2124-08-27 07:00:00', '2124-08-27 14:45:54', 'Physician ', 6285, 'readins', 'reading'),
                         (515560, 193172.0, '2167-09-23', '2167-09-23 15:08:00', '2167-09-23 17:37:30', 'Nursing', 2163, 'readins', 'reading'),
                         (324431, 113344.0, '2138-09-30', '2138-09-30 07:07:00', '2138-09-30 09:55:01', 'Physician ', 73, 'ureteteral', 'ureteral'),
                         (30586, 118085.0, '2117-07-12', float('nan'), float('nan'), 'Discharge summary', 10627, 'pleurodeses', 'pleurodesis'),
                         (51973, 167243.0, '2177-08-22', float('nan'), float('nan'), 'Discharge summary', 2916, 'pleurodeses', 'pleurodesis'),
                         (393558, 173748.0, '2130-05-29', '2130-05-29 07:37:00', '2130-05-29 07:41:40', 'Physician ', 5708, 'pleurodeses', 'pleurodesis'),
                         (394100, 173748.0, '2130-05-28', '2130-05-28 01:35:00', '2130-05-28 01:35:55', 'Physician ', 14754, 'pleurodeses', 'pleurodesis'),
                         (841671, 198453.0, '2116-12-01', '2116-12-01 09:30:00', float('nan'), 'Radiology', 365, 'pleurodeses', 'pleurodesis'),
                         (49764, 132792.0, '2118-10-12', float('nan'), float('nan'), 'Discharge summary', 966, 'hematc', 'hematic'),
                         (926898, 164929.0, '2117-11-05', '2117-11-05 02:11:00', float('nan'), 'Radiology', 797, 'adenapathy', 'adenopathy'),
                         (746909, 143261.0, '2173-02-08', '2173-02-08 12:58:00', float('nan'), 'Radiology', 375, 'drainags', 'drainages'),
                         (746522, 143261.0, '2173-02-03', '2173-02-03 10:35:00', float('nan'), 'Radiology', 396, 'drainags', 'drainages'),
                         (749053, 140215.0, '2173-03-09', '2173-03-09 17:49:00', float('nan'), 'Radiology', 448, 'drainags', 'drainages'),
                         (1780185, 163221.0, '2169-07-03', '2169-07-03 16:45:00', '2169-07-03 17:22:00', 'Nursing/other', 1784, 'drainags', 'drainages'),
                         (356533, 191071.0, '2133-12-18', '2133-12-18 09:50:00', '2133-12-18 09:50:55', 'Rehab Services', 810, 'partcipate', 'participated'),
                         (43208, 170733.0, '2197-09-05', float('nan'), float('nan'), 'Discharge summary', 6717, 'cortonary', 'coronary'),
                         (60377, float('nan'), '2177-01-29', float('nan'), float('nan'), 'Echo', 143, 'cortonary', 'coronary'),
                         (423048, 170733.0, '2197-08-29', '2197-08-29 06:47:00', '2197-08-29 07:26:18', 'Physician ', 3992, 'cortonary', 'coronary'),
                         (343518, 112602.0, '2161-10-14', '2161-10-14 06:23:00', '2161-10-14 10:38:09', 'Physician ', 5748, 'cerruloplasm', 'ceruloplasmin'),
                         (40483, 114058.0, '2175-07-25', float('nan'), float('nan'), 'Discharge summary', 988, 'adiitionally', 'additionally'),
                         (371918, 179721.0, '2161-03-23', '2161-03-23 15:52:00', '2161-03-23 15:52:28', 'Physician ', 543, 'induartion', 'induration'),
                         (37785, 197665.0, '2179-11-23', float('nan'), float('nan'), 'Discharge summary', 6859, 'beccame', 'became'),
                         (550067, 188176.0, '2170-11-08', '2170-11-08 18:11:00', '2170-11-08 18:11:17', 'Respiratory ', 1205, 'beccame', 'became'),
                         (1306004, 193286.0, '2183-08-08', '2183-08-08 19:04:00', '2183-08-08 19:27:00', 'Nursing/other', 501, 'beccame', 'became'),
                         (319817, 109208.0, '2156-09-28', '2156-09-28 09:50:00', '2156-09-28 15:19:06', 'Physician ', 7739, 'resuem', 'resume'),
                         (342485, 108728.0, '2108-12-02', '2108-12-02 08:09:00', '2108-12-02 08:09:49', 'Physician ', 5267, 'resuem', 'resume'),
                         (431931, 170768.0, '2180-08-18', '2180-08-18 10:42:00', '2180-08-18 11:07:11', 'Physician ', 4294, 'resuem', 'resume'),
                         (646635, 170768.0, '2180-08-18', '2180-08-18 10:42:00', '2180-08-18 10:42:48', 'Physician ', 4294, 'resuem', 'resume'),
                         (730110, 108728.0, '2108-12-02', '2108-12-02 08:09:00', '2108-12-02 11:17:21', 'Physician ', 5420, 'resuem', 'resume'),
                         (1071767, float('nan'), '2135-03-25', '2135-03-25 10:33:00', float('nan'), 'Radiology', 2545, 'entends', 'extends'),
                         (610084, 138635.0, '2125-06-14', '2125-06-13 15:59:00', '2125-06-14 12:11:03', 'General', 8477, 'inferolater', 'inferolateral'),
                         (1085775, 143452.0, '2140-08-20', '2140-08-20 01:38:00', float('nan'), 'Radiology', 3879, 'inferolater', 'inferolateral'),
                         (1571945, 146567.0, '2178-12-23', '2178-12-23 15:30:00', '2178-12-23 16:03:00', 'Nursing/other', 1306, 'addeed', 'added'),
                         (1770258, 148312.0, '2113-06-12', '2113-06-12 05:17:00', '2113-06-12 05:54:00', 'Nursing/other', 667, 'addeed', 'added'),
                         (1829096, 126635.0, '2103-05-10', '2103-05-10 20:19:00', '2103-05-10 20:49:00', 'Nursing/other', 783, 'addeed', 'added'),
                         (1264820, 181695.0, '2177-11-27', '2177-11-27 06:44:00', '2177-11-27 07:01:00', 'Nursing/other', 237, 'tachcypnea', 'tachypnea'),
                         (69724, 140210.0, '2121-06-04', float('nan'), float('nan'), 'Echo', 520, 'procecure', 'procedure'),
                         (615108, float('nan'), '2170-04-04', '2170-04-04 16:23:00', '2170-04-04 16:23:22', 'Nursing', 607, 'procecure', 'procedure'),
                         (759711, 171475.0, '2131-06-05', '2131-06-05 13:22:00', float('nan'), 'Radiology', 6368, 'procecure', 'procedure'),
                         (778180, 136001.0, '2127-02-20', '2127-02-20 14:14:00', float('nan'), 'Radiology', 1692, 'procecure', 'procedure'),
                         (1877316, 105152.0, '2167-05-14', '2167-05-14 06:02:00', '2167-05-14 06:11:00', 'Nursing/other', 502, 'procecure', 'procedure'),
                         (90209, 128425.0, '2147-12-14', float('nan'), float('nan'), 'Echo', 2240, 'milfdly', 'mildly'),
                         (1658990, 187314.0, '2138-04-27', '2138-04-27 18:15:00', '2138-04-27 18:40:00', 'Nursing/other', 1308, 'placewd', 'placed'),
                         (1876911, 149291.0, '2109-04-22', '2109-04-22 05:39:00', '2109-04-22 05:43:00', 'Nursing/other', 67, 'placewd', 'placed'),
                         (564502, 191708.0, '2201-02-15', '2201-02-15 07:27:00', '2201-02-15 09:54:30', 'Physician ', 3902, 'controntation', 'confrontation'),
                         (6985, 145893.0, '2178-09-06', float('nan'), float('nan'), 'Discharge summary', 1320, 'persistenting', 'persisting'),
                         (396978, 142211.0, '2180-07-10', '2180-07-10 07:26:00', '2180-07-10 13:47:15', 'Physician ', 7547, 'persistenting', 'persisting'),
                         (1659277, 119332.0, '2165-06-15', '2165-06-15 20:00:00', '2165-06-15 20:37:00', 'Nursing/other', 1450, 'persistenting', 'persisting'),
                         (54640, 191654.0, '2191-09-23', float('nan'), float('nan'), 'Discharge summary', 3876, 'extingushes', 'extinguishes'),
                         (23488, 111671.0, '2131-01-04', float('nan'), float('nan'), 'Discharge summary', 14463, 'perhaphs', 'perhaps'),
                         (539077, 160593.0, '2199-11-04', '2199-11-04 01:51:00', '2199-11-04 02:10:24', 'Nursing', 131, 'opern', 'open'),
                         (582606, 198275.0, '2189-03-28', '2189-03-28 06:41:00', '2189-03-28 06:41:59', 'Nursing', 1041, 'opern', 'open'),
                         (1132215, 175374.0, '2163-06-23', '2163-06-23 09:57:00', float('nan'), 'Radiology', 353, 'opern', 'open'),
                         (1799446, 173890.0, '2123-01-27', '2123-01-27 02:09:00', '2123-01-27 02:32:00', 'Nursing/other', 1551, 'opern', 'open'),
                         (1899180, 146999.0, '2114-09-01', '2114-09-01 14:55:00', '2114-09-01 15:09:00', 'Nursing/other', 1392, 'opern', 'open'),
                         (2026134, 154189.0, '2108-12-10', '2108-12-10 04:53:00', '2108-12-10 05:05:00', 'Nursing/other', 229, 'huggger', 'hugger'),
                         (22415, 154098.0, '2131-08-03', float('nan'), float('nan'), 'Discharge summary', 8778, 'anxioltics', 'anxiolytics'),
                         (8813, 167163.0, '2117-05-17', float('nan'), float('nan'), 'Discharge summary', 4357, 'inclusing', 'including'),
                         (438318, 171976.0, '2185-12-01', '2185-12-01 23:45:00', '2185-12-01 23:45:38', 'Physician ', 10763, 'inclusing', 'including'),
                         (1253172, float('nan'), '2169-11-08', '2169-11-08 10:40:00', float('nan'), 'Radiology', 408, 'inclusing', 'including'),
                         (1625252, 188674.0, '2126-10-15', '2126-10-15 22:32:00', '2126-10-15 22:37:00', 'Nursing/other', 35, 'inclusing', 'including'),
                         (1699511, 143529.0, '2126-02-07', '2126-02-07 02:51:00', '2126-02-07 03:41:00', 'Nursing/other', 1987, 'inclusing', 'including'),
                         (7599, 113024.0, '2173-06-25', float('nan'), float('nan'), 'Discharge summary', 2337, 'sphyncterotomies', 'sphincterotomies'),
                         (9988, 123667.0, '2174-06-24', float('nan'), float('nan'), 'Discharge summary', 1513, 'sphyncterotomies', 'sphincterotomies'),
                         (764581, 178729.0, '2102-10-02', '2102-10-02 11:30:00', float('nan'), 'Radiology', 2261, 'paracoloic', 'paracolic'),
                         (1063536, 183070.0, '2190-03-08', '2190-03-08 06:00:00', float('nan'), 'Radiology', 4781, 'paracoloic', 'paracolic'),
                         (482969, 138481.0, '2136-03-23', '2136-03-23 12:03:00', '2136-03-23 12:03:24', 'Nutrition', 517, 'liquds', 'liquids'),
                         (1400278, 110206.0, '2103-07-04', '2103-07-04 18:14:00', '2103-07-04 18:30:00', 'Nursing/other', 1381, 'liquds', 'liquids'),
                         (1912082, 134784.0, '2149-11-13', '2149-11-13 08:12:00', '2149-11-13 08:24:00', 'Nursing/other', 375, 'liquds', 'liquids'),
                         (1917649, 178697.0, '2102-01-23', '2102-01-23 18:52:00', '2102-01-23 19:06:00', 'Nursing/other', 476, 'liquds', 'liquids'),
                         (1975438, 160192.0, '2115-05-09', '2115-05-09 19:15:00', '2115-05-09 19:27:00', 'Nursing/other', 667, 'liquds', 'liquids'),
                         (39491, 142671.0, '2106-08-31', float('nan'), float('nan'), 'Discharge summary', 8471, 'levimere', 'levemir'),
                         (1281734, 178555.0, '2107-01-15', '2107-01-15 04:19:00', '2107-01-15 04:31:00', 'Nursing/other', 845, 'sleeo', 'sleep'),
                         (1398278, 179173.0, '2152-04-21', '2152-04-21 09:10:00', '2152-04-21 09:13:00', 'Nursing/other', 98, 'sleeo', 'sleep'),
                         (2052770, 178732.0, '2157-01-23', '2157-01-23 08:21:00', '2157-01-23 08:29:00', 'Nursing/other', 206, 'sleeo', 'sleep'),
                         (38379, 182072.0, '2128-04-11', float('nan'), float('nan'), 'Discharge summary', 2182, 'phelyephrine', 'phenylephrine'),
                         (57425, 166492.0, '2161-11-10', float('nan'), float('nan'), 'Echo', 3425, 'phelyephrine', 'phenylephrine'),
                         (78770, 190137.0, '2186-06-08', float('nan'), float('nan'), 'Echo', 697, 'phelyephrine', 'phenylephrine'),
                         (1511844, 123434.0, '2148-01-16', '2148-01-16 17:05:00', '2148-01-16 17:19:00', 'Nursing/other', 516, 'quiclkly', 'quickly'),
                         (1599198, 190951.0, '2194-04-14', '2194-04-14 18:48:00', '2194-04-14 19:05:00', 'Nursing/other', 758, 'quiclkly', 'quickly'),
                         (1764869, 186478.0, '2156-08-21', '2156-08-21 05:35:00', '2156-08-21 05:46:00', 'Nursing/other', 279, 'quiclkly', 'quickly'),
                         (1818409, 138405.0, '2102-12-14', '2102-12-14 18:41:00', '2102-12-14 18:58:00', 'Nursing/other', 459, 'quiclkly', 'quickly'),
                         (317824, 113437.0, '2119-07-04', '2119-07-04 06:18:00', '2119-07-04 06:18:27', 'Physician ', 5333, 'hypoxemnia', 'hypoxemia'),
                         (49940, 109090.0, '2113-02-17', float('nan'), float('nan'), 'Discharge summary', 10902, 'polymycin', 'polymyxin'),
                         (330303, 189920.0, '2187-10-31', '2187-10-31 10:35:00', '2187-10-31 10:35:42', 'Physician ', 6459, 'propfolo', 'propofol'),
                         (726245, 164649.0, '2132-08-25', '2132-08-25 18:13:00', '2132-08-25 18:13:28', 'Nursing', 663, 'propfolo', 'propofol'),
                         (136869, float('nan'), '2191-05-19', float('nan'), float('nan'), 'ECG', 61, 'noited', 'noted'),
                         (641332, 108526.0, '2120-07-26', '2120-07-26 18:45:00', '2120-07-26 18:45:10', 'Nursing', 340, 'noited', 'noted'),
                         (768407, float('nan'), '2126-11-13', '2126-11-13 16:02:00', float('nan'), 'Radiology', 1132, 'noited', 'noted'),
                         (1455503, 138757.0, '2199-03-02', '2199-03-02 14:42:00', '2199-03-02 15:18:00', 'Nursing/other', 1507, 'noited', 'noted'),
                         (1728012, 101420.0, '2193-10-30', '2193-10-30 02:21:00', '2193-10-30 02:27:00', 'Nursing/other', 73, 'noited', 'noted'),
                         (48145, 156834.0, '2126-10-25', float('nan'), float('nan'), 'Discharge summary', 2173, 'hiting', 'hitting'),
                         (54265, 167790.0, '2182-04-26', float('nan'), float('nan'), 'Discharge summary', 691, 'hiting', 'hitting'),
                         (310507, float('nan'), '2133-02-22', '2133-02-22 06:44:00', '2133-02-22 06:54:14', 'Physician ', 5660, 'hiting', 'hitting'),
                         (1774665, 116031.0, '2174-05-19', '2174-05-19 05:40:00', '2174-05-19 05:59:00', 'Nursing/other', 679, 'hiting', 'hitting'),
                         (2010540, 177863.0, '2180-05-13', '2180-05-13 19:41:00', '2180-05-13 19:52:00', 'Nursing/other', 257, 'hiting', 'hitting'),
                         (522846, 113508.0, '2189-08-04', '2189-08-04 15:59:00', '2189-08-04 15:59:32', 'Nursing', 1058, 'conytinue', 'continue'),
                         (974230, 199014.0, '2156-09-26', '2156-09-26 10:30:00', float('nan'), 'Radiology', 2076, 'enahncing', 'enhancing'),
                         (1005292, float('nan'), '2146-03-26', '2146-03-26 04:03:00', float('nan'), 'Radiology', 2260, 'enahncing', 'enhancing'),
                         (1101501, float('nan'), '2193-11-20', '2193-11-20 07:04:00', float('nan'), 'Radiology', 1287, 'enahncing', 'enhancing'),
                         (1133676, 172356.0, '2119-07-01', '2119-07-01 17:00:00', float('nan'), 'Radiology', 1216, 'enahncing', 'enhancing'),
                         (1178332, float('nan'), '2154-03-15', '2154-03-15 20:28:00', float('nan'), 'Radiology', 654, 'enahncing', 'enhancing'),
                         (1208745, 127280.0, '2154-12-04', '2154-12-04 17:05:00', float('nan'), 'Radiology', 884, 'trasnformation', 'transformation'),
                         (1275643, 137965.0, '2158-01-07', '2158-01-07 18:41:00', '2158-01-07 19:04:00', 'Nursing/other', 1074, 'exstravation', 'extravasation'),
                         (45175, 171635.0, '2144-03-18', float('nan'), float('nan'), 'Discharge summary', 5202, 'stablitized', 'stabilized'),
                         (1095838, 183122.0, '2148-10-15', '2148-10-15 12:48:00', float('nan'), 'Radiology', 849, 'entrappment', 'entrapment'),
                         (48651, 185057.0, '2151-06-25', float('nan'), float('nan'), 'Discharge summary', 3857, 'creartinine', 'creatinine'),
                         (830040, float('nan'), '2166-09-15', '2166-09-15 09:10:00', float('nan'), 'Radiology', 908, 'creartinine', 'creatinine'),
                         (1138733, float('nan'), '2133-07-07', '2133-07-07 12:49:00', float('nan'), 'Radiology', 1383, 'isoechic', 'isoechoic'),
                         (3904, 164212.0, '2153-04-04', float('nan'), float('nan'), 'Discharge summary', 5346, 'prosthese', 'prostheses'),
                         (9543, 102047.0, '2174-09-05', float('nan'), float('nan'), 'Discharge summary', 965, 'apsects', 'aspects'),
                         (22812, 184626.0, '2102-12-20', float('nan'), float('nan'), 'Discharge summary', 472, 'apsects', 'aspects'),
                         (1580742, 135298.0, '2150-03-10', '2150-03-10 18:38:00', '2150-03-10 18:46:00', 'Nursing/other', 282, 'apsects', 'aspects'),
                         (1938996, 173779.0, '2191-03-03', '2191-03-03 00:42:00', '2191-03-03 00:56:00', 'Nursing/other', 1382, 'apsects', 'aspects'),
                         (11889, 138831.0, '2139-10-26', float('nan'), float('nan'), 'Discharge summary', 5836, 'receipient', 'recipient'),
                         (779701, 136166.0, '2138-02-24', '2138-02-24 11:02:00', float('nan'), 'Radiology', 162, 'receipient', 'recipient'),
                         (832504, float('nan'), '2110-09-16', '2110-09-16 13:42:00', float('nan'), 'Radiology', 182, 'receipient', 'recipient'),
                         (1202585, 138831.0, '2139-10-21', '2139-10-21 00:22:00', float('nan'), 'Radiology', 813, 'receipient', 'recipient'),
                         (1450864, 152476.0, '2178-04-05', '2178-04-05 10:52:00', '2178-04-05 11:02:00', 'Nursing/other', 661, 'receipient', 'recipient'),
                         (1292551, 127646.0, '2109-03-23', '2109-03-23 05:52:00', '2109-03-23 06:25:00', 'Nursing/other', 996, 'hypercarbnea', 'hypercarbia'),
                         (1507156, 125343.0, '2156-11-16', '2156-11-16 17:19:00', '2156-11-16 17:31:00', 'Nursing/other', 1939, 'hypercarbnea', 'hypercarbia'),
                         (1476290, 181105.0, '2122-08-12', '2122-08-12 17:27:00', '2122-08-12 17:45:00', 'Nursing/other', 2217, 'hypercarbnea', 'hypercarbia'),
                         (1601359, 109304.0, '2194-06-11', '2194-06-11 06:00:00', '2194-06-11 06:11:00', 'Nursing/other', 70, 'hypercarbnea', 'hypercarbia'),
                         (1911357, 121877.0, '2126-10-07', '2126-10-07 17:40:00', '2126-10-07 17:58:00', 'Nursing/other', 765, 'hypercarbnea', 'hypercarbia'),
                         (53899, 121039.0, '2200-04-09', float('nan'), float('nan'), 'Discharge summary', 4604, 'breakfst', 'breakfast'),
                         (1310560, 125440.0, '2145-03-16', '2145-03-16 16:39:00', '2145-03-16 17:31:00', 'Nursing/other', 113, 'breakfst', 'breakfast'),
                         (1608237, 101061.0, '2139-05-21', '2139-05-21 18:27:00', '2139-05-21 18:34:00', 'Nursing/other', 420, 'breakfst', 'breakfast'),
                         (1813672, 165973.0, '2109-07-27', '2109-07-27 11:06:00', '2109-07-27 11:09:00', 'Nursing/other', 94, 'breakfst', 'breakfast'),
                         (1819894, 124317.0, '2191-09-14', '2191-09-14 18:41:00', '2191-09-14 18:50:00', 'Nursing/other', 758, 'breakfst', 'breakfast'),
                         (10237, 176365.0, '2168-11-29', float('nan'), float('nan'), 'Discharge summary', 13795, 'hypolucency', 'hyperlucency'),
                         (744609, 147360.0, '2178-11-16', '2178-11-16 06:24:00', float('nan'), 'Radiology', 1493, 'hypolucency', 'hyperlucency'),
                         (823149, 187517.0, '2102-05-08', '2102-05-08 12:56:00', float('nan'), 'Radiology', 151, 'hypolucency', 'hyperlucency'),
                         (1028276, 111351.0, '2108-08-13', '2108-08-13 12:08:00', float('nan'), 'Radiology', 2016, 'hypolucency', 'hyperlucency'),
                         (1049206, 186914.0, '2155-01-13', '2155-01-13 16:06:00', float('nan'), 'Radiology', 2248, 'hypolucency', 'hyperlucency'),
                         (53335, 177050.0, '2170-11-20', float('nan'), float('nan'), 'Discharge summary', 6433, 'riskk', 'risk'),
                         (2067227, 137657.0, '2101-10-17', '2101-10-17 07:07:00', '2101-10-17 07:29:00', 'Nursing/other', 634, 'riskk', 'risk'),
                         (501650, 169482.0, '2194-03-12', '2194-03-12 22:15:00', '2194-03-12 22:15:53', 'Physician ', 8137, 'opitate', 'opiate'),
                         (45737, 159826.0, '2162-11-16', float('nan'), float('nan'), 'Discharge summary', 3801, 'acidoic', 'acidotic'),
                         (422276, 100509.0, '2172-08-27', '2172-08-27 14:30:00', '2172-08-27 18:35:21', 'Nursing', 967, 'acidoic', 'acidotic'),
                         (1396391, 135312.0, '2180-04-19', '2180-04-19 05:42:00', '2180-04-19 06:10:00', 'Nursing/other', 867, 'acidoic', 'acidotic'),
                         (1559992, 104424.0, '2151-01-15', '2151-01-15 04:40:00', '2151-01-15 05:06:00', 'Nursing/other', 1062, 'acidoic', 'acidotic'),
                         (1983264, 177998.0, '2130-06-30', '2130-06-30 06:03:00', '2130-06-30 06:08:00', 'Nursing/other', 441, 'acidoic', 'acidotic'),
                         (367571, 133570.0, '2180-02-25', '2180-02-25 06:36:00', '2180-02-25 06:37:04', 'Physician ', 3809, 'nchanged', 'unchanged'),
                         (467207, 155878.0, '2180-01-18', '2180-01-18 07:34:00', '2180-01-18 13:44:09', 'Physician ', 1105, 'nchanged', 'unchanged'),
                         (725501, 177517.0, '2153-02-13', '2153-02-13 07:25:00', '2153-02-13 12:26:44', 'Physician ', 878, 'nchanged', 'unchanged'),
                         (1013645, 125483.0, '2174-06-01', '2174-06-01 11:09:00', float('nan'), 'Radiology', 1041, 'nchanged', 'unchanged'),
                         (1324433, 103175.0, '2185-02-05', '2185-02-05 07:44:00', '2185-02-05 07:51:00', 'Nursing/other', 21, 'nchanged', 'unchanged'),
                         (920668, 146680.0, '2100-08-18', '2100-08-18 18:18:00', float('nan'), 'Radiology', 743, 'stomah', 'stomach'),
                         (979751, 170228.0, '2199-10-19', '2199-10-19 10:38:00', float('nan'), 'Radiology', 1960, 'stomah', 'stomach'),
                         (994058, 183234.0, '2138-12-22', '2138-12-22 20:10:00', float('nan'), 'Radiology', 3544, 'stomah', 'stomach'),
                         (45661, 107802.0, '2149-11-19', float('nan'), float('nan'), 'Discharge summary', 5903, 'hypodenities', 'hypodensities'),
                         (761977, 126173.0, '2132-07-24', '2132-07-24 10:20:00', float('nan'), 'Radiology', 1252, 'hypodenities', 'hypodensities'),
                         (1165183, 188651.0, '2193-12-28', '2193-12-28 20:59:00', float('nan'), 'Radiology', 610, 'hypodenities', 'hypodensities'),
                         (3213, 189311.0, '2139-07-15', float('nan'), float('nan'), 'Discharge summary', 21163, 'otator', 'rotator'),
                         (523735, 100522.0, '2139-07-22', '2139-07-22 11:47:00', '2139-07-22 11:47:31', 'Physician ', 8606, 'otator', 'rotator'),
                         (854792, float('nan'), '2116-04-08', '2116-04-08 10:32:00', float('nan'), 'Radiology', 1184, 'initiationg', 'initiating'),
                         (1260280, 127149.0, '2163-03-12', '2163-03-12 19:41:00', '2163-03-12 20:56:00', 'Nursing/other', 1765, 'usally', 'usually'),
                         (1466276, 102251.0, '2199-06-05', '2199-06-05 18:01:00', '2199-06-05 18:05:00', 'Nursing/other', 217, 'usally', 'usually'),
                         (1436905, 124907.0, '2172-11-29', '2172-11-29 16:51:00', '2172-11-29 17:17:00', 'Nursing/other', 756, 'usally', 'usually'),
                         (1650720, 180525.0, '2167-03-27', '2167-03-27 03:07:00', '2167-03-27 03:44:00', 'Nursing/other', 539, 'usally', 'usually'),
                         (1701710, 109697.0, '2162-11-15', '2162-11-15 18:02:00', '2162-11-15 18:45:00', 'Nursing/other', 131, 'usally', 'usually'),
                         (22526, 190392.0, '2134-07-24', float('nan'), float('nan'), 'Discharge summary', 1593, 'perfored', 'performed'),
                         (23197, 163436.0, '2188-12-11', float('nan'), float('nan'), 'Discharge summary', 2260, 'perfored', 'performed'),
                         (26604, 114623.0, '2171-06-06', float('nan'), float('nan'), 'Discharge summary', 7654, 'perfored', 'performed'),
                         (37620, 128238.0, '2184-03-19', float('nan'), float('nan'), 'Discharge summary', 7466, 'perfored', 'performed'),
                         (1662716, 161127.0, '2106-06-04', '2106-06-04 17:08:00', '2106-06-04 17:20:00', 'Nursing/other', 995, 'perfored', 'performed'),
                         (37965, 148348.0, '2111-11-03', float('nan'), float('nan'), 'Discharge summary', 8752, 'nondiagnositic', 'nondiagnostic'),
                         (472949, 127164.0, '2150-02-16', '2150-02-16 07:05:00', '2150-02-16 07:05:13', 'Physician ', 5105, 'nondiagnositic', 'nondiagnostic'),
                         (780099, float('nan'), '2123-04-23', '2123-04-23 14:45:00', float('nan'), 'Radiology', 514, 'nondiagnositic', 'nondiagnostic'),
                         (1567292, 164713.0, '2182-11-06', '2182-11-06 17:29:00', '2182-11-06 18:13:00', 'Nursing/other', 2181, 'interparachymal', 'interparenchymal'),
                         (26142, 149849.0, '2123-04-26', float('nan'), float('nan'), 'Discharge summary', 12292, 'couselor', 'counselor'),
                         (46877, 197971.0, '2121-07-12', float('nan'), float('nan'), 'Discharge summary', 2179, 'couselor', 'counselor'),
                         (1851151, 157740.0, '2161-05-21', '2161-05-21 14:18:00', '2161-05-21 14:24:00', 'Nursing/other', 437, 'couselor', 'counselor'),
                         (2065382, 165231.0, '2116-10-22', '2116-10-22 13:33:00', '2116-10-22 14:08:00', 'Nursing/other', 607, 'couselor', 'counselor'),
                         (1789854, 140674.0, '2180-03-12', '2180-03-12 05:35:00', '2180-03-12 05:44:00', 'Nursing/other', 381, 'consustently', 'consistently'),
                         (36399, 102913.0, '2120-06-25', float('nan'), float('nan'), 'Discharge summary', 7424, 'jypokinesis', 'hypokinesis'),
                         (1414074, 118966.0, '2142-06-22', '2142-06-22 10:43:00', '2142-06-22 10:45:00', 'Nursing/other', 16, 'coomfortable', 'comfortable'),
                         (1496723, 157165.0, '2106-10-12', '2106-10-12 15:25:00', '2106-10-12 15:28:00', 'Nursing/other', 155, 'coomfortable', 'comfortable'),
                         (1586079, 165720.0, '2128-02-29', '2128-02-29 10:20:00', '2128-02-29 10:21:00', 'Nursing/other', 173, 'coomfortable', 'comfortable'),
                         (1872976, 179204.0, '2117-08-04', '2117-08-04 19:02:00', '2117-08-04 19:07:00', 'Nursing/other', 72, 'coomfortable', 'comfortable'),
                         (14048, 137829.0, '2183-08-14', float('nan'), float('nan'), 'Discharge summary', 9807, 'eythromycin', 'erythromycin'),
                         (38392, 104737.0, '2187-09-20', float('nan'), float('nan'), 'Discharge summary', 5249, 'eythromycin', 'erythromycin'),
                         (1340597, 169553.0, '2185-08-18', '2185-08-18 17:05:00', '2185-08-18 17:10:00', 'Nursing/other', 207, 'eythromycin', 'erythromycin'),
                         (1657461, 126060.0, '2168-03-07', '2168-03-07 05:24:00', '2168-03-07 05:45:00', 'Nursing/other', 1040, 'eythromycin', 'erythromycin'),
                         (1838280, 119236.0, '2145-06-18', '2145-06-18 03:15:00', '2145-06-18 03:27:00', 'Nursing/other', 199, 'eythromycin', 'erythromycin'),
                         (1138484, float('nan'), '2104-07-20', '2104-07-20 19:48:00', float('nan'), 'Radiology', 1535, 'verrtex', 'vertex'),
                         (401312, 193788.0, '2186-05-26', '2186-05-26 07:38:00', '2186-05-26 11:25:09', 'Physician ', 3383, 'recreationl', 'recreational'),
                         (1571800, 175889.0, '2102-01-03', '2102-01-03 15:20:00', '2102-01-03 17:13:00', 'Nursing/other', 893, 'iompared', 'compared'),
                         (15973, 186850.0, '2146-10-20', float('nan'), float('nan'), 'Discharge summary', 6056, 'aggitating', 'agitating'),
                         (1301755, 198864.0, '2139-03-18', '2139-03-18 22:22:00', '2139-03-18 22:27:00', 'Nursing/other', 158, 'aggitating', 'agitating'),
                         (1561770, 123323.0, '2132-01-07', '2132-01-07 15:24:00', '2132-01-07 15:28:00', 'Nursing/other', 231, 'aggitating', 'agitating'),
                         (1556878, 158592.0, '2164-01-07', '2164-01-07 16:39:00', '2164-01-07 17:18:00', 'Nursing/other', 179, 'aggitating', 'agitating'),
                         (1771542, 128609.0, '2107-02-11', '2107-02-11 07:08:00', '2107-02-11 07:41:00', 'Nursing/other', 384, 'aggitating', 'agitating'),
                         (6505, 140875.0, '2175-04-01', float('nan'), float('nan'), 'Discharge summary', 729, 'symmptoms', 'symptoms'),
                         (34590, 125705.0, '2103-09-21', float('nan'), float('nan'), 'Discharge summary', 772, 'symmptoms', 'symptoms'),
                         (55083, 163089.0, '2104-08-22', float('nan'), float('nan'), 'Discharge summary', 706, 'symmptoms', 'symptoms'),
                         (733681, 147469.0, '2141-06-11', '2141-06-11 14:04:00', '2141-06-11 14:04:29', 'Physician ', 525, 'symmptoms', 'symptoms'),
                         (721093, 125487.0, '2114-02-27', '2114-02-27 04:03:00', '2114-02-27 04:03:41', 'Nursing', 261, 'intrgrillin', 'integrilin'),
                         (374412, 130889.0, '2196-01-31', '2196-01-31 03:03:00', '2196-01-31 04:28:46', 'Nursing', 1557, 'cathotomy', 'canthotomy'),
                         (436772, 188690.0, '2199-11-14', '2199-11-14 07:43:00', '2199-11-14 11:48:46', 'Physician ', 4138, 'eneteroscopy', 'enteroscopy'),
                         (444767, 142885.0, '2192-09-20', '2192-09-20 10:47:00', '2192-09-20 18:42:45', 'Physician ', 4051, 'eneteroscopy', 'enteroscopy'),
                         (525917, 114549.0, '2193-10-17', '2193-10-17 21:11:00', '2193-10-17 21:11:46', 'Physician ', 676, 'eneteroscopy', 'enteroscopy'),
                         (657461, 142885.0, '2192-09-20', '2192-09-20 10:47:00', '2192-09-20 10:59:43', 'Physician ', 3798, 'eneteroscopy', 'enteroscopy'),
                         (41660, 187913.0, '2121-08-07', float('nan'), float('nan'), 'Discharge summary', 14748, 'tropnins', 'troponins'),
                         (47264, 153072.0, '2185-05-25', float('nan'), float('nan'), 'Discharge summary', 12306, 'tropnins', 'troponins'),
                         (689122, 112212.0, '2189-11-20', '2189-11-20 07:23:00', '2189-11-20 07:23:34', 'Physician ', 4385, 'tropnins', 'troponins'),
                         (40043, 163381.0, '2171-11-27', float('nan'), float('nan'), 'Discharge summary', 10878, 'celcoxib', 'celecoxib'),
                         (29861, 113456.0, '2152-12-11', float('nan'), float('nan'), 'Discharge summary', 2107, 'retsriction', 'restriction'),
                         (1510004, 105896.0, '2162-01-19', '2162-01-19 12:31:00', '2162-01-19 13:10:00', 'Nursing/other', 737, 'retsriction', 'restriction'),
                         (1030818, 133357.0, '2154-09-29', '2154-09-29 10:31:00', float('nan'), 'Radiology', 621, 'thickineng', 'thickening'),
                         (712303, 161612.0, '2122-04-24', '2122-04-24 07:40:00', '2122-04-24 20:14:37', 'Physician ', 6902, 'addedndum', 'addendum'),
                         (1651441, 123246.0, '2163-02-18', '2163-02-18 06:34:00', '2163-02-18 06:35:00', 'Nursing/other', 17, 'addedndum', 'addendum'),
                         (1668909, 143040.0, '2144-01-31', '2144-01-31 18:03:00', '2144-01-31 18:05:00', 'Nursing/other', 7, 'addedndum', 'addendum'),
                         (404635, 154395.0, '2119-05-05', '2119-05-05 12:22:00', '2119-05-05 12:22:26', 'Nutrition', 2718, 'imprves', 'improves'),
                         (423264, 124194.0, '2119-08-18', '2119-08-18 08:41:00', '2119-08-18 09:07:11', 'Physician ', 4969, 'imprves', 'improves'),
                         (1916710, 143822.0, '2189-07-10', '2189-07-10 04:10:00', '2189-07-10 04:28:00', 'Nursing/other', 1613, 'imprves', 'improves'),
                         (785913, 117716.0, '2154-06-04', '2154-06-04 19:02:00', float('nan'), 'Radiology', 354, 'goint', 'going'),
                         (1264536, 193328.0, '2184-10-13', '2184-10-13 12:43:00', '2184-10-13 13:02:00', 'Nursing/other', 978, 'goint', 'going'),
                         (1281789, 110273.0, '2148-02-28', '2148-02-28 16:12:00', '2148-02-28 16:32:00', 'Nursing/other', 87, 'goint', 'going'),
                         (1404229, 136195.0, '2160-06-12', '2160-06-12 12:54:00', '2160-06-12 13:17:00', 'Nursing/other', 215, 'goint', 'going'),
                         (1919444, 156214.0, '2193-01-09', '2193-01-09 23:08:00', '2193-01-09 23:23:00', 'Nursing/other', 408, 'goint', 'going'),
                         (51793, 141764.0, '2139-12-06', float('nan'), float('nan'), 'Discharge summary', 1871, 'lichanified', 'lichenified'),
                         (597657, 100188.0, '2193-04-12', '2193-04-12 17:27:00', '2193-04-12 17:28:07', 'Nursing', 747, 'ilnconsistently', 'inconsistently'),
                         (1154485, float('nan'), '2175-12-06', '2175-12-06 10:16:00', float('nan'), 'Radiology', 4087, 'fragmeny', 'fragment'),
                         (1789784, 159014.0, '2137-06-25', '2137-06-25 05:24:00', '2137-06-25 06:01:00', 'Nursing/other', 684, 'veerapamil', 'verapamil'),
                         (42003, 165089.0, '2105-09-28', float('nan'), float('nan'), 'Discharge summary', 8867, 'radaiation', 'radiation'),
                         (1636299, 161277.0, '2107-10-17', '2107-10-17 17:51:00', '2107-10-17 18:55:00', 'Nursing/other', 79, 'radaiation', 'radiation'),
                         (21620, 154577.0, '2184-10-07', float('nan'), float('nan'), 'Discharge summary', 7090, 'trancortical', 'transcortical'),
                         (23520, 195674.0, '2107-07-02', float('nan'), float('nan'), 'Discharge summary', 11142, 'ruputure', 'rupture'),
                         (39265, 186115.0, '2132-03-11', float('nan'), float('nan'), 'Discharge summary', 444, 'ruputure', 'rupture'),
                         (637287, 198533.0, '2179-09-16', '2179-09-16 01:07:00', '2179-09-16 05:48:49', 'Physician ', 8868, 'ruputure', 'rupture'),
                         (1481166, 147499.0, '2108-08-01', '2108-08-01 07:02:00', '2108-08-01 07:12:00', 'Nursing/other', 529, 'ruputure', 'rupture'),
                         (1747224, 139039.0, '2152-01-21', '2152-01-21 16:40:00', '2152-01-21 16:49:00', 'Nursing/other', 620, 'ruputure', 'rupture'),
                         (721104, 166313.0, '2162-04-19', '2162-04-19 17:06:00', '2162-04-19 21:38:28', 'Nursing', 2031, 'antiarrhymatic', 'antiarrhythmic'),
                         (2063, 107438.0, '2191-09-05', float('nan'), float('nan'), 'Discharge summary', 2336, 'hyperphosphtemia', 'hyperphosphatemia'),
                         (49067, 165699.0, '2143-07-21', float('nan'), float('nan'), 'Discharge summary', 7252, 'dwait', 'wait'),
                         (536830, 199972.0, '2186-08-31', '2186-08-31 04:55:00', '2186-08-31 04:55:48', 'Nursing', 800, 'respositions', 'repositions'),
                         (1541867, 151761.0, '2147-08-14', '2147-08-14 05:38:00', '2147-08-14 05:49:00', 'Nursing/other', 75, 'respositions', 'repositions'),
                         (1787590, 136012.0, '2144-12-31', '2144-12-31 05:27:00', '2144-12-31 05:37:00', 'Nursing/other', 817, 'respositions', 'repositions'),
                         (1879424, 117913.0, '2176-07-14', '2176-07-14 17:07:00', '2176-07-14 17:39:00', 'Nursing/other', 110, 'respositions', 'repositions'),
                         (2017590, 111329.0, '2191-09-14', '2191-09-14 04:57:00', '2191-09-14 05:23:00', 'Nursing/other', 163, 'respositions', 'repositions'),
                         (22856, 108303.0, '2132-10-02', float('nan'), float('nan'), 'Discharge summary', 11286, 'interpet', 'interpret'),
                         (313236, 111039.0, '2109-04-19', '2109-04-19 12:36:00', '2109-04-19 12:36:29', 'Physician ', 3987, 'interpet', 'interpret'),
                         (1568169, 181528.0, '2171-10-04', '2171-10-04 19:28:00', '2171-10-04 19:36:00', 'Nursing/other', 82, 'interpet', 'interpret'),
                         (1804878, 198847.0, '2129-08-26', '2129-08-26 04:05:00', '2129-08-26 04:19:00', 'Nursing/other', 1226, 'interpet', 'interpret'),
                         (1890630, 185878.0, '2193-05-25', '2193-05-25 05:30:00', '2193-05-25 05:51:00', 'Nursing/other', 84, 'interpet', 'interpret'),
                         (462327, 189804.0, '2139-12-04', '2139-12-04 07:43:00', '2139-12-04 07:43:25', 'Physician ', 6013, 'unlely', 'unlikely'),
                         (45481, 191037.0, '2192-10-31', float('nan'), float('nan'), 'Discharge summary', 8525, 'supoprtive', 'supportive'),
                         (1575784, 134562.0, '2124-12-29', '2124-12-29 16:27:00', '2124-12-29 16:51:00', 'Nursing/other', 1374, 'supoprtive', 'supportive'),
                         (319928, 134413.0, '2201-08-12', '2201-08-07 18:41:00', '2201-08-12 16:19:45', 'Nursing', 53, 'heptacellular', 'hepatocellular'),
                         (48733, 100713.0, '2201-06-24', float('nan'), float('nan'), 'Discharge summary', 8056, 'ecsherichia', 'escherichia'),
                         (746934, 194779.0, '2137-01-28', '2137-01-28 16:53:00', float('nan'), 'Radiology', 827, 'peratracheal', 'paratracheal'),
                         (878905, 115814.0, '2175-09-09', '2175-09-09 07:42:00', float('nan'), 'Radiology', 928, 'peratracheal', 'paratracheal'),
                         (957359, 133725.0, '2155-04-12', '2155-04-12 22:02:00', float('nan'), 'Radiology', 1949, 'peratracheal', 'paratracheal'),
                         (38106, 120306.0, '2122-06-29', float('nan'), float('nan'), 'Discharge summary', 12117, 'subcaspsular', 'subcapsular'),
                         (1109503, 196702.0, '2137-11-18', '2137-11-18 11:43:00', float('nan'), 'Radiology', 507, 'subcaspsular', 'subcapsular'),
                         (25732, 160458.0, '2198-07-13', float('nan'), float('nan'), 'Discharge summary', 994, 'dlateral', 'lateral'),
                         (1034108, 123807.0, '2181-09-05', '2181-09-05 04:12:00', float('nan'), 'Radiology', 705, 'dlateral', 'lateral'),
                         (361984, 180711.0, '2169-01-22', '2169-01-22 08:19:00', '2169-01-22 11:08:42', 'Physician ', 477, 'colisitina', 'colistin'),
                         (41180, 120222.0, '2109-04-24', float('nan'), float('nan'), 'Discharge summary', 4349, 'criciod', 'cricoid'),
                         (1329988, 145174.0, '2143-01-28', '2143-01-28 14:40:00', '2143-01-28 14:56:00', 'Nursing/other', 1303, 'containemnt', 'containment'),
                         (1370614, 199569.0, '2104-09-06', '2104-09-06 14:52:00', '2104-09-06 15:07:00', 'Nursing/other', 901, 'containemnt', 'containment'),
                         (1439971, 188355.0, '2200-12-23', '2200-12-23 18:26:00', '2200-12-23 18:30:00', 'Nursing/other', 541, 'containemnt', 'containment'),
                         (2060597, 178058.0, '2200-03-27', '2200-03-27 18:09:00', '2200-03-27 18:21:00', 'Nursing/other', 899, 'containemnt', 'containment'),
                         (22045, 155091.0, '2131-10-03', float('nan'), float('nan'), 'Discharge summary', 2660, 'immunosuppresents', 'immunosuppressants'),
                         (1468945, 196615.0, '2157-05-31', '2157-05-31 17:38:00', '2157-05-31 18:06:00', 'Nursing/other', 1486, 'immunosuppresents', 'immunosuppressants'),
                         (1630624, 133453.0, '2180-11-13', '2180-11-13 03:29:00', '2180-11-13 04:12:00', 'Nursing/other', 61, 'immunosuppresents', 'immunosuppressants'),
                         (1708827, 105274.0, '2105-01-10', '2105-01-10 05:10:00', '2105-01-10 05:31:00', 'Nursing/other', 1577, 'immunosuppresents', 'immunosuppressants'),
                         (1978570, 174479.0, '2170-12-20', '2170-12-20 04:49:00', '2170-12-20 05:21:00', 'Nursing/other', 225, 'immunosuppresents', 'immunosuppressants'),
                         (35081, 145192.0, '2194-01-16', float('nan'), float('nan'), 'Discharge summary', 7903, 'brance', 'branch'),
                         (63857, 130802.0, '2181-06-06', float('nan'), float('nan'), 'Echo', 1460, 'brance', 'branch'),
                         (338178, 134632.0, '2104-12-09', '2104-12-09 05:38:00', '2104-12-09 14:31:39', 'Physician ', 3333, 'expirqtory', 'expiratory'),
                         (338737, 134632.0, '2104-12-08', '2104-12-08 05:52:00', '2104-12-08 12:40:13', 'Physician ', 3127, 'expirqtory', 'expiratory'),
                         (366387, 170022.0, '2200-01-14', '2200-01-14 07:06:00', '2200-01-14 07:23:57', 'Nursing', 912, 'haematochezia', 'hematochezia'),
                         (5359, 182206.0, '2195-02-23', float('nan'), float('nan'), 'Discharge summary', 18542, 'bronhcial', 'bronchial'),
                         (5359, 182206.0, '2195-02-23', float('nan'), float('nan'), 'Discharge summary', 20475, 'bronhcial', 'bronchial'),
                         (25484, 147199.0, '2160-09-30', float('nan'), float('nan'), 'Discharge summary', 682, 'bronhcial', 'bronchial'),
                         (13013, 110378.0, '2179-06-01', float('nan'), float('nan'), 'Discharge summary', 2942, 'cnadidate', 'candidate'),
                         (1772020, 150957.0, '2163-09-21', '2163-09-21 18:00:00', '2163-09-21 18:17:00', 'Nursing/other', 1681, 'cnadidate', 'candidate'),
                         (329712, 116877.0, '2124-09-22', '2124-09-22 07:34:00', '2124-09-22 12:57:13', 'Physician ', 9063, 'assistsance', 'assistance'),
                         (330340, 116877.0, '2124-09-21', '2124-09-21 07:47:00', '2124-09-21 21:54:08', 'Physician ', 9047, 'assistsance', 'assistance'),
                         (16048, 192034.0, '2173-03-13', float('nan'), float('nan'), 'Discharge summary', 224, 'pedestrin', 'pedestrian'),
                         (798100, 137981.0, '2200-11-08', '2200-11-08 15:19:00', float('nan'), 'Radiology', 234, 'pedestrin', 'pedestrian'),
                         (24042, 150309.0, '2151-03-27', float('nan'), float('nan'), 'Discharge summary', 646, 'erythromyocin', 'erythromycin'),
                         (46120, 123540.0, '2171-04-15', float('nan'), float('nan'), 'Discharge summary', 11832, 'erythromyocin', 'erythromycin'),
                         (650367, 140792.0, '2153-11-06', '2153-11-06 14:47:00', '2153-11-06 14:47:28', 'Rehab Services', 637, 'erythromyocin', 'erythromycin'),
                         (1744045, 138031.0, '2103-06-23', '2103-06-23 02:52:00', '2103-06-23 02:59:00', 'Nursing/other', 937, 'erythromyocin', 'erythromycin'),
                         (1951908, 153442.0, '2192-06-16', '2192-06-16 02:38:00', '2192-06-16 02:46:00', 'Nursing/other', 849, 'erythromyocin', 'erythromycin'),
                         (8156, 164562.0, '2190-12-17', float('nan'), float('nan'), 'Discharge summary', 5340, 'gentteal', 'genteal'),
                         (40760, 166642.0, '2128-05-31', float('nan'), float('nan'), 'Discharge summary', 1326, 'heterogebous', 'heterogenous'),
                         (1535317, 141603.0, '2145-07-13', '2145-07-13 03:18:00', '2145-07-13 04:01:00', 'Nursing/other', 878, 'cardiognitc', 'cardiogenic'),
                         (1520006, 130751.0, '2112-04-28', '2112-04-28 05:13:00', '2112-04-28 05:44:00', 'Nursing/other', 273, 'antipyschotic', 'antipsychotic'),
                         (1601027, 127853.0, '2105-07-05', '2105-07-05 05:12:00', '2105-07-05 05:30:00', 'Nursing/other', 1608, 'antipyschotic', 'antipsychotic'),
                         (1902749, 125440.0, '2145-03-16', '2145-03-16 05:11:00', '2145-03-16 05:17:00', 'Nursing/other', 890, 'antipyschotic', 'antipsychotic'),
                         (1280442, 184444.0, '2158-10-07', '2158-10-07 00:45:00', '2158-10-07 00:49:00', 'Nursing/other', 74, 'justs', 'just'),
                         (1505587, 157165.0, '2106-12-06', '2106-12-06 15:11:00', '2106-12-06 15:14:00', 'Nursing/other', 379, 'justs', 'just'),
                         (1651253, 169214.0, '2187-01-11', '2187-01-11 17:38:00', '2187-01-11 18:30:00', 'Nursing/other', 1228, 'justs', 'just'),
                         (1714329, 159670.0, '2180-10-12', '2180-10-12 05:09:00', '2180-10-12 05:31:00', 'Nursing/other', 1034, 'justs', 'just'),
                         (1948073, 137605.0, '2176-09-08', '2176-09-08 13:51:00', '2176-09-08 14:11:00', 'Nursing/other', 795, 'justs', 'just'),
                         (412497, 110458.0, '2171-06-05', '2171-06-05 01:39:00', '2171-06-05 04:50:51', 'Nursing', 547, 'distendes', 'distended'),
                         (654930, 140792.0, '2153-10-02', '2153-10-02 05:32:00', '2153-10-02 05:32:13', 'Nursing', 1596, 'distendes', 'distended'),
                         (1494926, 134480.0, '2105-10-27', '2105-10-27 05:03:00', '2105-10-27 05:43:00', 'Nursing/other', 1284, 'distendes', 'distended'),
                         (12487, 156641.0, '2143-12-24', float('nan'), float('nan'), 'Discharge summary', 20361, 'nectotic', 'necrotic'),
                         (791410, float('nan'), '2164-07-05', '2164-07-05 13:13:00', float('nan'), 'Radiology', 140, 'nectotic', 'necrotic'),
                         (868978, 172507.0, '2133-06-03', '2133-06-03 09:49:00', float('nan'), 'Radiology', 356, 'nectotic', 'necrotic'),
                         (1302847, 171236.0, '2153-05-08', '2153-05-08 03:09:00', '2153-05-08 03:19:00', 'Nursing/other', 425, 'nectotic', 'necrotic'),
                         (2004499, 154225.0, '2160-02-23', '2160-02-23 06:13:00', '2160-02-23 06:30:00', 'Nursing/other', 1383, 'nectotic', 'necrotic'),
                         (1609977, 156598.0, '2103-06-05', '2103-06-05 06:14:00', '2103-06-05 06:34:00', 'Nursing/other', 838, 'ythick', 'thick'),
                         (449541, 138145.0, '2170-11-05', '2170-11-05 05:20:00', '2170-11-05 05:21:27', 'Physician ', 7555, 'senstivites', 'sensitivities'),
                         (460064, 138145.0, '2170-11-04', '2170-11-04 05:00:00', '2170-11-04 05:00:45', 'Physician ', 6900, 'senstivites', 'sensitivities'),
                         (690563, 138145.0, '2170-11-03', '2170-11-03 05:22:00', '2170-11-03 05:22:07', 'Physician ', 7419, 'senstivites', 'sensitivities'),
                         (418268, 127885.0, '2173-07-01', '2173-07-01 05:32:00', '2173-07-01 05:32:46', 'Physician ', 3839, 'downgoin', 'downgoing'),
                         (507773, 144591.0, '2111-06-12', '2111-06-12 17:00:00', '2111-06-12 17:00:27', 'Respiratory ', 1342, 'tranpsort', 'transport'),
                         (716539, 131324.0, '2177-03-11', '2177-03-11 19:06:00', '2177-03-11 19:06:17', 'Respiratory ', 1278, 'tranpsort', 'transport'),
                         (720264, 170675.0, '2182-05-07', '2182-05-07 17:39:00', '2182-05-07 17:39:34', 'Respiratory ', 1379, 'tranpsort', 'transport'),
                         (41699, 178896.0, '2165-11-20', float('nan'), float('nan'), 'Discharge summary', 1388, 'cefotaxmin', 'cefotaxime'),
                         (1266501, 155393.0, '2157-03-31', '2157-03-31 18:36:00', '2157-03-31 19:01:00', 'Nursing/other', 1168, 'sennekot', 'senekot'),
                         (1464801, 157809.0, '2120-06-21', '2120-06-21 05:18:00', '2120-06-21 05:39:00', 'Nursing/other', 1388, 'sennekot', 'senekot'),
                         (1548519, 196308.0, '2203-08-08', '2203-08-08 06:39:00', '2203-08-08 06:49:00', 'Nursing/other', 818, 'sennekot', 'senekot'),
                         (2008977, 103618.0, '2152-11-10', '2152-11-10 18:12:00', '2152-11-10 18:39:00', 'Nursing/other', 558, 'sennekot', 'senekot'),
                         (1997914, 117705.0, '2148-01-11', '2148-01-11 05:47:00', '2148-01-11 06:08:00', 'Nursing/other', 889, 'sennekot', 'senekot'),
                         (33574, 143285.0, '2147-02-14', float('nan'), float('nan'), 'Discharge summary', 4312, 'secontary', 'secondary'),
                         (18365, 165684.0, '2130-08-10', float('nan'), float('nan'), 'Discharge summary', 2614, 'secon', 'second'),
                         (791991, float('nan'), '2192-08-29', '2192-08-29 08:54:00', float('nan'), 'Radiology', 979, 'secon', 'second'),
                         (1289223, 130841.0, '2101-11-20', '2101-11-20 17:56:00', '2101-11-20 18:37:00', 'Nursing/other', 3109, 'secon', 'second'),
                         (1582050, 104215.0, '2191-03-10', '2191-03-10 17:21:00', '2191-03-10 17:50:00', 'Nursing/other', 3178, 'secon', 'second'),
                         (2015505, 165593.0, '2164-10-02', '2164-10-02 18:24:00', '2164-10-02 18:50:00', 'Nursing/other', 804, 'secon', 'second'),
                         (325744, 100738.0, '2144-10-23', '2144-10-23 06:34:00', '2144-10-23 11:05:57', 'Physician ', 4117, 'tinglin', 'tingling'),
                         (540637, 106048.0, '2119-11-01', '2119-11-01 01:21:00', '2119-11-01 01:21:24', 'Physician ', 2909, 'tinglin', 'tingling'),
                         (1685716, 180238.0, '2115-12-28', '2115-12-28 06:06:00', '2115-12-28 06:19:00', 'Nursing/other', 414, 'tinglin', 'tingling'),
                         (1781025, 157901.0, '2182-11-16', '2182-11-16 17:37:00', '2182-11-16 18:30:00', 'Nursing/other', 228, 'tinglin', 'tingling'),
                         (44749, 172553.0, '2140-10-05', float('nan'), float('nan'), 'Discharge summary', 14754, 'arrythmmia', 'arrythmia'),
                         (705768, 161258.0, '2166-02-13', '2166-02-13 20:37:00', '2166-02-13 20:46:20', 'Nursing', 1683, 'contonese', 'cantonese'),
                         (1475021, 179519.0, '2142-05-29', '2142-05-29 05:32:00', '2142-05-29 05:52:00', 'Nursing/other', 774, 'contonese', 'cantonese'),
                         (1500098, 101276.0, '2120-10-19', '2120-10-19 03:16:00', '2120-10-19 03:29:00', 'Nursing/other', 69, 'contonese', 'cantonese'),
                         (1576519, 100489.0, '2164-02-07', '2164-02-07 17:41:00', '2164-02-07 18:00:00', 'Nursing/other', 102, 'contonese', 'cantonese'),
                         (1661005, 147262.0, '2165-06-20', '2165-06-20 17:51:00', '2165-06-20 18:35:00', 'Nursing/other', 3067, 'contonese', 'cantonese'),
                         (428678, 160890.0, '2180-07-24', '2180-07-24 19:48:00', '2180-07-24 20:05:03', 'Physician ', 347, 'pregant', 'pregnant'),
                         (510092, 169880.0, '2115-03-31', '2115-03-31 08:22:00', '2115-03-31 11:15:52', 'Physician ', 5988, 'pregant', 'pregnant'),
                         (1294136, 119441.0, '2143-09-18', '2143-09-18 17:31:00', '2143-09-18 17:42:00', 'Nursing/other', 240, 'pregant', 'pregnant'),
                         (1777959, 132060.0, '2112-05-25', '2112-05-25 15:06:00', '2112-05-25 15:12:00', 'Nursing/other', 636, 'pregant', 'pregnant'),
                         (313743, 124661.0, '2187-03-05', '2187-03-05 17:34:00', '2187-03-05 17:34:54', 'Nursing', 1284, 'neuropatic', 'neuropathic'),
                         (379641, 176119.0, '2147-05-03', '2147-05-03 13:22:00', '2147-05-03 13:23:12', 'Physician ', 1207, 'neuropatic', 'neuropathic'),
                         (459036, 148207.0, '2183-01-30', '2183-01-30 04:00:00', '2183-01-30 04:00:39', 'Nursing', 1376, 'neuropatic', 'neuropathic'),
                         (600281, 176119.0, '2147-05-04', '2147-05-04 06:56:00', '2147-05-04 15:29:03', 'Physician ', 10754, 'neuropatic', 'neuropathic'),
                         (1529739, 134455.0, '2122-06-10', '2122-06-10 17:31:00', '2122-06-10 18:23:00', 'Nursing/other', 279, 'neuropatic', 'neuropathic'),
                         (381223, 181779.0, '2158-04-03', '2158-04-03 12:33:00', '2158-04-03 12:33:56', 'Physician ', 344, 'meteprolol', 'metoprolol'),
                         (1589361, 184509.0, '2200-02-28', '2200-02-28 16:00:00', '2200-02-28 16:50:00', 'Nursing/other', 422, 'meteprolol', 'metoprolol'),
                         (1807612, 107653.0, '2142-02-06', '2142-02-06 05:47:00', '2142-02-06 06:03:00', 'Nursing/other', 793, 'meteprolol', 'metoprolol'),
                         (1873533, 180826.0, '2171-06-23', '2171-06-23 15:35:00', '2171-06-23 15:49:00', 'Nursing/other', 441, 'meteprolol', 'metoprolol'),
                         (1884993, 164929.0, '2117-11-03', '2117-11-03 04:49:00', '2117-11-03 05:28:00', 'Nursing/other', 1019, 'meteprolol', 'metoprolol'),
                         (1917656, 106020.0, '2178-03-13', '2178-03-13 05:46:00', '2178-03-13 06:02:00', 'Nursing/other', 971, 'calmmed', 'calmed'),
                         (22694, 119463.0, '2121-12-05', float('nan'), float('nan'), 'Discharge summary', 1107, 'deniesd', 'denied'),
                         (16047, 128487.0, '2189-02-07', float('nan'), float('nan'), 'Discharge summary', 2018, 'denises', 'denies'),
                         (25478, 178410.0, '2106-12-18', float('nan'), float('nan'), 'Discharge summary', 809, 'denises', 'denies'),
                         (558586, 159957.0, '2105-11-18', '2105-11-18 10:27:00', '2105-11-18 15:42:28', 'Physician ', 5593, 'denises', 'denies'),
                         (1917163, 131223.0, '2159-10-17', '2159-10-17 03:53:00', '2159-10-17 04:15:00', 'Nursing/other', 771, 'denises', 'denies'),
                         (369526, 152264.0, '2179-12-30', '2179-12-30 21:29:00', '2179-12-30 21:30:09', 'Physician ', 910, 'remining', 'remaining'),
                         (423665, 136301.0, '2178-08-20', '2178-08-20 09:04:00', '2178-08-20 09:32:18', 'Physician ', 6019, 'remining', 'remaining'),
                         (644099, 114246.0, '2114-11-10', '2114-11-10 09:02:00', '2114-11-10 15:05:25', 'Physician ', 8446, 'remining', 'remaining'),
                         (1260874, 178990.0, '2108-03-16', '2108-03-16 06:15:00', '2108-03-16 06:29:00', 'Nursing/other', 1198, 'remining', 'remaining'),
                         (859167, 144362.0, '2159-06-01', '2159-06-01 16:13:00', float('nan'), 'Radiology', 306, 'thrach', 'trach'),
                         (1182313, 118636.0, '2163-05-22', '2163-05-22 05:04:00', float('nan'), 'Radiology', 356, 'thrach', 'trach'),
                         (1510482, 115711.0, '2123-02-23', '2123-02-23 14:55:00', '2123-02-23 15:06:00', 'Nursing/other', 453, 'thrach', 'trach'),
                         (1806538, 188392.0, '2112-01-21', '2112-01-21 18:30:00', '2112-01-21 18:34:00', 'Nursing/other', 550, 'thrach', 'trach'),
                         (1806548, 102209.0, '2111-09-29', '2111-09-29 16:33:00', '2111-09-29 16:52:00', 'Nursing/other', 540, 'thrach', 'trach'),
                         (28650, 152155.0, '2191-09-23', float('nan'), float('nan'), 'Discharge summary', 7632, 'zyprexax', 'zyprexa'),
                         (1283307, 162590.0, '2137-03-25', '2137-03-25 17:23:00', '2137-03-25 17:56:00', 'Nursing/other', 2325, 'bvery', 'very'),
                         (1323636, 161153.0, '2166-02-16', '2166-02-16 07:03:00', '2166-02-16 07:10:00', 'Nursing/other', 76, 'bvery', 'very'),
                         (1336579, 150376.0, '2144-03-24', '2144-03-24 03:54:00', '2144-03-24 03:59:00', 'Nursing/other', 420, 'bvery', 'very'),
                         (1848550, 169510.0, '2148-04-12', '2148-04-12 15:26:00', '2148-04-12 15:29:00', 'Nursing/other', 385, 'bvery', 'very'),
                         (1914733, 113935.0, '2131-08-07', '2131-08-07 21:50:00', '2131-08-07 22:00:00', 'Nursing/other', 1134, 'bvery', 'very'),
                         (20829, 122958.0, '2138-09-13', float('nan'), float('nan'), 'Discharge summary', 1221, 'holosstolic', 'holosystolic'),
                         (1556989, 123244.0, '2100-12-19', '2100-12-19 16:48:00', '2100-12-19 17:09:00', 'Nursing/other', 1830, 'deveopling', 'developing'),
                         (1267423, 168294.0, '2130-05-14', '2130-05-14 05:41:00', '2130-05-14 05:55:00', 'Nursing/other', 961, 'resuults', 'results'),
                         (683325, 171088.0, '2106-03-29', '2106-03-29 10:47:00', '2106-03-29 10:51:13', 'Nursing', 940, 'brochodilations', 'bronchodilations'),
                         (1138693, float('nan'), '2102-06-20', '2102-06-20 09:47:00', float('nan'), 'Radiology', 1061, 'abnormalityin', 'abnormality'),
                         (1896669, 149253.0, '2158-05-22', '2158-05-22 06:27:00', '2158-05-22 06:59:00', 'Nursing/other', 1965, 'coolaborate', 'collaborate'),
                         (1517160, 199629.0, '2191-04-19', '2191-04-19 16:32:00', '2191-04-19 16:56:00', 'Nursing/other', 874, 'isordril', 'isordil'),
                         (1528363, 145090.0, '2175-05-18', '2175-05-18 15:06:00', '2175-05-18 15:28:00', 'Nursing/other', 902, 'isordril', 'isordil'),
                         (1177165, float('nan'), '2172-04-09', '2172-04-09 14:17:00', float('nan'), 'Radiology', 652, 'parafaulcine', 'parafalcine'),
                         (1113587, 145527.0, '2133-01-26', '2133-01-26 14:47:00', float('nan'), 'Radiology', 785, 'consoliodation', 'consolidation'),
                         (1242431, 113533.0, '2106-08-19', '2106-08-19 10:03:00', float('nan'), 'Radiology', 183, 'cholangiis', 'cholangitis'),
                         (1242455, 113533.0, '2106-08-19', '2106-08-19 11:59:00', float('nan'), 'Radiology', 183, 'cholangiis', 'cholangitis'),
                         (1242455, 113533.0, '2106-08-19', '2106-08-19 11:59:00', float('nan'), 'Radiology', 3398, 'cholangiis', 'cholangitis'),
                         (46062, 197688.0, '2142-10-07', float('nan'), float('nan'), 'Discharge summary', 1838, 'atributes', 'attributes'),
                         (20661, 125677.0, '2161-05-29', float('nan'), float('nan'), 'Discharge summary', 1492, 'alond', 'along'),
                         (26537, 143476.0, '2179-02-16', float('nan'), float('nan'), 'Discharge summary', 4359, 'alond', 'along'),
                         (26069, 142003.0, '2167-03-01', float('nan'), float('nan'), 'Discharge summary', 602, 'alond', 'along'),
                         (1546839, 162467.0, '2146-10-05', '2146-10-05 18:41:00', '2146-10-05 18:48:00', 'Nursing/other', 488, 'alond', 'along'),
                         (1732134, 128421.0, '2111-03-09', '2111-03-09 11:46:00', '2111-03-09 11:48:00', 'Nursing/other', 203, 'alond', 'along'),
                         (354401, float('nan'), '2104-01-30', '2104-01-30 19:23:00', '2104-01-30 19:23:25', 'General', 405, 'satruation', 'saturation'),
                         (346428, 187801.0, '2154-12-15', '2154-12-15 11:27:00', '2154-12-15 11:28:06', 'Physician ', 4179, 'incrwase', 'increase'),
                         (751877, 102005.0, '2177-04-05', '2177-04-05 16:59:00', float('nan'), 'Radiology', 1052, 'patecheal', 'petechial'),
                         (751877, 102005.0, '2177-04-05', '2177-04-05 16:59:00', float('nan'), 'Radiology', 1471, 'patecheal', 'petechial'),
                         (9985, 182868.0, '2159-09-13', float('nan'), float('nan'), 'Discharge summary', 15411, 'tramodal', 'tramadol'),
                         (47142, 144587.0, '2168-10-13', float('nan'), float('nan'), 'Discharge summary', 13330, 'tramodal', 'tramadol'),
                         (1290167, 166617.0, '2186-02-15', '2186-02-15 05:45:00', '2186-02-15 06:08:00', 'Nursing/other', 131, 'evenin', 'evening'),
                         (1345788, 186165.0, '2165-01-19', '2165-01-19 02:24:00', '2165-01-19 02:32:00', 'Nursing/other', 993, 'evenin', 'evening'),
                         (1443865, 164075.0, '2111-01-19', '2111-01-19 06:57:00', '2111-01-19 07:13:00', 'Nursing/other', 541, 'evenin', 'evening'),
                         (1606207, 133123.0, '2197-07-25', '2197-07-25 18:45:00', '2197-07-25 18:47:00', 'Nursing/other', 113, 'evenin', 'evening'),
                         (1813421, 163039.0, '2131-11-28', '2131-11-28 17:31:00', '2131-11-28 18:17:00', 'Nursing/other', 3026, 'evenin', 'evening'),
                         (57985, float('nan'), '2102-07-27', float('nan'), float('nan'), 'Echo', 1087, 'infeoseptal', 'inferoseptal'),
                         (421796, 108311.0, '2190-08-19', '2190-08-19 09:46:00', '2190-08-19 09:46:50', 'Physician ', 4604, 'resusitaton', 'resuscitation'),
                         (1143447, 185604.0, '2135-08-20', '2135-08-20 11:46:00', float('nan'), 'Radiology', 1452, 'dehiscene', 'dehiscence'),
                         (318347, 172102.0, '2137-06-22', '2137-06-22 07:06:00', '2137-06-22 12:12:24', 'Physician ', 8298, 'alverolar', 'alveolar'),
                         (318528, 172102.0, '2137-06-23', '2137-06-23 07:29:00', '2137-06-23 07:36:25', 'Physician ', 7456, 'alverolar', 'alveolar'),
                         (635861, 103989.0, '2136-07-27', '2136-07-27 16:17:00', '2136-07-27 16:17:12', 'Physician ', 840, 'alverolar', 'alveolar'),
                         (641414, 199699.0, '2177-07-12', '2177-07-12 20:06:00', '2177-07-12 20:06:48', 'Nursing', 1889, 'barrieer', 'barrier'),
                         (1898694, 184444.0, '2158-10-05', '2158-10-05 13:40:00', '2158-10-05 14:27:00', 'Nursing/other', 673, 'barrieer', 'barrier'),
                         (37747, 125479.0, '2132-02-23', float('nan'), float('nan'), 'Discharge summary', 5690, 'preciptitously', 'precipitously'),
                         (459079, 132888.0, '2151-01-09', '2151-01-09 18:12:00', '2151-01-09 18:12:51', 'Nursing', 108, 'preciptitously', 'precipitously'),
                         (1665323, 165367.0, '2200-08-12', '2200-08-12 06:17:00', '2200-08-12 06:35:00', 'Nursing/other', 821, 'fluctuatio', 'fluctuation'),
                         (1739834, 178889.0, '2117-12-03', '2117-12-03 03:15:00', '2117-12-03 03:15:00', 'Nursing/other', 69, 'dyamics', 'dynamics'),
                         (1739834, 178889.0, '2117-12-03', '2117-12-03 03:15:00', '2117-12-03 03:15:00', 'Nursing/other', 298, 'dyamics', 'dynamics'),
                         (3359, 108845.0, '2188-09-05', float('nan'), float('nan'), 'Discharge summary', 5906, 'autocoidal', 'autacoidal'),
                         (678734, 170748.0, '2128-11-20', '2128-11-20 10:10:00', '2128-11-20 10:10:48', 'Physician ', 5893, 'doubel', 'double'),
                         (1147100, 183676.0, '2187-09-12', '2187-09-12 15:27:00', float('nan'), 'Radiology', 444, 'doubel', 'double'),
                         (1190798, float('nan'), '2187-06-21', '2187-06-21 11:06:00', float('nan'), 'Radiology', 3615, 'doubel', 'double'),
                         (1267402, 128369.0, '2145-05-30', '2145-05-30 17:40:00', '2145-05-30 17:53:00', 'Nursing/other', 124, 'doubel', 'double'),
                         (41191, 140690.0, '2156-02-25', float('nan'), float('nan'), 'Discharge summary', 4074, 'mimick', 'mimic'),
                         (732097, 148216.0, '2150-11-09', '2150-11-09 05:17:00', '2150-11-09 05:17:27', 'Nursing', 1105, 'mimick', 'mimic'),
                         (1232862, float('nan'), '2184-04-28', '2184-04-28 15:28:00', float('nan'), 'Radiology', 1401, 'mimick', 'mimic'),
                         (1686659, 114019.0, '2152-12-05', '2152-12-05 10:53:00', '2152-12-05 10:56:00', 'Nursing/other', 107, 'mimick', 'mimic'),
                         (1878380, 182415.0, '2144-02-18', '2144-02-18 06:03:00', '2144-02-18 06:17:00', 'Nursing/other', 780, 'mimick', 'mimic'),
                         (367555, 103921.0, '2162-02-18', '2162-02-18 10:07:00', '2162-02-18 15:54:11', 'Physician ', 5736, 'phebilitis', 'phlebitis'),
                         (39173, 110391.0, '2118-02-02', float('nan'), float('nan'), 'Discharge summary', 6699, 'difinitely', 'definitely'),
                         (892563, 180878.0, '2171-01-19', '2171-01-19 15:55:00', float('nan'), 'Radiology', 176, 'exteme', 'extreme'),
                         (1510710, 136739.0, '2176-01-14', '2176-01-14 15:54:00', '2176-01-14 17:25:00', 'Nursing/other', 925, 'exteme', 'extreme'),
                         (1657731, 193108.0, '2133-03-15', '2133-03-15 18:08:00', '2133-03-15 18:30:00', 'Nursing/other', 910, 'exteme', 'extreme'),
                         (1652499, 137693.0, '2202-03-28', '2202-03-28 06:19:00', '2202-03-28 06:47:00', 'Nursing/other', 1502, 'exteme', 'extreme'),
                         (1777585, 160080.0, '2196-09-04', '2196-09-04 16:49:00', '2196-09-04 17:53:00', 'Nursing/other', 574, 'exteme', 'extreme'),
                         (4118, 121445.0, '2179-07-15', float('nan'), float('nan'), 'Discharge summary', 4823, 'limite', 'limited'),
                         (897843, 110786.0, '2165-04-15', '2165-04-15 00:07:00', float('nan'), 'Radiology', 969, 'limite', 'limited'),
                         (1007663, 195418.0, '2184-06-15', '2184-06-15 15:11:00', float('nan'), 'Radiology', 1840, 'limite', 'limited'),
                         (1062329, float('nan'), '2164-01-30', '2164-01-30 07:58:00', float('nan'), 'Radiology', 1315, 'limite', 'limited'),
                         (1180012, 102605.0, '2154-04-25', '2154-04-25 07:41:00', float('nan'), 'Radiology', 1494, 'limite', 'limited'),
                         (25914, 142065.0, '2178-07-12', float('nan'), float('nan'), 'Discharge summary', 16938, 'proporanolol', 'propranolol'),
                         (1378808, 125787.0, '2161-03-27', '2161-03-27 12:11:00', '2161-03-27 12:18:00', 'Nursing/other', 382, 'proporanolol', 'propranolol'),
                         (1821322, 111585.0, '2128-01-26', '2128-01-26 08:14:00', '2128-01-26 08:41:00', 'Nursing/other', 1364, 'requiriemnts', 'requirements'),
                         (395913, 133416.0, '2130-04-22', '2130-04-22 13:36:00', '2130-04-22 16:01:23', 'Nursing', 2629, 'splemectomy', 'splenectomy'),
                         (444148, 127008.0, '2132-10-20', '2132-10-20 05:01:00', '2132-10-20 08:11:43', 'Physician ', 8512, 'splemectomy', 'splenectomy'),
                         (1303277, 138597.0, '2194-06-06', '2194-06-06 06:22:00', '2194-06-06 06:50:00', 'Nursing/other', 646, 'splemectomy', 'splenectomy'),
                         (46377, 128609.0, '2107-03-03', float('nan'), float('nan'), 'Discharge summary', 20031, 'psitacci', 'psittaci'),
                         (843825, float('nan'), '2198-11-05', '2198-11-05 07:39:00', float('nan'), 'Radiology', 4824, 'untin', 'until'),
                         (1552501, 160416.0, '2173-08-28', '2173-08-28 16:55:00', '2173-08-28 17:28:00', 'Nursing/other', 341, 'untin', 'until'),
                         (1509721, 185497.0, '2158-02-06', '2158-02-06 15:17:00', '2158-02-06 15:28:00', 'Nursing/other', 78, 'transprot', 'transport'),
                         (1984477, 178604.0, '2179-06-29', '2179-06-29 06:19:00', '2179-06-29 06:44:00', 'Nursing/other', 2201, 'transprot', 'transport'),
                         (30598, 165603.0, '2119-04-07', float('nan'), float('nan'), 'Discharge summary', 745, 'chesp', 'chest'),
                         (49673, 152278.0, '2122-03-19', float('nan'), float('nan'), 'Discharge summary', 11008, 'chesp', 'chest'),
                         (589389, 168842.0, '2129-01-14', '2129-01-14 14:52:00', '2129-01-14 19:55:49', 'Nursing', 841, 'chesp', 'chest'),
                         (722055, 174051.0, '2109-07-13', '2109-07-13 12:19:00', '2109-07-13 12:24:51', 'Nursing', 14, 'chesp', 'chest'),
                         (1260203, 195251.0, '2186-01-31', '2186-01-31 20:03:00', '2186-01-31 20:18:00', 'Nursing/other', 149, 'chesp', 'chest'),
                         (344260, 195662.0, '2187-12-30', '2187-12-30 05:27:00', '2187-12-30 06:59:27', 'Nursing', 1074, 'temporaty', 'temporary'),
                         (570163, 148152.0, '2138-01-10', '2138-01-10 10:42:00', '2138-01-10 10:48:40', 'Nutrition', 2152, 'temporaty', 'temporary'),
                         (747253, 149130.0, '2146-12-30', '2146-12-30 14:06:00', float('nan'), 'Radiology', 1234, 'temporaty', 'temporary'),
                         (1472464, 191175.0, '2128-07-14', '2128-07-14 03:33:00', '2128-07-14 03:50:00', 'Nursing/other', 458, 'temporaty', 'temporary'),
                         (2013945, 148086.0, '2145-08-13', '2145-08-13 18:52:00', '2145-08-13 19:03:00', 'Nursing/other', 219, 'temporaty', 'temporary'),
                         (732642, 157175.0, '2162-08-26', '2162-08-26 20:57:00', '2162-08-26 23:26:47', 'Physician ', 3487, 'uperr', 'upper'),
                         (899206, 160063.0, '2105-02-11', '2105-02-11 17:20:00', float('nan'), 'Radiology', 333, 'uperr', 'upper'),
                         (2017435, 187323.0, '2106-02-13', '2106-02-13 05:22:00', '2106-02-13 05:38:00', 'Nursing/other', 482, 'uperr', 'upper'),
                         (24769, 185910.0, '2166-09-04', float('nan'), float('nan'), 'Discharge summary', 13207, 'imaginig', 'imaging'),
                         (812884, float('nan'), '2190-02-19', '2190-02-19 13:56:00', float('nan'), 'Radiology', 5307, 'arteriogrphy', 'arteriography'),
                         (838390, 199863.0, '2125-09-15', '2125-09-15 20:49:00', float('nan'), 'Radiology', 3333, 'arteriogrphy', 'arteriography'),
                         (44786, 135967.0, '2174-11-13', float('nan'), float('nan'), 'Discharge summary', 5788, 'stabilizind', 'stabilizing'),
                         (830875, 190749.0, '2155-07-07', '2155-07-07 14:59:00', float('nan'), 'Radiology', 344, 'intubabation', 'intubation'),
                         (829509, 190749.0, '2155-06-24', '2155-06-24 20:27:00', float('nan'), 'Radiology', 390, 'intubabation', 'intubation'),
                         (829474, 190749.0, '2155-06-24', '2155-06-24 14:58:00', float('nan'), 'Radiology', 382, 'intubabation', 'intubation'),
                         (36310, 165250.0, '2168-06-16', float('nan'), float('nan'), 'Discharge summary', 14263, 'rehospialization', 'rehospitalization'),
                         (1154473, 110988.0, '2141-09-06', '2141-09-06 08:34:00', float('nan'), 'Radiology', 581, 'generenerative', 'generative'),
                         (1891187, 141067.0, '2107-03-07', '2107-03-07 06:43:00', '2107-03-07 06:59:00', 'Nursing/other', 174, 'strpng', 'strong'),
                         (40269, 177965.0, '2154-11-28', float('nan'), float('nan'), 'Discharge summary', 6947, 'electrocradiogram', 'electrocardiogram'),
                         (27914, 139136.0, '2106-11-30', float('nan'), float('nan'), 'Discharge summary', 3938, 'dicsontinued', 'discontinued'),
                         (47206, 118181.0, '2193-11-18', float('nan'), float('nan'), 'Discharge summary', 9175, 'dicsontinued', 'discontinued'),
                         (49683, 127098.0, '2117-06-14', float('nan'), float('nan'), 'Discharge summary', 13184, 'dicsontinued', 'discontinued'),
                         (454964, 121846.0, '2112-01-31', '2112-01-31 17:13:00', '2112-01-31 17:13:20', 'Nutrition', 1568, 'dicsontinued', 'discontinued'),
                         (30624, 127230.0, '2150-08-19', float('nan'), float('nan'), 'Discharge summary', 9288, 'lukocytosis', 'leukocytosis'),
                         (537024, 199580.0, '2151-09-02', '2151-09-02 17:08:00', '2151-09-02 17:48:55', 'Nursing', 1566, 'lukocytosis', 'leukocytosis'),
                         (853, 192860.0, '2116-06-10', float('nan'), float('nan'), 'Discharge summary', 5088, 'divertculi', 'diverticuli'),
                         (496074, 188434.0, '2188-11-19', '2188-11-19 18:45:00', '2188-11-19 18:46:05', 'Nursing', 1685, 'sduring', 'during'),
                         (1815982, 177323.0, '2126-03-23', '2126-03-23 19:39:00', '2126-03-23 20:02:00', 'Nursing/other', 2099, 'sduring', 'during'),
                         (522823, 140856.0, '2137-03-31', '2137-03-31 07:19:00', '2137-03-31 15:47:50', 'Physician ', 3255, 'erythemay', 'erythema'),
                         (382423, 114524.0, '2108-03-24', '2108-03-24 05:57:00', '2108-03-24 06:35:19', 'Physician ', 2322, 'identifie', 'identified'),
                         (748692, float('nan'), '2190-02-25', '2190-02-25 11:21:00', float('nan'), 'Radiology', 4008, 'identifie', 'identified'),
                         (816320, 140222.0, '2167-05-06', '2167-05-06 18:18:00', float('nan'), 'Radiology', 2239, 'identifie', 'identified'),
                         (1152359, 151596.0, '2108-11-13', '2108-11-13 19:57:00', float('nan'), 'Radiology', 241, 'identifie', 'identified'),
                         (2074291, 136365.0, '2181-09-04', '2181-09-04 14:53:00', '2181-09-04 15:00:00', 'Nursing/other', 361, 'identifie', 'identified'),
                         (358775, 197836.0, '2140-01-20', '2140-01-20 07:00:00', '2140-01-20 07:00:49', 'Physician ', 3768, 'caucasaian', 'caucasian'),
                         (1008991, 104918.0, '2123-04-22', '2123-04-22 10:50:00', float('nan'), 'Radiology', 192, 'sclerosin', 'sclerosing'),
                         (1004927, float('nan'), '2184-05-05', '2184-05-05 15:18:00', float('nan'), 'Radiology', 190, 'sclerosin', 'sclerosing'),
                         (1004927, float('nan'), '2184-05-05', '2184-05-05 15:18:00', float('nan'), 'Radiology', 3485, 'sclerosin', 'sclerosing'),
                         (1004369, float('nan'), '2184-05-01', '2184-05-01 13:13:00', float('nan'), 'Radiology', 198, 'sclerosin', 'sclerosing'),
                         (1531992, 112346.0, '2125-05-17', '2125-05-17 02:45:00', '2125-05-17 02:53:00', 'Nursing/other', 827, 'brinsg', 'brings'),
                         (1535780, 129693.0, '2174-08-03', '2174-08-03 16:33:00', '2174-08-03 16:39:00', 'Nursing/other', 698, 'brinsg', 'brings'),
                         (1534227, 152442.0, '2195-07-01', '2195-07-01 01:46:00', '2195-07-01 01:50:00', 'Nursing/other', 766, 'brinsg', 'brings'),
                         (1744935, 112346.0, '2125-05-09', '2125-05-09 15:36:00', '2125-05-09 15:45:00', 'Nursing/other', 1171, 'brinsg', 'brings'),
                         (1970227, 163055.0, '2142-09-24', '2142-09-24 01:20:00', '2142-09-24 01:24:00', 'Nursing/other', 533, 'brinsg', 'brings'),
                         (1274102, 190912.0, '2142-08-02', '2142-08-02 06:25:00', '2142-08-02 06:36:00', 'Nursing/other', 1833, 'coxycx', 'coccyx'),
                         (1692431, 175469.0, '2157-08-07', '2157-08-07 05:11:00', '2157-08-07 05:17:00', 'Nursing/other', 1042, 'coxycx', 'coccyx'),
                         (1679320, 115088.0, '2173-08-14', '2173-08-14 17:24:00', '2173-08-14 17:37:00', 'Nursing/other', 1655, 'coxycx', 'coccyx'),
                         (1805319, 190912.0, '2142-07-31', '2142-07-31 06:20:00', '2142-07-31 06:28:00', 'Nursing/other', 1692, 'coxycx', 'coccyx'),
                         (41391, 123509.0, '2174-02-28', float('nan'), float('nan'), 'Discharge summary', 1017, 'spubsided', 'subsided'),
                         (1586519, 123472.0, '2164-02-09', '2164-02-09 18:24:00', '2164-02-09 18:36:00', 'Nursing/other', 1010, 'tthey', 'they'),
                         (331966, 167587.0, '2200-08-27', '2200-08-27 02:55:00', '2200-08-27 03:19:05', 'Nursing', 649, 'attempst', 'attempts'),
                         (845695, 168233.0, '2107-02-01', '2107-02-01 13:39:00', float('nan'), 'Radiology', 469, 'attempst', 'attempts'),
                         (1788623, 174274.0, '2110-04-07', '2110-04-07 16:52:00', '2110-04-07 17:28:00', 'Nursing/other', 1694, 'attempst', 'attempts'),
                         (423615, 187089.0, '2181-08-20', '2181-08-20 00:40:00', '2181-08-20 05:48:42', 'Nursing', 1310, 'ventriculostmy', 'ventriculostomy'),
                         (516809, 175792.0, '2157-09-08', '2157-09-08 20:02:00', '2157-09-08 20:24:07', 'Nursing', 1838, 'ventriculostmy', 'ventriculostomy'),
                         (1804605, 166102.0, '2147-01-04', '2147-01-04 18:35:00', '2147-01-04 18:45:00', 'Nursing/other', 459, 'ventriculostmy', 'ventriculostomy'),
                         (1985493, 112558.0, '2117-01-02', '2117-01-02 04:47:00', '2117-01-02 05:14:00', 'Nursing/other', 1443, 'ventriculostmy', 'ventriculostomy'),
                         (1806543, 138715.0, '2194-10-07', '2194-10-07 07:57:00', '2194-10-07 08:52:00', 'Nursing/other', 2199, 'activiety', 'activity'),
                         (1813749, 110241.0, '2119-11-16', '2119-11-16 18:51:00', '2119-11-16 19:01:00', 'Nursing/other', 1037, 'activiety', 'activity'),
                         (5844, 119121.0, '2146-06-01', float('nan'), float('nan'), 'Discharge summary', 4483, 'ankes', 'ankles'),
                         (25193, 196065.0, '2180-09-02', float('nan'), float('nan'), 'Discharge summary', 2103, 'ankes', 'ankles'),
                         (316248, 152639.0, '2199-07-05', '2199-07-05 16:46:00', '2199-07-05 17:45:13', 'Nursing', 1919, 'ankes', 'ankles'),
                         (35240, 137812.0, '2100-09-28', float('nan'), float('nan'), 'Discharge summary', 4609, 'cconjunctival', 'conjunctival'),
                         (441907, 137812.0, '2100-09-17', '2100-09-17 06:32:00', '2100-09-17 06:45:53', 'Physician ', 6417, 'cconjunctival', 'conjunctival'),
                         (595912, 174622.0, '2115-05-03', '2115-05-03 05:17:00', '2115-05-03 07:31:28', 'Physician ', 11976, 'abrrogate', 'abrogate'),
                         (1266, 141098.0, '2157-03-25', float('nan'), float('nan'), 'Discharge summary', 3393, 'ellicited', 'elicited'),
                         (2683, 122915.0, '2154-09-19', float('nan'), float('nan'), 'Discharge summary', 1735, 'ellicited', 'elicited'),
                         (10592, 153529.0, '2181-07-14', float('nan'), float('nan'), 'Discharge summary', 7257, 'ellicited', 'elicited'),
                         (827713, 153529.0, '2181-06-30', '2181-06-30 09:48:00', float('nan'), 'Radiology', 1302, 'ellicited', 'elicited'),
                         (1457073, 198274.0, '2106-05-05', '2106-05-05 17:54:00', '2106-05-05 18:02:00', 'Nursing/other', 1055, 'ellicited', 'elicited'),
                         (37877, 128859.0, '2143-10-15', float('nan'), float('nan'), 'Discharge summary', 21828, 'levatiracetem', 'levetiracetam'),
                         (20274, 133613.0, '2122-05-15', float('nan'), float('nan'), 'Discharge summary', 454, 'resulatant', 'resultant'),
                         (1965200, 109296.0, '2111-12-27', '2111-12-27 07:24:00', '2111-12-27 07:30:00', 'Nursing/other', 942, 'anitiboitc', 'antibiotic'),
                         (50925, 122252.0, '2106-01-16', float('nan'), float('nan'), 'Discharge summary', 4459, 'smade', 'made'),
                         (1283331, 196858.0, '2163-03-11', '2163-03-11 05:40:00', '2163-03-11 06:19:00', 'Nursing/other', 727, 'smade', 'made'),
                         (3894, 158782.0, '2122-02-27', float('nan'), float('nan'), 'Discharge summary', 1625, 'baselilne', 'baseline'),
                         (51825, 154963.0, '2145-05-04', float('nan'), float('nan'), 'Discharge summary', 18247, 'baselilne', 'baseline'),
                         (1483381, 108050.0, '2128-09-07', '2128-09-07 05:50:00', '2128-09-07 06:09:00', 'Nursing/other', 138, 'baselilne', 'baseline'),
                         (2001508, 157734.0, '2135-07-14', '2135-07-14 17:29:00', '2135-07-14 17:38:00', 'Nursing/other', 182, 'baselilne', 'baseline'),
                         (9521, 166000.0, '2113-10-12', float('nan'), float('nan'), 'Discharge summary', 8887, 'gastroesophgeal', 'gastroesophageal'),
                         (38218, 159027.0, '2105-06-29', float('nan'), float('nan'), 'Discharge summary', 9228, 'gastroesophgeal', 'gastroesophageal'),
                         (781194, 169388.0, '2187-04-18', '2187-04-18 17:14:00', float('nan'), 'Radiology', 1577, 'gastroesophgeal', 'gastroesophageal'),
                         (884223, float('nan'), '2143-11-21', '2143-11-21 12:19:00', float('nan'), 'Radiology', 355, 'gastroesophgeal', 'gastroesophageal'),
                         (1216016, 153036.0, '2154-12-09', '2154-12-09 20:25:00', float('nan'), 'Radiology', 1384, 'gastroesophgeal', 'gastroesophageal'),
                         (40117, 155071.0, '2171-02-04', float('nan'), float('nan'), 'Discharge summary', 5968, 'nictine', 'nicotine'),
                         (1593450, 181609.0, '2163-03-16', '2163-03-16 03:17:00', '2163-03-16 03:31:00', 'Nursing/other', 1070, 'nictine', 'nicotine'),
                         (66754, 181588.0, '2137-06-28', float('nan'), float('nan'), 'Echo', 735, 'partiallly', 'partially'),
                         (332894, 198762.0, '2162-10-17', '2162-10-17 10:19:00', '2162-10-17 15:50:22', 'Physician ', 3793, 'partiallly', 'partially'),
                         (1077057, float('nan'), '2178-05-26', '2178-05-26 08:55:00', float('nan'), 'Radiology', 693, 'partiallly', 'partially'),
                         (1157241, 173664.0, '2145-12-11', '2145-12-11 16:19:00', float('nan'), 'Radiology', 2401, 'partiallly', 'partially'),
                         (1457658, 131539.0, '2112-02-27', '2112-02-27 05:49:00', '2112-02-27 05:58:00', 'Nursing/other', 304, 'partiallly', 'partially'),
                         (4726, 194654.0, '2145-01-01', float('nan'), float('nan'), 'Discharge summary', 8926, 'diplocci', 'diplococci'),
                         (24495, 155705.0, '2149-07-17', float('nan'), float('nan'), 'Discharge summary', 11627, 'diplocci', 'diplococci'),
                         (34291, 168208.0, '2100-11-05', float('nan'), float('nan'), 'Discharge summary', 1969, 'diplocci', 'diplococci'),
                         (659874, 147384.0, '2186-10-26', '2186-10-26 10:12:00', '2186-10-26 12:36:46', 'Nursing', 816, 'diplocci', 'diplococci'),
                         (2018528, 191038.0, '2172-12-23', '2172-12-23 05:03:00', '2172-12-23 05:34:00', 'Nursing/other', 1061, 'diplocci', 'diplococci'),
                         (20755, 139634.0, '2113-10-25', float('nan'), float('nan'), 'Discharge summary', 9545, 'sensatations', 'sensations'),
                         (3698, 119219.0, '2192-04-04', float('nan'), float('nan'), 'Discharge summary', 16069, 'indiependence', 'independence'),
                         (29393, 148450.0, '2170-01-11', float('nan'), float('nan'), 'Discharge summary', 8223, 'chracteristics', 'characteristics'),
                         (35941, 130039.0, '2175-01-24', float('nan'), float('nan'), 'Discharge summary', 9324, 'chracteristics', 'characteristics'),
                         (707917, 172343.0, '2178-02-07', '2178-02-07 06:21:00', '2178-02-07 06:21:21', 'Nursing', 1867, 'underdampened', 'underdamped'),
                         (2003821, 141570.0, '2183-05-18', '2183-05-18 17:50:00', '2183-05-18 18:09:00', 'Nursing/other', 990, 'underdampened', 'underdamped'),
                         (1385, 193880.0, '2189-02-03', float('nan'), float('nan'), 'Discharge summary', 5401, 'robatussin', 'robitussin'),
                         (24154, 122994.0, '2183-03-07', float('nan'), float('nan'), 'Discharge summary', 4357, 'robatussin', 'robitussin'),
                         (359604, 186650.0, '2141-12-23', '2141-12-23 12:58:00', '2141-12-23 17:38:07', 'Physician ', 3750, 'robatussin', 'robitussin'),
                         (1279243, 125830.0, '2174-10-14', '2174-10-14 05:24:00', '2174-10-14 05:50:00', 'Nursing/other', 955, 'robatussin', 'robitussin'),
                         (1511722, 168528.0, '2189-01-18', '2189-01-18 05:33:00', '2189-01-18 05:59:00', 'Nursing/other', 326, 'robatussin', 'robitussin'),
                         (313848, 163622.0, '2155-05-12', '2155-05-12 07:01:00', '2155-05-12 13:49:36', 'Physician ', 6883, 'adrtenergic', 'adrenergic'),
                         (313903, 163622.0, '2155-05-13', '2155-05-13 06:52:00', '2155-05-13 15:03:25', 'Physician ', 9340, 'adrtenergic', 'adrenergic'),
                         (509660, 163622.0, '2155-05-12', '2155-05-12 07:01:00', '2155-05-12 07:01:08', 'Physician ', 6871, 'adrtenergic', 'adrenergic'),
                         (519604, 163622.0, '2155-05-11', '2155-05-11 07:26:00', '2155-05-11 07:26:36', 'Physician ', 5815, 'adrtenergic', 'adrenergic'),
                         (523223, 163622.0, '2155-05-10', '2155-05-10 07:02:00', '2155-05-10 10:10:16', 'Physician ', 4487, 'adrtenergic', 'adrenergic'),
                         (38462, 146559.0, '2191-07-11', float('nan'), float('nan'), 'Discharge summary', 5376, 'purpuses', 'purposes'),
                         (48166, 154310.0, '2154-09-19', float('nan'), float('nan'), 'Discharge summary', 5619, 'purpuses', 'purposes'),
                         (204, 119078.0, '2103-08-09', float('nan'), float('nan'), 'Discharge summary', 8076, 'eppiplocae', 'epiploicae'),
                         (34076, 134253.0, '2164-06-11', float('nan'), float('nan'), 'Discharge summary', 8096, 'cisplatinin', 'cisplatin'),
                         (1137842, float('nan'), '2119-05-17', '2119-05-17 22:01:00', float('nan'), 'Radiology', 1018, 'studues', 'studies'),
                         (1304415, 192643.0, '2148-08-11', '2148-08-11 20:20:00', '2148-08-11 20:29:00', 'Nursing/other', 257, 'studues', 'studies'),
                         (1482656, 189928.0, '2117-09-11', '2117-09-11 20:17:00', '2117-09-11 20:30:00', 'Nursing/other', 790, 'studues', 'studies'),
                         (2030435, 153538.0, '2171-10-29', '2171-10-29 13:16:00', '2171-10-29 13:20:00', 'Nursing/other', 321, 'studues', 'studies'),
                         (1592282, 104891.0, '2113-03-31', '2113-03-31 05:46:00', '2113-03-31 06:00:00', 'Nursing/other', 905, 'sofeners', 'softeners'),
                         (1918389, 148161.0, '2152-08-19', '2152-08-19 19:14:00', '2152-08-19 19:35:00', 'Nursing/other', 858, 'sofeners', 'softeners'),
                         (362823, 195768.0, '2178-02-10', '2178-02-10 07:05:00', '2178-02-10 07:05:19', 'Physician ', 7246, 'creatinnine', 'creatinine'),
                         (532943, 144855.0, '2175-09-16', '2175-09-16 05:31:00', '2175-09-16 05:32:02', 'Nursing', 1989, 'creatinnine', 'creatinine'),
                         (533174, 144855.0, '2175-09-17', '2175-09-17 06:00:00', '2175-09-17 06:01:00', 'General', 1822, 'creatinnine', 'creatinine'),
                         (721805, 195768.0, '2178-02-09', '2178-02-09 05:46:00', '2178-02-09 10:37:10', 'Physician ', 6470, 'creatinnine', 'creatinine'),
                         (1298799, 129004.0, '2106-01-01', '2106-01-01 15:35:00', '2106-01-01 15:48:00', 'Nursing/other', 902, 'creatinnine', 'creatinine'),
                         (13891, 175062.0, '2194-04-12', float('nan'), float('nan'), 'Discharge summary', 3934, 'stneosis', 'stenosis'),
                         (76690, 174051.0, '2109-07-12', float('nan'), float('nan'), 'Echo', 62, 'stneosis', 'stenosis'),
                         (83435, 136606.0, '2144-07-06', float('nan'), float('nan'), 'Echo', 104, 'stneosis', 'stenosis'),
                         (485103, 185887.0, '2186-05-12', '2186-05-12 10:57:00', '2186-05-12 10:57:47', 'Nutrition', 375, 'stneosis', 'stenosis'),
                         (767581, float('nan'), '2128-10-18', '2128-10-18 13:27:00', float('nan'), 'Radiology', 7315, 'stneosis', 'stenosis'),
                         (1769215, 113299.0, '2102-04-10', '2102-04-10 19:10:00', '2102-04-10 19:38:00', 'Nursing/other', 2411, 'oharmacy', 'pharmacy'),
                         (328001, 168208.0, '2100-11-04', '2100-11-04 06:18:00', '2100-11-04 15:36:41', 'Physician ', 7944, 'espiodes', 'episodes'),
                         (5435, 145866.0, '2186-01-24', float('nan'), float('nan'), 'Discharge summary', 2353, 'successufl', 'successful'),
                         (69280, 181890.0, '2161-07-28', float('nan'), float('nan'), 'Echo', 2994, 'orificice', 'orifice'),
                         (23527, 176676.0, '2196-12-02', float('nan'), float('nan'), 'Discharge summary', 375, 'aassociated', 'associated'),
                         (23527, 176676.0, '2196-12-02', float('nan'), float('nan'), 'Discharge summary', 1346, 'aassociated', 'associated'),
                         (35137, 124594.0, '2114-10-11', float('nan'), float('nan'), 'Discharge summary', 6014, 'onocology', 'oncology'),
                         (51832, 126583.0, '2200-04-25', float('nan'), float('nan'), 'Discharge summary', 12486, 'onocology', 'oncology'),
                         (631364, 143130.0, '2178-08-27', '2178-08-27 02:05:00', '2178-08-27 02:37:02', 'Physician ', 8273, 'onocology', 'oncology'),
                         (642617, 190822.0, '2180-06-17', '2180-06-17 07:26:00', '2180-06-17 10:47:59', 'Physician ', 7628, 'onocology', 'oncology'),
                         (641586, 190822.0, '2180-06-17', '2180-06-17 07:26:00', '2180-06-17 15:01:26', 'Physician ', 7628, 'onocology', 'oncology'),
                         (53078, 141982.0, '2201-04-07', float('nan'), float('nan'), 'Discharge summary', 900, 'apasia', 'aphasia'),
                         (713437, 173221.0, '2171-05-05', '2171-05-05 18:54:00', '2171-05-05 18:54:18', 'Nursing', 314, 'apasia', 'aphasia'),
                         (750082, float('nan'), '2123-03-24', '2123-03-24 04:29:00', float('nan'), 'Radiology', 162, 'apasia', 'aphasia'),
                         (898383, 191975.0, '2171-02-28', '2171-02-28 12:52:00', float('nan'), 'Radiology', 455, 'apasia', 'aphasia'),
                         (1878157, 139374.0, '2146-11-25', '2146-11-25 15:13:00', '2146-11-25 15:27:00', 'Nursing/other', 92, 'apasia', 'aphasia'),
                         (1427826, 145214.0, '2141-11-08', '2141-11-08 15:54:00', '2141-11-08 16:11:00', 'Nursing/other', 266, 'ppulse', 'pulse'),
                         (1717478, 117655.0, '2125-04-06', '2125-04-06 17:24:00', '2125-04-06 17:52:00', 'Nursing/other', 526, 'ppulse', 'pulse')]

In [4]:
# Read the whole MIMIC-III v1.4 NOTEEVENTS table into memory and report how
# many notes it holds; the lookups below filter this frame.
df_notes = pd.read_csv(mimic_csv_fpath, low_memory=False)
print(f'{df_notes.shape[0]} rows')

2083180 rows


## Getting Examples

In [5]:
def is_nan(obj):
    """Return True only when `obj` is a float holding NaN.

    Non-float objects (strings, None, ints, ...) are never considered NaN,
    which lets callers pass annotation fields of mixed types safely.
    """
    if not isinstance(obj, float):
        return False
    return math.isnan(obj)

def find_rows(hadm_id, chartdate, charttime, storetime, category):
    """Narrow `df_notes` down to the rows matching a note specifier.

    The filters are applied one at a time, in the order HADM_ID, CHARTDATE,
    CHARTTIME, STORETIME, CATEGORY. A NaN specifier value selects the rows
    whose column is null; any other value selects rows equal to it.
    Filtering stops as soon as exactly one candidate row remains, so later
    fields are not checked in that case.

    Returns:
        The DataFrame of remaining candidate rows (possibly empty, possibly
        more than one row).
    """
    specifier = (
        ('HADM_ID', hadm_id),
        ('CHARTDATE', chartdate),
        ('CHARTTIME', charttime),
        ('STORETIME', storetime),
        ('CATEGORY', category),
    )

    candidates = df_notes
    for column, wanted in specifier:
        values = candidates[column]
        keep = values.isnull() if is_nan(wanted) else values == wanted
        candidates = candidates[keep]
        # A unique candidate is final; remaining fields are not consulted.
        if len(candidates) == 1:
            break

    return candidates

In [6]:
# For every v1.3 annotation, locate the note in the v1.4 table and the char
# position of the misspelling inside it. Among all candidate rows, the
# occurrence closest to the v1.3 char position wins.
row_ids, char_idxs = [], []

for annotation in tqdm(clinspell_annotations, total=len(clinspell_annotations)):
    # The first field is the v1.3 ROW_ID, which does not identify rows in v1.4.
    old_row_id, hadm_id, chartdate, charttime, storetime, category, old_char_idx, misspelling, correction = annotation
    rows = find_rows(hadm_id, chartdate, charttime, storetime, category)

    # Match the misspelling literally: it is a plain string, not a regex.
    pattern = re.compile(re.escape(misspelling))

    # Start from +inf so that every occurrence is a valid candidate. Seeding
    # with the longest text length would reject an occurrence whose distance
    # to the (v1.3-based) position happens to exceed that bound.
    row, char_idx, min_dist = None, -1, math.inf
    for _, cand_row in rows.iterrows():
        text = cand_row.TEXT.lower()
        for match in pattern.finditer(text):
            cand_idx = match.start()
            dist = abs(old_char_idx - cand_idx)
            if dist < min_dist:
                row, char_idx, min_dist = cand_row, cand_idx, dist

    if row is None:
        raise ValueError(
            f'misspelling {misspelling!r} not found in any of the {len(rows)} candidate rows '
            f'(HADM_ID={hadm_id}, CHARTDATE={chartdate}, CATEGORY={category})'
        )
    # Offsets were computed on the lower-cased text; verify that they also
    # address the misspelling in the original text before recording them.
    assert row.TEXT[char_idx:char_idx+len(misspelling)].lower() == misspelling

    row_ids.append(row.ROW_ID)
    char_idxs.append(char_idx)

  0%|          | 0/873 [00:00<?, ?it/s]

In [7]:
# Transpose the annotation tuples into columns and keep the last two:
# the misspelled words and their corrections.
*_other_fields, misspellings, corrections = zip(*clinspell_annotations)

## Getting Contexts

In [8]:
def get_context_text(row_id, char_idx, misspelling):
    """Return the (left, right) text surrounding a misspelling in a note.

    The note is the first row of `df_notes` whose ROW_ID equals `row_id`.
    The left context is everything before `char_idx`; the right context is
    everything after the misspelling. In both parts a lone newline becomes
    a tab and a double newline becomes a single newline.
    """
    note_rows = df_notes[df_notes.ROW_ID == row_id]
    text = note_rows.iloc[0].TEXT

    before = text[:char_idx]
    after = text[char_idx + len(misspelling):]

    processed = []
    for part in (before, after):
        # Single line breaks -> tabs, then paragraph breaks -> line breaks.
        part = re.sub(r'(?<!\n)\n(?!\n)', "\t", part)
        part = re.sub("\n\n", "\n", part)
        processed.append(part)
    return tuple(processed)

In [9]:
# One (left, right) context pair per annotation, in annotation order.
contexts = [
    get_context_text(note_row_id, position, word)
    for note_row_id, position, word in zip(row_ids, char_idxs, misspellings)
]

## Pseudonymization

In [10]:
# Write the contexts of every misspelling to disk, one file per side.
# Both working directories are wiped first so stale files never leak in.
for stale_dpath in (pseudo_in_dpath, pseudo_out_dpath):
    if os.path.exists(stale_dpath):
        shutil.rmtree(stale_dpath)

os.makedirs(pseudo_in_dpath)
for idx, (left_text, right_text) in enumerate(contexts):
    left_fpath = os.path.join(pseudo_in_dpath, f'{idx}_left.txt')
    right_fpath = os.path.join(pseudo_in_dpath, f'{idx}_right.txt')
    with open(left_fpath, 'w', encoding='utf-8') as fd:
        fd.write(left_text)
    with open(right_fpath, 'w', encoding='utf-8') as fd:
        fd.write(right_text)

In [11]:
# Run the mimic-tools script in REPLACE mode over the context files written
# to pseudo_in_dpath; the results are read back from pseudo_out_dpath in the
# next cell. Presumably this swaps the de-identification placeholders for
# pseudonyms drawn from the files under `lists` — TODO confirm against mimic-tools.
# NOTE(review): the paths are interpolated into the shell command unquoted,
# so directories containing spaces would break the invocation.
# Dependencies of the tool: pip install requests joblib sqlalchemy gensim
! python {os.path.join(mimic_tools_dpath, 'main.py')} REPLACE \
    --input-dir {os.path.join(os.getcwd(), pseudo_in_dpath)} \
    --output-dir {os.path.join(os.getcwd(), pseudo_out_dpath)} \
    --list-dir {os.path.join(mimic_tools_dpath, 'lists')}

2022-03-22 08:58:01,311 Starting placeholder replacing
2022-03-22 08:58:01,311 Loading lists
2022-03-22 08:58:01,332 * Postal addresses: 20000 [656C Newport Court Coatesville, PA 19320 ...]
2022-03-22 08:58:01,527 * Last names: 88799 [SMITH, JOHNSON, WILLIAMS, JONES, BROWN ...]
2022-03-22 08:58:01,530 * Male first names: 1219 [JAMES, JOHN, ROBERT, MICHAEL, WILLIAM ...]
2022-03-22 08:58:01,539 * Female first names: 4275 [MARY, PATRICIA, LINDA, BARBARA, ELIZABETH ...]
2022-03-22 08:58:01,558 * Phone numbers: 20000 [(666) 372-7835, (923) 739-2644 ...]
2022-03-22 08:58:01,590 * Companies: 20000 [Ligula Aenean Gravida Ltd, Non Bibendum Sed LLC ...]
2022-03-22 08:58:01,591 * Countries: 264 [Afghanistan, Albania, Algeria, American Samoa ...]
2022-03-22 08:58:01,614 * Emails: 20000 [enim.Suspendisse.aliquet@Crasdictum.com, sapien.Cras.dolor@Curabitur.org ...]
2022-03-22 08:58:01,615 * Holiday names: 187 [Administrative Professionals Day, Air Force Birthday ...]
2022-03-22 08:58:01,619 * Hospit

In [12]:
# Read pseudonymized notes
def process_note(note):
    """Flatten a note to one line and normalize it.

    Newlines and tabs are each replaced by a single space, then the text is
    passed through `clean_text` followed by `sanitize_text`.
    """
    for whitespace in ('\n', '\t'):
        note = re.sub(whitespace, ' ', note)
    return sanitize_text(clean_text(note))

# Load the pseudonymized left/right files back, in the same order as
# `contexts`, normalizing each side with process_note.
contexts_pseudonym = []
for idx, _ in enumerate(contexts):
    left_fpath = os.path.join(pseudo_out_dpath, f'{idx}_left.txt')
    right_fpath = os.path.join(pseudo_out_dpath, f'{idx}_right.txt')

    with open(left_fpath, 'r', encoding='utf-8') as fd:
        left_note = process_note(fd.read())
    with open(right_fpath, 'r', encoding='utf-8') as fd:
        right_note = process_note(fd.read())

    contexts_pseudonym.append((left_note, right_note))

In [13]:
# Assemble one record per misspelling, keeping at most 128 whitespace-split
# tokens of context on each side (the last 128 on the left, the first 128
# on the right).
output_data = []

examples = zip(row_ids, misspellings, corrections, contexts_pseudonym)
for row_id, typo, correct, (left_text, right_text) in examples:
    left = left_text.split()[-128:]
    right = right_text.split()[:128]
    output_data.append((row_id, typo, left, right, correct))
    print(f'{typo:30s}({row_id:7d}): left {len(left):3d} / right {len(right):3d}')

carediolgy                    (  52849): left 128 / right  37
lugns                         ( 763191): left 128 / right 128
lugns                         (1098665): left  79 / right  59
lugns                         (1974225): left  17 / right  11
lugns                         (1699618): left  10 / right   6
lugns                         (1795825): left   8 / right   6
ecchinocytes                  (  14760): left 128 / right 128
procuedure                    (  27431): left 128 / right 128
procuedure                    ( 865114): left 128 / right 128
avening                       ( 452208): left 118 / right  26
avening                       (1310213): left  52 / right 128
avening                       (1668034): left  38 / right  85
enteracept                    (   4904): left 128 / right 128
enteracept                    ( 334393): left 128 / right  78
enteracept                    ( 334268): left 128 / right 128
hepaotology                   ( 606878): left 128 / right 110
precipir

## Writing dataset

In [14]:
# Write the test set as a TSV file with one misspelling example per row.
dataset_dpath = '../data/mimic_clinspell'
os.makedirs(dataset_dpath, exist_ok=True)

dataset_fpath = os.path.join(dataset_dpath, 'test.tsv')
# In-memory copy of the written rows, kept for interactive inspection.
temp = []
# newline='' is required by the csv module so that it controls the line
# endings itself (otherwise rows end in '\r\r\n' on Windows). The encoding is
# pinned to UTF-8 to match how the note files are written and read above.
with open(dataset_fpath, 'w', newline='', encoding='utf-8') as fd:
    writer = csv.writer(fd, delimiter='\t')
    writer.writerow(['index', 'note_id', 'word', 'left', 'right', 'correct'])
    for i, row in enumerate(output_data):
        line_idx, typo, left, right, correct = row
        # Contexts are stored as token lists; serialize them space-joined.
        record = [i, line_idx, typo, ' '.join(left), ' '.join(right), correct]
        writer.writerow(record)
        temp.append(record)

print('Done')

Done
