In [1]:
import chardet

# Guess the CSV's text encoding from its first 100000 bytes instead of
# reading the whole file.
with open("kst_triage.csv", "rb") as f:
    sample = f.read(100000)

# Run detection after the file handle is closed, then show the encoding name.
result = chardet.detect(sample)
print(result["encoding"])

Windows-1252


In [2]:
import pandas as pd 

# Load the semicolon-separated triage file, decoding it with the encoding
# that the chardet cell printed.
# NOTE(review): the frame preview shows '#BOÞ!' cells in NRS_pain — presumably
# a spreadsheet error marker; such cells are read as strings here. Confirm.
df = pd.read_csv(
    "kst_triage.csv",
    sep=";",
    encoding="Windows-1252",
)

In [3]:
# Columns removed from the working frame.
# Re-running this cell without reloading df raises KeyError, because the
# labels are already gone after the first run.
columns_to_drop = [
    'Error_group',
    'mistriage',
    'Disposition',
    'Saturation',
    'KTAS duration_min',
]
df = df.drop(columns=columns_to_drop)

In [4]:
# Show the frame for a visual check after dropping columns (the rendered
# output below is truncated to the first and last rows).
df

Unnamed: 0,Group,Sex,Age,Patients number per hour,Arrival mode,Injury,Chief_complain,Mental,Pain,NRS_pain,SBP,DBP,HR,RR,BT,KTAS_RN,Diagnosis in ED,KTAS_expert,Length of stay_min
0,2,2,71,3,3,2,right ocular pain,1,1,2,160,100,84,18,36.6,2,Corneal abrasion,4,86
1,1,1,56,12,3,2,right forearm burn,1,1,2,137,75,60,20,36.5,4,"Burn of hand, firts degree dorsum",5,64
2,2,1,68,8,2,2,"arm pain, Lt",1,1,2,130,80,102,20,36.6,4,"Fracture of surgical neck of humerus, closed",5,862
3,1,2,71,8,1,1,ascites tapping,1,1,3,139,94,88,20,36.5,4,Alcoholic liver cirrhosis with ascites,5,108
4,1,2,58,4,3,1,"distension, abd",1,1,3,91,67,93,18,36.5,4,Ascites,5,109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1262,2,2,79,5,2,1,mental change,1,0,#BOÞ!,120,80,86,22,36.4,2,Cerebral infarction due to unspecified occlusi...,2,1995
1263,2,2,81,2,3,1,dysuria,1,0,#BOÞ!,120,80,94,20,36.4,4,Dysuria,4,1000
1264,2,2,81,17,2,1,dizziness,1,0,#BOÞ!,130,90,80,20,36.2,3,Dizziness and giddiness,3,310
1265,2,1,81,2,2,2,"Sensory, Decreased",1,0,#BOÞ!,170,100,78,20,36.6,3,"Cord compression, unspecified",3,475


In [5]:
import pandas as pd

# Free-text columns that must stay as strings; every other object-dtype
# column is treated as numeric data that was read as text.
text_cols = ['Chief_complain', 'Diagnosis in ED']

# All columns that pandas parsed as generic 'object' dtype
object_cols = df.select_dtypes(include=['object']).columns

# Convert each remaining object column to float64.
# Float (not int) is used on purpose: errors='coerce' turns every cell that
# cannot be parsed (e.g. the '#BOÞ!' cells visible in the frame preview) into
# NaN, and a plain int64 column cannot hold NaN.
for col in object_cols:
    if col in text_cols:
        continue
    converted = pd.to_numeric(df[col], errors='coerce')
    # Count cells that held a value but could not be parsed, so the data loss
    # caused by coercion is reported instead of happening silently.
    n_coerced = int((converted.isna() & df[col].notna()).sum())
    if n_coerced:
        print(f"{col}: {n_coerced} non-numeric value(s) replaced with NaN")
    df[col] = converted.astype(float)


# Check the data types after conversion:
print("\nDataFrame dtypes after conversion:")
print(df.dtypes)



DataFrame dtypes after conversion:
Group                         int64
Sex                           int64
Age                           int64
Patients number per hour      int64
Arrival mode                  int64
Injury                        int64
Chief_complain               object
Mental                        int64
Pain                          int64
NRS_pain                    float64
SBP                         float64
DBP                         float64
HR                          float64
RR                          float64
BT                          float64
KTAS_RN                       int64
Diagnosis in ED              object
KTAS_expert                   int64
Length of stay_min            int64
dtype: object


In [None]:
import pandas as pd

# -----------------------------------------------
# 1) Partition the numeric dataframe on the 'Group' column
# -----------------------------------------------
# Group == 1 rows form the training frame and Group == 2 rows the test frame;
# rows with any other Group value end up in neither. .copy() detaches each
# subset from df.
is_train = df["Group"].eq(1)
is_test = df["Group"].eq(2)
train_numeric = df.loc[is_train].copy()
test_numeric = df.loc[is_test].copy()

# Write both untransformed subsets to CSV without the index column
train_numeric.to_csv("train_numeric.csv", index=False)
test_numeric.to_csv("test_numeric.csv", index=False)

# ---------------------------------------------------------
# 2. Code -> label lookup tables for the categorical columns
# ---------------------------------------------------------
# 'Injury' column
injury_map = {
    1: "No",
    2: "Yes",
}

# 'Sex' column
sex_map = {
    1: "Female",
    2: "Male",
}

# 'Mental' column
mental_map = {
    1: "Alert",
    2: "Verbal Response",
    3: "Pain Response",
    4: "Unresponsive",
}

# 'Arrival mode' column: codes 1-7 each have their own label here.
# Any code that is not a key of this dict is labelled "Other" by map_arrival.
arrival_map = {
    1: "Walking",
    2: "119 Ambulance",
    3: "Private Vehicle",
    4: "Private Ambulance",
    5: "Public transportation",
    6: "Wheelchair",
    7: "Others",
}

# Mapping for Disposition (not applied: the 'Disposition' column is dropped from df earlier in the notebook):
# 1 = Discharge, 2 = Admission to ward, 3 = Admission to ICU, 4 = AMA Discharge,
# 5 = Transfer, 6 = Death, 7 = Surgery
# disposition_map = {
#     1: "Discharge",
#     2: "Admission to ward",
#     3: "Admission to ICU",
#     4: "AMA Discharge",  
#     5: "Transfer",
#     6: "Death",
#     7: "Surgery"
# }

# --------------------------------------------
# 3. Build a labelled copy of the dataframe
# --------------------------------------------
# Work on a copy so the numeric df stays untouched.
df_transformed = df.copy()

# Replace the numeric codes in each column with their labels; codes missing
# from a mapping become NaN (Series.map behaviour for dict lookups).
for column, mapping in (
    ("Injury", injury_map),
    ("Sex", sex_map),
    ("Mental", mental_map),
):
    df_transformed[column] = df_transformed[column].map(mapping)

# Arrival mode goes through a function so that codes missing from
# arrival_map get an explicit fallback label instead of NaN.
# NOTE(review): the fallback "Other" is a different label from arrival_map's
# code-7 label 'Others' — confirm the two categories are meant to be distinct.
def map_arrival(val):
    """Return the arrival-mode label for ``val``, or "Other" if ``val`` is not a key of arrival_map."""
    return arrival_map.get(val, "Other")

df_transformed["Arrival mode"] = df_transformed["Arrival mode"].apply(map_arrival)

# -------------------------------------------------------
# 4. Partition the labelled dataframe on 'Group' and save it
# -------------------------------------------------------
# Same rule as the numeric split: Group == 1 -> train, Group == 2 -> test.
in_train_group = df_transformed["Group"].eq(1)
in_test_group = df_transformed["Group"].eq(2)
train_transformed = df_transformed.loc[in_train_group].copy()
test_transformed = df_transformed.loc[in_test_group].copy()

# Write the labelled subsets to CSV without the index column
train_transformed.to_csv("train.csv", index=False)
test_transformed.to_csv("test.csv", index=False)
