In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Never truncate columns when a wide DataFrame is displayed.
pd.options.display.max_columns = None

In [None]:
# Paths to the train and test CSV files under /kaggle/input.
train_file = '/kaggle/input/playground-series-s3e22/train.csv'
test_file = '/kaggle/input/playground-series-s3e22/test.csv'

In [None]:
# Read both CSV files into DataFrames in one pass.
train_data, test_data = (pd.read_csv(path) for path in (train_file, test_file))

In [None]:
# Preview the first five training rows.
train_data.head(n=5)

In [None]:
#train_data.dtypes

In [None]:
#test_data.dtypes

In [None]:
# Side-by-side table of each column's dtype in the train and test frames.
dtypes_df = pd.DataFrame(dict(train_dtypes=train_data.dtypes,
                              test_dtypes=test_data.dtypes))

In [None]:
# Flip the dtype table so the data columns run across and the two splits are rows.
pivot_df = dtypes_df.T
pivot_df

In [None]:
# Print the name of every train column whose dtype is object.
for column in train_data.dtypes[train_data.dtypes == 'object'].index:
    print(column)

In [None]:
# Print the name of every test column whose dtype is object.
for column in test_data.dtypes[test_data.dtypes == 'object'].index:
    print(column)

In [None]:
# Map each object-typed train column to the list of distinct values it holds.
train_values_dict = {
    name: values.unique().tolist()
    for name, values in train_data.items()
    if values.dtypes == 'object'
}

In [None]:
# Inspect the distinct values collected for each object-typed train column.
train_values_dict

In [None]:
# Replace missing entries in every object-typed column of both frames with an
# explicit placeholder label, so that gaps show up as their own category.
for frame in (train_data, test_data):
    for column in frame.select_dtypes(include=["object"]).columns:
        frame[column] = frame[column].fillna("no_measure_recorded")

In [None]:
# Distinct values of every object-typed column, built for each split in turn;
# the test-side mapping is displayed.
train_values_dict, test_values_dict = (
    {name: values.unique().tolist()
     for name, values in frame.items()
     if values.dtypes == 'object'}
    for frame in (train_data, test_data)
)
test_values_dict

In [None]:
# Display the test frame as it currently stands.
test_data

In [None]:
# Sanity-check the train id column: is any id equal to 0, and is any id missing?
has_zero_id = (train_data['id']==0).any()
has_nan_id = train_data['id'].isnull().any()

# A notebook cell echoes only its final expression, so both flags are
# reported together in one mapping rather than on separate bare lines.
{'has_zero_id': bool(has_zero_id), 'has_nan_id': bool(has_nan_id)}

In [None]:
# View the test rows ordered by id; the sorted copy is only displayed and
# test_data itself is not reassigned.
test_data.sort_values('id')

In [None]:
import altair as alt

# Optional renderer / data-transformer settings, currently left disabled:
# alt.renderers.enable('default')
#alt.renderers.enable('notebook')
# alt.data_transformers.enable('json')
# Turn off Altair's maximum-row check so the full frames can be charted.
alt.data_transformers.disable_max_rows()

In [None]:
def assign_binary_label(row):
    """Return 1 when the row's 'surgery' entry equals 'yes', otherwise 0.

    Args:
        row: Any mapping-like object supporting ``row['surgery']``
            (for example a DataFrame row passed by ``apply(axis=1)``).

    Returns:
        int: 1 for an exact 'yes' match, 0 for every other value.
    """
    return 1 if row['surgery'] == 'yes' else 0

# 1. Encode surgery as a 0/1 label. The vectorised comparison produces the
#    same values as applying assign_binary_label row by row, without a Python
#    call per row, and it also copes with an empty frame.
test_data['binary_label'] = test_data['surgery'].eq('yes').astype('int64')

# 2. Keep only the rows where age == 'adult'
adult_data = test_data[test_data['age'] == 'adult']

# 3. Melt the object-typed columns into long (variable, value) form,
#    carrying binary_label along as the identifier column
object_columns = [col for col in test_data.columns if test_data[col].dtype == 'object']
melted_adult_data = adult_data.melt(id_vars=['binary_label'], value_vars=object_columns)

# 4. Create the Altair chart
# Bar layer: one mark per (binary_label, value) pair, coloured by label
bar_chart = alt.Chart(melted_adult_data).mark_bar().encode(
    x='binary_label:N',
    y='value:N',
    color=alt.Color('binary_label:N', legend=alt.Legend(title="Surgery Performed"),
                    scale=alt.Scale(domain=[0, 1], range=["blue", "orange"])),
    tooltip=['variable', 'value']
).properties(width=50)  # Adjusted the width for each facet

# Text layer: reuses the bar encodings and adds the row count as a label
text_chart = bar_chart.mark_text(
    align='center',
    baseline='middle',
    dy=-15
).encode(
    text='count()'
)

# Layer bars and labels, then facet into one column per melted variable
final_chart = (bar_chart + text_chart).facet(
    column=alt.Column('variable:N', header=alt.Header(labelOrient='right'))
).properties(
    title="Adult Surgery"
)

final_chart



In [None]:
# Bar chart: number of test rows per surgery value.
base = (
    alt.Chart(test_data)
    .mark_bar()
    .encode(
        x=alt.X("surgery:N", title="Surgery Type"),
        y=alt.Y("count():Q", title="Count"),
        color="surgery:N",
    )
    .properties(title="Distribution of Surgery Types")
)
base

In [None]:
# Bar chart: row count per pain category, coloured by surgery value.
stacked_bar = (
    alt.Chart(test_data)
    .mark_bar()
    .encode(
        x=alt.X("pain:N"),
        y=alt.Y("count():Q"),
        color="surgery:N",
    )
    .properties(title="Pain Levels based on Surgery Status")
)
stacked_bar

In [None]:
# Arc chart with an inner radius: share of test rows per pain category.
pie = (
    alt.Chart(test_data)
    .mark_arc(innerRadius=50)
    .encode(
        theta=alt.Theta('count():Q'),
        color='pain:N',
        tooltip=['pain:N', 'count():Q'],
    )
    .properties(width=300, height=300, title="Distribution of Pain Types")
)
pie

In [None]:
# Heatmap: row count for every (surgery, pain) combination.
heatmap = (
    alt.Chart(test_data)
    .mark_rect()
    .encode(
        x=alt.X('surgery:N', title='Surgery Status'),
        y=alt.Y('pain:N', title='Pain Level'),
        color=alt.Color(
            'count():Q',
            legend=alt.Legend(title='Count'),
            scale=alt.Scale(scheme='viridis'),
        ),
        tooltip=['surgery:N', 'pain:N', 'count():Q'],
    )
    .properties(width=400, height=300, title='Heatmap of Pain Level by Surgery Status')
)

heatmap

In [None]:
import altair as alt
import pandas as pd

def _rate_vs_temp_scatter(data, color_field, legend_title):
    """Scatter respiratory_rate against rectal_temp, coloured by one category.

    Replaces three copy-pasted chart definitions that differed only in the
    colouring column and its label.

    Args:
        data: DataFrame handed to ``alt.Chart``; it must provide the
            respiratory_rate, rectal_temp and ``color_field`` columns.
        color_field: Name of the column, encoded as nominal, that drives the
            point colour and is added to the tooltip.
        legend_title: Legend heading; also appended to "Colored by " to form
            the chart title.

    Returns:
        A 250x250 Altair chart of circle marks.
    """
    return alt.Chart(data).mark_circle().encode(
        x='respiratory_rate:Q',
        y='rectal_temp:Q',
        color=alt.Color(f'{color_field}:N', title=legend_title),
        tooltip=['respiratory_rate', 'rectal_temp', color_field]
    ).properties(
        width=250,
        height=250,
        title=f"Colored by {legend_title}"
    )


# One scatter per categorical column of interest
scatter_surgery = _rate_vs_temp_scatter(test_data, 'surgery', 'Surgery')
scatter_pain = _rate_vs_temp_scatter(test_data, 'pain', 'Pain')
scatter_temp = _rate_vs_temp_scatter(test_data, 'temp_of_extremities', 'Temp of Extremities')

# Concatenate the plots horizontally with independent legends
concatenated_plots = (scatter_surgery | scatter_pain | scatter_temp).resolve_legend(color="independent")

concatenated_plots