## Importing necessary libraries

In [1]:
import pandas as pd
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None
import lightningchart as lc

### Set the LightningChart license

In [2]:
# The license key is stored in 'license_key.txt'.
# Surrounding whitespace (for example the trailing newline many editors append)
# is stripped so that only the key text itself is handed to set_license.
with open("license_key.txt", "r") as file:
    key = file.read().strip()
lc.set_license(key)

### Load contents from CSV file

In [3]:
# Read the diabetes dataset from disk and show the resulting frame.
csv_path = "data/diabetes.csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


### Deleting invalid entries

In [4]:
# Keep only the rows where Glucose, Insulin and BMI are all non-zero.
# .copy() makes the result an independent frame, so the columns assigned to it
# in later cells are plain assignments rather than writes to a slice of the
# originally loaded data.
has_glucose = df['Glucose'] != 0
has_insulin = df['Insulin'] != 0
has_bmi = df['BMI'] != 0
df = df[has_glucose & has_insulin & has_bmi].copy()

## Binning ages into ranges and labelling outcomes

In [5]:
# Report the smallest and largest age present in the filtered data.
ages = df["Age"]
youngest, oldest = min(ages), max(ages)
print("Min age: ", youngest, "\nMax: ", oldest)

Min age:  21 
Max:  81


In [6]:
# Bin edges with right=True give the intervals (20, 30], (30, 50], (50, 81];
# `labels` names them in the same order.
bins = [20, 30, 50, 81]
labels = ['21-30', '31-50', '51-81']
outcome_names = {0: 'Diabetes Negative', 1: 'Diabetes Positive'}

# Derive both helper columns first, then attach them to the frame.
age_groups = pd.cut(df["Age"], bins=bins, labels=labels, right=True)
outcome_text = df["Outcome"].replace(outcome_names)
df["Age_range"] = age_groups
df["Outcome_label"] = outcome_text

df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Age_range,Outcome_label
3,1,89,66,23,94,28.1,0.167,21,0,21-30,Diabetes Negative
4,0,137,40,35,168,43.1,2.288,33,1,31-50,Diabetes Positive
6,3,78,50,32,88,31.0,0.248,26,1,21-30,Diabetes Positive
8,2,197,70,45,543,30.5,0.158,53,1,51-81,Diabetes Positive
13,1,189,60,23,846,30.1,0.398,59,1,51-81,Diabetes Positive
...,...,...,...,...,...,...,...,...,...,...,...
753,0,181,88,44,510,43.3,0.222,26,1,21-30,Diabetes Positive
755,1,128,88,39,110,36.5,1.057,37,1,31-50,Diabetes Positive
760,2,88,58,26,16,28.4,0.766,22,0,21-30,Diabetes Negative
763,10,101,76,48,180,32.9,0.171,63,0,51-81,Diabetes Negative


### Divide the table into two: diabetes negative (Outcome = 0) and diabetes positive (Outcome = 1)

In [7]:
# Split the frame on the Outcome column: rows with 0 go to df_negative,
# rows with 1 go to df_positive.
outcome = df['Outcome']
df_negative = df[outcome.eq(0)]
df_positive = df[outcome.eq(1)]

## Creating BMI vs Glucose scatter chart

Here we will make a scatter chart consisting of two series: one for the diabetes-negative entries and the other for the diabetes-positive entries.

In [8]:
# Pull the plotted columns out as plain Python lists, one pair per outcome group.
BMI_negative = df_negative["BMI"].to_list()
glucose_negative = df_negative["Glucose"].to_list()
BMI_positive = df_positive["BMI"].to_list()
glucose_positive = df_positive["Glucose"].to_list()

# 2D chart using the white theme.
scatter = lc.ChartXY(theme=lc.Themes.White, title='Scatter Chart (BMI vs Glucose)')

# One point series per group: BMI on x, glucose on y.
series_negative = scatter.add_point_series().add(x=BMI_negative, y=glucose_negative)
series_positive = scatter.add_point_series().add(x=BMI_positive, y=glucose_positive)

# Colour and legend name for each series (negative first, then positive).
for point_series, rgba, legend_name in (
    (series_negative, (0, 255, 0, 192), "Diabetes Negative"),
    (series_positive, (255, 0, 0, 192), "Diabetes Positive"),
):
    point_series.set_point_color(lc.Color(*rgba)).set_name(legend_name)

# Axis titles match the columns plotted on each axis.
x_axis = scatter.get_default_x_axis()
x_axis.set_title("BMI")
y_axis = scatter.get_default_y_axis()
y_axis.set_title("Glucose")

# Attach a legend for the chart and open it.
legend = scatter.add_legend()
legend.add(scatter)
scatter.open()

## Creating 3D scatter chart (Age vs Glucose vs BMI)

In [9]:
# Ages per outcome group as plain lists; the BMI and glucose lists come from
# the 2D scatter cell above.
age_negative = df_negative["Age"].to_list()
age_positive = df_positive["Age"].to_list()

# 3D chart using the white theme.
scatter3d = lc.Chart3D(theme=lc.Themes.White, title='Chart 3D')

# One 3D point series per group: BMI on x, glucose on y, age on z.
series_negative = scatter3d.add_point_series(render_2d=False).add(
    x=BMI_negative, y=glucose_negative, z=age_negative
)
series_positive = scatter3d.add_point_series(render_2d=False).add(
    x=BMI_positive, y=glucose_positive, z=age_positive
)

# Colour and legend name for each series (negative first, then positive).
for point_series, rgba, legend_name in (
    (series_negative, (0, 255, 0, 192), "Diabetes Negative"),
    (series_positive, (255, 0, 0, 192), "Diabetes Positive"),
):
    point_series.set_point_color(lc.Color(*rgba)).set_name(legend_name)

# Axis titles match the columns plotted on each axis.
x_axis = scatter3d.get_default_x_axis()
x_axis.set_title("BMI")
y_axis = scatter3d.get_default_y_axis()
y_axis.set_title("Glucose")
z_axis = scatter3d.get_default_z_axis()
z_axis.set_title("Age")

# Attach a legend for the chart and open it.
legend = scatter3d.add_legend()
legend.add(scatter3d)
scatter3d.open()

## Creating stacked bar chart

In [10]:
# Count rows per (age range, outcome) pair and pivot the outcomes into columns.
# reindex() pins the rows to `labels` and the columns to the two outcome names,
# filling absent combinations with 0. Every 'values' list therefore has exactly
# one entry per age-range label, in the same order as `labels`, even when a bin
# or an outcome has no rows (observed=True alone would drop an empty bin and
# shift the remaining bars onto the wrong category).
outcome_labels = ['Diabetes Negative', 'Diabetes Positive']
outcome_counts = (
    df.groupby(['Age_range', 'Outcome_label'], observed=True)
    .size()
    .unstack(fill_value=0)
    .reindex(index=labels, columns=outcome_labels, fill_value=0)
    .reset_index()
)

# One {'subCategory', 'values'} record per outcome.
result = [
    {'subCategory': outcome_label, 'values': outcome_counts[outcome_label].tolist()}
    for outcome_label in outcome_labels
]

In [11]:
# Vertical bar chart using the white theme.
barchart_stacked = lc.BarChart(vertical=True, theme=lc.Themes.White, title='Stacked Bar Chart')
# Draw each count inside its bar.
barchart_stacked = barchart_stacked.set_value_label_display_mode('insideBar')

# Categories are the age-range labels; `result` holds one record per outcome.
barchart_stacked.set_data_stacked(labels, result)

# Attach a legend for the chart and open it.
bar_legend = barchart_stacked.add_legend()
bar_legend.add(barchart_stacked)
barchart_stacked.open()

## Creating box plots

In [12]:
# Glucose readings per outcome group as NumPy arrays.
glucose_no_diabetes = df_negative['Glucose'].to_numpy()
glucose_diabetes = df_positive['Glucose'].to_numpy()

# Box plot with the negative group first (left) and the positive group second (right).
glucose_box_options = dict(
    data=[glucose_no_diabetes, glucose_diabetes],
    theme=lc.Themes.White,
    title='Glucose',
    xlabel='No diabetes (left), Diabetes positive (right)',
    ylabel='Glucose',
)
boxplt = lc.BoxPlot(**glucose_box_options)
boxplt.open()

In [13]:
# Pregnancy counts per outcome group as NumPy arrays.
pregnancies_no_diabetes = df_negative['Pregnancies'].to_numpy()
pregnancies_diabetes = df_positive['Pregnancies'].to_numpy()

# Box plot with the negative group first (left) and the positive group second (right).
pregnancies_box_options = dict(
    data=[pregnancies_no_diabetes, pregnancies_diabetes],
    theme=lc.Themes.White,
    title='Pregnancies',
    xlabel='No diabetes (left), Diabetes positive (right)',
    ylabel='Pregnancies',
)
boxplt1 = lc.BoxPlot(**pregnancies_box_options)
boxplt1.open()