-
Notifications
You must be signed in to change notification settings - Fork 60
/
plot_diabetes.py
76 lines (60 loc) · 2.53 KB
/
plot_diabetes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""
==================================
Above-average features in Diabetes
==================================
Explore above-average attributes in the Diabetes dataset (Efron et al., 2004).

Here we take the five features most positively correlated with disease
progression, and look at the distribution of that disease progression value
when each of these features is above average (specifically, above its median).
The most correlated features are:

- bmi body mass index
- bp average blood pressure
- s4 tch, total cholesterol / HDL
- s5 ltg, possibly log of serum triglycerides level
- s6 glu, blood sugar level

This kind of dataset analysis may not be a practical use of UpSet, but helps
to illustrate the :meth:`UpSet.add_catplot` feature.
"""
import pandas as pd
from sklearn.datasets import load_diabetes
from matplotlib import pyplot as plt
from upsetplot import UpSet
# Load the dataset and wrap its feature matrix in a DataFrame
diabetes = load_diabetes()
diabetes_df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)

# Rank every feature by its Spearman correlation with the target (disease
# progression). sort_values() sorts ascending, so the last five labels are
# the five most positively correlated features.
target = pd.Series(diabetes.target)
ranked = diabetes_df.corrwith(target, method="spearman").sort_values()
top_features = ranked.index[-5:]

# Boolean mask for each top feature: True where a value exceeds that
# feature's median. The ">" suffix keeps the mask columns distinct from the
# raw feature columns they are derived from.
medians = diabetes_df.median(axis=0)
above_median = (diabetes_df > medians)[top_features].add_suffix(">")

# Append the mask columns, move them into the index, then attach the target
# as a "progression" column. The original diabetes.target is assigned here
# (not the RangeIndex-ed Series above, which would be aligned against the
# new index).
diabetes_df = (
    pd.concat([diabetes_df, above_median], axis=1)
    .set_index(list(above_median.columns))
    .assign(progression=diabetes.target)
)
##########################################################################
# UpSet plot it!

# No orientation is passed here, so UpSet's default layout is used (the
# title below labels it as horizontal).
upset = UpSet(
    diabetes_df,
    intersection_plot_elements=3,
    subset_size="count",
)

# First strip plot: the disease progression value for each intersection
upset.add_catplot(kind="strip", value="progression", color="blue")

# Echo the indexed frame so the membership index and the data columns can
# be inspected alongside the figure
print(diabetes_df)

# Second strip plot: the raw bmi feature for each intersection
upset.add_catplot(kind="strip", value="bmi", color="black")

upset.plot()
plt.title("UpSet with catplots, for orientation='horizontal'")
plt.show()
##########################################################################
# And again in vertical orientation

upset = UpSet(
    diabetes_df,
    orientation="vertical",
    subset_size="count",
    intersection_plot_elements=3,
)

# Same two strip plots as in the horizontal figure, added in the same order:
# disease progression in blue, then the bmi feature in black
for column, colour in (("progression", "blue"), ("bmi", "black")):
    upset.add_catplot(value=column, kind="strip", color=colour)

upset.plot()
plt.title("UpSet with catplots, for orientation='vertical'")
plt.show()