# summarytools Quick Start

## Imports

In [1]:
import numpy as np
import pandas as pd

import summarytools
from summarytools import dfSummary, tabset

# Report which summarytools release produced the outputs recorded below.
print("summarytools version: {}".format(summarytools.__version__))

summarytools version: 0.1.0


## Titanic Data

In [2]:
# Read the Titanic CSV from the local data folder, then preview the first
# five rows as the cell output.
titanic = pd.read_csv(filepath_or_buffer='./data/titanic.csv')
titanic.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## Vaccine Data

In [3]:
# Read the vaccination CSV and convert its 'date' column to datetimes in a
# single chained expression, then preview the first rows.
vaccine = (
    pd.read_csv('./data/country_vaccinations.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
vaccine.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Albania,ALB,2021-01-10,0.0,0.0,,,,0.0,0.0,,,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/vaksinimi-anticovid...
1,Albania,ALB,2021-01-11,,,,,64.0,,,,22.0,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/vaksinimi-anticovid...
2,Albania,ALB,2021-01-12,128.0,128.0,,,64.0,0.0,0.0,,22.0,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/vaksinimi-anticovid...
3,Albania,ALB,2021-01-13,188.0,188.0,,60.0,63.0,0.01,0.01,,22.0,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/vaksinimi-anticovid...
4,Albania,ALB,2021-01-14,266.0,266.0,,78.0,66.0,0.01,0.01,,23.0,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/vaksinimi-anticovid...


## Generate Dummy Data

In [4]:
# Seed NumPy's global RNG so the dummy frame -- and every summary rendered
# from it further down -- is identical on each "Restart Kernel -> Run All".
np.random.seed(42)

# Number of rows in the dummy frame; every column below has this length.
n_rows = 100

# One column per dtype: object strings, categorical, small-range ints,
# wider-range ints, floats, datetimes, booleans and a constant float.
catCol = np.array(list('ABCD'))[np.random.randint(4, size=n_rows)]
catCol2 = pd.Categorical(catCol)  # same values as catCol, category dtype
numCol = np.random.random(size=n_rows)
intCol = np.random.randint(5, size=n_rows)    # values 0..4
intCol2 = np.random.randint(30, size=n_rows)  # values 0..29
datCol = pd.date_range(
    '2018-01-01', '2018-01-31')[np.random.randint(31, size=n_rows)]
boolCol = np.array([True, False])[np.random.randint(2, size=n_rows)]
constCol = np.ones(n_rows)

data = pd.DataFrame({
    'catCol': catCol,
    'catCol2': catCol2,
    'intCol': intCol,
    'intCol2': intCol2,
    'numCol': numCol,
    'datCol': datCol,
    'boolCol': boolCol,
    # Deliberately long column name.
    'constCol_longlonglonglonglonglongName': constCol
})

# Show the dtype of every column, then preview the first rows.
print(data.dtypes)

data.head()

catCol                                           object
catCol2                                        category
intCol                                            int32
intCol2                                           int32
numCol                                          float64
datCol                                   datetime64[ns]
boolCol                                            bool
constCol_longlonglonglonglonglongName           float64
dtype: object


Unnamed: 0,catCol,catCol2,intCol,intCol2,numCol,datCol,boolCol,constCol_longlonglonglonglonglongName
0,D,D,2,11,0.918423,2018-01-15,False,1.0
1,A,A,2,12,0.495542,2018-01-13,True,1.0
2,B,B,1,3,0.890818,2018-01-17,False,1.0
3,C,C,3,19,0.068066,2018-01-12,True,1.0
4,C,C,4,7,0.556225,2018-01-04,False,1.0


## Out-of-the-Box Summary

In [5]:
# Summarise the dummy frame with dfSummary's default arguments; as the cell's
# last expression, the result is shown as the cell output.
dfSummary(data)

No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,catCol [object],1. B 2. A 3. D 4. C,35 (35.0%) 28 (28.0%) 21 (21.0%) 16 (16.0%),,0 (0.0%)
2,catCol2 [category],1. B 2. A 3. D 4. C,35 (35.0%) 28 (28.0%) 21 (21.0%) 16 (16.0%),,0 (0.0%)
3,intCol [int32],1. 2 2. 4 3. 1 4. 3 5. 0,24 (24.0%) 23 (23.0%) 22 (22.0%) 20 (20.0%) 11 (11.0%),,0 (0.0%)
4,intCol2 [int32],Mean (sd) : 14.4 (8.9) min < med < max: 0.0 < 16.0 < 29.0 IQR (CV) : 15.2 (1.6),28 distinct values,,0 (0.0%)
5,numCol [float64],Mean (sd) : 0.5 (0.3) min < med < max: 0.0 < 0.5 < 1.0 IQR (CV) : 0.5 (1.7),100 distinct values,,0 (0.0%)
6,datCol [datetime64[ns]],Min: 2018-01-01 Max: 2018-01-31 Duration: 30 days,30 distinct values,,0 (0.0%)
7,boolCol [bool],1. True 2. False,50 (50.0%) 50 (50.0%),,0 (0.0%)
8,constCol_longlonglonglonglonglongName [float64],1. 1.0,100 (100.0%),,0 (0.0%)


In [6]:
# Same default summary, this time for the Titanic frame loaded above.
dfSummary(titanic)

No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,PassengerId [int64],"1. Robert, Mrs. Edward Scott (Eli 2. Davies, Mr. Alfred J 3. Gustafsson, Mr. Alfred Ossian 4. Smith, Miss. Marion Elsie 5. Farrell, Mr. James 6. Artagaveytia, Mr. Ramon 7. Abbott, Mrs. Stanton (Rosa Hun 8. Harper, Mr. Henry Sleeper 9. Myhrman, Mr. Pehr Fabian Olive 10. Mallet, Master. Andre 11. other",1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 881 (98.9%),,0 (0.0%)
2,Survived [int64],1. male 2. female,577 (64.8%) 314 (35.2%),,0 (0.0%)
3,Pclass [int64],Mean (sd) : 29.7 (14.5) min < med < max: 0.4 < 28.0 < 80.0 IQR (CV) : 17.9 (2.0),88 distinct values,,0 (0.0%)
4,Name [object],1. 1601 2. CA. 2343 3. 347082 4. 347088 5. CA 2144 6. 3101295 7. 382652 8. S.O.C. 14879 9. 4133 10. 349909 11. other,7 (0.8%) 7 (0.8%) 7 (0.8%) 6 (0.7%) 6 (0.7%) 6 (0.7%) 5 (0.6%) 5 (0.6%) 4 (0.4%) 4 (0.4%) 834 (93.6%),,0 (0.0%)
5,Sex [object],Mean (sd) : 32.2 (49.7) min < med < max: 0.0 < 14.5 < 512.3 IQR (CV) : 23.1 (0.6),248 distinct values,,0 (0.0%)
6,Age [float64],1. nan 2. G6 3. B96 B98 4. C23 C25 C27 5. C22 C26 6. F33 7. E101 8. F2 9. D 10. B18 11. other,687 (77.1%) 4 (0.4%) 4 (0.4%) 4 (0.4%) 3 (0.3%) 3 (0.3%) 3 (0.3%) 3 (0.3%) 3 (0.3%) 2 (0.2%) 175 (19.6%),,177 (19.9%)
7,SibSp [int64],1. S 2. C 3. Q 4. nan,644 (72.3%) 168 (18.9%) 77 (8.6%) 2 (0.2%),,0 (0.0%)
8,Parch [int64],,,,0 (0.0%)
9,Ticket [object],,,,0 (0.0%)
10,Fare [float64],,,,0 (0.0%)


## Collapsible Summary

In [7]:
# Summarise each of the three frames with the is_collapsible option enabled.
# NOTE(review): the recorded output shows all three tables even though only
# the last call is the cell's final expression, so dfSummary presumably
# displays its result itself when is_collapsible=True -- confirm against the
# summarytools source before restructuring these calls (e.g. into a loop).
dfSummary(data, is_collapsible=True)
dfSummary(titanic, is_collapsible=True)
dfSummary(vaccine, is_collapsible=True)

No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,catCol [object],1. B 2. A 3. D 4. C,35 (35.0%) 28 (28.0%) 21 (21.0%) 16 (16.0%),,0 (0.0%)
2,catCol2 [category],1. B 2. A 3. D 4. C,35 (35.0%) 28 (28.0%) 21 (21.0%) 16 (16.0%),,0 (0.0%)
3,intCol [int32],1. 2 2. 4 3. 1 4. 3 5. 0,24 (24.0%) 23 (23.0%) 22 (22.0%) 20 (20.0%) 11 (11.0%),,0 (0.0%)
4,intCol2 [int32],Mean (sd) : 14.4 (8.9) min < med < max: 0.0 < 16.0 < 29.0 IQR (CV) : 15.2 (1.6),28 distinct values,,0 (0.0%)
5,numCol [float64],Mean (sd) : 0.5 (0.3) min < med < max: 0.0 < 0.5 < 1.0 IQR (CV) : 0.5 (1.7),100 distinct values,,0 (0.0%)
6,datCol [datetime64[ns]],Min: 2018-01-01 Max: 2018-01-31 Duration: 30 days,30 distinct values,,0 (0.0%)
7,boolCol [bool],1. True 2. False,50 (50.0%) 50 (50.0%),,0 (0.0%)
8,constCol_longlonglonglonglonglongName [float64],1. 1.0,100 (100.0%),,0 (0.0%)


No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,PassengerId [int64],"1. Robert, Mrs. Edward Scott (Eli 2. Davies, Mr. Alfred J 3. Gustafsson, Mr. Alfred Ossian 4. Smith, Miss. Marion Elsie 5. Farrell, Mr. James 6. Artagaveytia, Mr. Ramon 7. Abbott, Mrs. Stanton (Rosa Hun 8. Harper, Mr. Henry Sleeper 9. Myhrman, Mr. Pehr Fabian Olive 10. Mallet, Master. Andre 11. other",1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 881 (98.9%),,0 (0.0%)
2,Survived [int64],1. male 2. female,577 (64.8%) 314 (35.2%),,0 (0.0%)
3,Pclass [int64],Mean (sd) : 29.7 (14.5) min < med < max: 0.4 < 28.0 < 80.0 IQR (CV) : 17.9 (2.0),88 distinct values,,0 (0.0%)
4,Name [object],1. 1601 2. CA. 2343 3. 347082 4. 347088 5. CA 2144 6. 3101295 7. 382652 8. S.O.C. 14879 9. 4133 10. 349909 11. other,7 (0.8%) 7 (0.8%) 7 (0.8%) 6 (0.7%) 6 (0.7%) 6 (0.7%) 5 (0.6%) 5 (0.6%) 4 (0.4%) 4 (0.4%) 834 (93.6%),,0 (0.0%)
5,Sex [object],Mean (sd) : 32.2 (49.7) min < med < max: 0.0 < 14.5 < 512.3 IQR (CV) : 23.1 (0.6),248 distinct values,,0 (0.0%)
6,Age [float64],1. nan 2. G6 3. B96 B98 4. C23 C25 C27 5. C22 C26 6. F33 7. E101 8. F2 9. D 10. B18 11. other,687 (77.1%) 4 (0.4%) 4 (0.4%) 4 (0.4%) 3 (0.3%) 3 (0.3%) 3 (0.3%) 3 (0.3%) 3 (0.3%) 2 (0.2%) 175 (19.6%),,177 (19.9%)
7,SibSp [int64],1. S 2. C 3. Q 4. nan,644 (72.3%) 168 (18.9%) 77 (8.6%) 2 (0.2%),,0 (0.0%)
8,Parch [int64],,,,0 (0.0%)
9,Ticket [object],,,,0 (0.0%)
10,Fare [float64],,,,0 (0.0%)


No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,country [object],1. Northern Ireland 2. Scotland 3. United Kingdom 4. England 5. Wales 6. Russia 7. China 8. Israel 9. United States 10. Bahrain 11. other,"60 (2.1%) 60 (2.1%) 60 (2.1%) 60 (2.1%) 60 (2.1%) 58 (2.0%) 57 (2.0%) 55 (1.9%) 54 (1.9%) 51 (1.7%) 2,341 (80.3%)",,0 (0.0%)
2,iso_code [object],1. nan 2. GBR 3. RUS 4. CHN 5. ISR 6. USA 7. BHR 8. CHL 9. MEX 10. SVN 11. other,"240 (8.2%) 60 (2.1%) 58 (2.0%) 57 (2.0%) 55 (1.9%) 54 (1.9%) 51 (1.7%) 50 (1.7%) 50 (1.7%) 47 (1.6%) 2,194 (75.2%)",,240 (8.2%)
3,date [datetime64[ns]],Min: 2020-12-13 Max: 2021-02-11 Duration: 60 days,61 distinct values,,0 (0.0%)
4,total_vaccinations [float64],Mean (sd) : 1252919.5 (4149777.8) min < med < max: 0.0 < 153830.0 < 46390270.0 IQR (CV) : 627757.8 (0.3),"1,816 distinct values",,"1,048 (35.9%)"
5,people_vaccinated [float64],Mean (sd) : 1058740.5 (3461624.9) min < med < max: 0.0 < 143007.5 < 34723964.0 IQR (CV) : 536011.5 (0.3),"1,495 distinct values",,"1,386 (47.5%)"
6,people_fully_vaccinated [float64],Mean (sd) : 296533.4 (1042176.9) min < med < max: 1.0 < 23833.0 < 11188782.0 IQR (CV) : 114519.5 (0.3),891 distinct values,,"1,978 (67.8%)"
7,daily_vaccinations_raw [float64],Mean (sd) : 69519.9 (193080.3) min < med < max: 0.0 < 10566.0 < 2218752.0 IQR (CV) : 52615.5 (0.4),"1,459 distinct values",,"1,377 (47.2%)"
8,daily_vaccinations [float64],Mean (sd) : 55555.7 (172073.9) min < med < max: 1.0 < 6149.0 < 1916190.0 IQR (CV) : 27190.0 (0.3),"2,236 distinct values",,115 (3.9%)
9,total_vaccinations_per_hundred [float64],Mean (sd) : 5.0 (9.5) min < med < max: 0.0 < 1.9 < 71.2 IQR (CV) : 3.8 (0.5),786 distinct values,,"1,048 (35.9%)"
10,people_vaccinated_per_hundred [float64],Mean (sd) : 4.4 (7.5) min < med < max: 0.0 < 2.0 < 45.3 IQR (CV) : 3.0 (0.6),668 distinct values,,"1,386 (47.5%)"


## Tabbed Summary

In [8]:
# Render the summary of each frame and pass the rendered results to tabset
# in one dict, keyed by the label to use for that frame.
tabset({
    tab_name: dfSummary(frame).render()
    for tab_name, frame in (
        ('data', data),
        ('titanic', titanic),
        ('vaccine', vaccine),
    )
})

No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,catCol [object],1. B 2. A 3. D 4. C,35 (35.0%) 28 (28.0%) 21 (21.0%) 16 (16.0%),,0 (0.0%)
2,catCol2 [category],1. B 2. A 3. D 4. C,35 (35.0%) 28 (28.0%) 21 (21.0%) 16 (16.0%),,0 (0.0%)
3,intCol [int32],1. 2 2. 4 3. 1 4. 3 5. 0,24 (24.0%) 23 (23.0%) 22 (22.0%) 20 (20.0%) 11 (11.0%),,0 (0.0%)
4,intCol2 [int32],Mean (sd) : 14.4 (8.9) min < med < max: 0.0 < 16.0 < 29.0 IQR (CV) : 15.2 (1.6),28 distinct values,,0 (0.0%)
5,numCol [float64],Mean (sd) : 0.5 (0.3) min < med < max: 0.0 < 0.5 < 1.0 IQR (CV) : 0.5 (1.7),100 distinct values,,0 (0.0%)
6,datCol [datetime64[ns]],Min: 2018-01-01 Max: 2018-01-31 Duration: 30 days,30 distinct values,,0 (0.0%)
7,boolCol [bool],1. True 2. False,50 (50.0%) 50 (50.0%),,0 (0.0%)
8,constCol_longlonglonglonglonglongName [float64],1. 1.0,100 (100.0%),,0 (0.0%)

No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,PassengerId [int64],"1. Robert, Mrs. Edward Scott (Eli 2. Davies, Mr. Alfred J 3. Gustafsson, Mr. Alfred Ossian 4. Smith, Miss. Marion Elsie 5. Farrell, Mr. James 6. Artagaveytia, Mr. Ramon 7. Abbott, Mrs. Stanton (Rosa Hun 8. Harper, Mr. Henry Sleeper 9. Myhrman, Mr. Pehr Fabian Olive 10. Mallet, Master. Andre 11. other",1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 1 (0.1%) 881 (98.9%),,0 (0.0%)
2,Survived [int64],1. male 2. female,577 (64.8%) 314 (35.2%),,0 (0.0%)
3,Pclass [int64],Mean (sd) : 29.7 (14.5) min < med < max: 0.4 < 28.0 < 80.0 IQR (CV) : 17.9 (2.0),88 distinct values,,0 (0.0%)
4,Name [object],1. 1601 2. CA. 2343 3. 347082 4. 347088 5. CA 2144 6. 3101295 7. 382652 8. S.O.C. 14879 9. 4133 10. 349909 11. other,7 (0.8%) 7 (0.8%) 7 (0.8%) 6 (0.7%) 6 (0.7%) 6 (0.7%) 5 (0.6%) 5 (0.6%) 4 (0.4%) 4 (0.4%) 834 (93.6%),,0 (0.0%)
5,Sex [object],Mean (sd) : 32.2 (49.7) min < med < max: 0.0 < 14.5 < 512.3 IQR (CV) : 23.1 (0.6),248 distinct values,,0 (0.0%)
6,Age [float64],1. nan 2. G6 3. B96 B98 4. C23 C25 C27 5. C22 C26 6. F33 7. E101 8. F2 9. D 10. B18 11. other,687 (77.1%) 4 (0.4%) 4 (0.4%) 4 (0.4%) 3 (0.3%) 3 (0.3%) 3 (0.3%) 3 (0.3%) 3 (0.3%) 2 (0.2%) 175 (19.6%),,177 (19.9%)
7,SibSp [int64],1. S 2. C 3. Q 4. nan,644 (72.3%) 168 (18.9%) 77 (8.6%) 2 (0.2%),,0 (0.0%)
8,Parch [int64],,,,0 (0.0%)
9,Ticket [object],,,,0 (0.0%)
10,Fare [float64],,,,0 (0.0%)

No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,country [object],1. Northern Ireland 2. Scotland 3. United Kingdom 4. England 5. Wales 6. Russia 7. China 8. Israel 9. United States 10. Bahrain 11. other,"60 (2.1%) 60 (2.1%) 60 (2.1%) 60 (2.1%) 60 (2.1%) 58 (2.0%) 57 (2.0%) 55 (1.9%) 54 (1.9%) 51 (1.7%) 2,341 (80.3%)",,0 (0.0%)
2,iso_code [object],1. nan 2. GBR 3. RUS 4. CHN 5. ISR 6. USA 7. BHR 8. CHL 9. MEX 10. SVN 11. other,"240 (8.2%) 60 (2.1%) 58 (2.0%) 57 (2.0%) 55 (1.9%) 54 (1.9%) 51 (1.7%) 50 (1.7%) 50 (1.7%) 47 (1.6%) 2,194 (75.2%)",,240 (8.2%)
3,date [datetime64[ns]],Min: 2020-12-13 Max: 2021-02-11 Duration: 60 days,61 distinct values,,0 (0.0%)
4,total_vaccinations [float64],Mean (sd) : 1252919.5 (4149777.8) min < med < max: 0.0 < 153830.0 < 46390270.0 IQR (CV) : 627757.8 (0.3),"1,816 distinct values",,"1,048 (35.9%)"
5,people_vaccinated [float64],Mean (sd) : 1058740.5 (3461624.9) min < med < max: 0.0 < 143007.5 < 34723964.0 IQR (CV) : 536011.5 (0.3),"1,495 distinct values",,"1,386 (47.5%)"
6,people_fully_vaccinated [float64],Mean (sd) : 296533.4 (1042176.9) min < med < max: 1.0 < 23833.0 < 11188782.0 IQR (CV) : 114519.5 (0.3),891 distinct values,,"1,978 (67.8%)"
7,daily_vaccinations_raw [float64],Mean (sd) : 69519.9 (193080.3) min < med < max: 0.0 < 10566.0 < 2218752.0 IQR (CV) : 52615.5 (0.4),"1,459 distinct values",,"1,377 (47.2%)"
8,daily_vaccinations [float64],Mean (sd) : 55555.7 (172073.9) min < med < max: 1.0 < 6149.0 < 1916190.0 IQR (CV) : 27190.0 (0.3),"2,236 distinct values",,115 (3.9%)
9,total_vaccinations_per_hundred [float64],Mean (sd) : 5.0 (9.5) min < med < max: 0.0 < 1.9 < 71.2 IQR (CV) : 3.8 (0.5),786 distinct values,,"1,048 (35.9%)"
10,people_vaccinated_per_hundred [float64],Mean (sd) : 4.4 (7.5) min < med < max: 0.0 < 2.0 < 45.3 IQR (CV) : 3.0 (0.6),668 distinct values,,"1,386 (47.5%)"


## Export to HTML

### Save notebook checkpoint

In [9]:
%%javascript
// Write a checkpoint and save the notebook from the browser side, so the
// file on disk is current before the export cell below runs.
// NOTE(review): this relies on a global `Jupyter` object; presumably that is
// only provided by the classic Notebook front end -- confirm for JupyterLab.
Jupyter.notebook.save_checkpoint()
Jupyter.notebook.save_notebook()

<IPython.core.display.Javascript object>

### Export HTML with embedded images

In [10]:
# Run nbconvert from the shell to convert quick-start.ipynb (presumably this
# notebook's own file -- confirm) using the html_embed exporter.
# NOTE(review): html_embed does not look like a core nbconvert format; it is
# presumably supplied by an add-on package -- confirm it is installed.
!jupyter nbconvert --to html_embed quick-start.ipynb

[NbConvertApp] Converting notebook quick-start.ipynb to html_embed
