# Semi graphic displays and charsets

A tour of the text and semi-graphic displays included in stemgraphic, and of the charsets they can be rendered with.

## Imports

In [1]:
import pandas as pd
from stemgraphic.num import text_heatmap, heatmatrix, text_hist, text_dot, stem_tally, stem_text
from stemgraphic.helpers import available_charsets

## Loading some data

In [2]:
# Load home_data.csv into a DataFrame; the path is relative to the current
# working directory, so run the notebook from its own folder.
df = pd.read_csv('../datasets/home_data.csv')

In [3]:
# (rows, columns) of the loaded frame.
df.shape

(21613, 21)

In [4]:
# Preview the first rows to see which columns are available for plotting.
df.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


## Heatmaps

These are stem-and-leaf heatmaps, as introduced by stemgraphic. Columns are leaves, rows are stems.

The data is limited to a sample of 300 values by default; `random_state` is set for reproducibility.

In [5]:
# Heat matrix of the zipcode column rendered with the 'bold' digit charset;
# random_state=42 fixes the sample. The trailing ';' keeps Jupyter from
# echoing the call's return value below the printed table.
heatmatrix(df.zipcode, charset='bold', random_state=42);

Stem-and-leaf heatmap (9819.8 x 10 )
        𝟎   𝟏   𝟐   𝟑   𝟒   𝟓   𝟔   𝟕   𝟖   𝟗
stem                                         
𝟗𝟖𝟎𝟎    𝟎  𝟏𝟐   𝟖  𝟏𝟏  𝟏𝟔  𝟏𝟎  𝟐𝟐  𝟏𝟎  𝟏𝟕   𝟎
𝟗𝟖𝟎𝟏    𝟓   𝟔   𝟎   𝟎   𝟖   𝟎   𝟎   𝟎   𝟎   𝟔
𝟗𝟖𝟎𝟐    𝟎   𝟎  𝟏𝟓  𝟏𝟔   𝟏   𝟎   𝟎  𝟏𝟐   𝟔  𝟏𝟐
𝟗𝟖𝟎𝟑   𝟏𝟏   𝟗   𝟐  𝟏𝟐  𝟐𝟖   𝟎   𝟎   𝟎  𝟐𝟎   𝟏
𝟗𝟖𝟎𝟒   𝟏𝟏   𝟎  𝟐𝟎   𝟎   𝟎  𝟏𝟐   𝟎   𝟎   𝟎   𝟎
𝟗𝟖𝟎𝟓    𝟎   𝟎  𝟐𝟑  𝟏𝟔   𝟎  𝟏𝟏  𝟐𝟏   𝟎  𝟐𝟒  𝟏𝟔
𝟗𝟖𝟎𝟔    𝟎   𝟎   𝟎   𝟎   𝟎  𝟏𝟒   𝟎   𝟎   𝟎   𝟎
𝟗𝟖𝟎𝟕    𝟐   𝟎  𝟏𝟑   𝟎  𝟐𝟑  𝟏𝟑   𝟎   𝟕   𝟎   𝟎
𝟗𝟖𝟎𝟖    𝟎   𝟎   𝟎   𝟎   𝟎   𝟎   𝟎   𝟎   𝟎   𝟎
𝟗𝟖𝟎𝟗    𝟎   𝟎  𝟏𝟓   𝟎   𝟎   𝟎   𝟎   𝟎   𝟎   𝟎
𝟗𝟖𝟏𝟎    𝟎   𝟎   𝟐  𝟐𝟒   𝟎   𝟔  𝟏𝟖   𝟖   𝟖   𝟕
𝟗𝟖𝟏𝟏    𝟎   𝟎  𝟏𝟏   𝟎   𝟎  𝟐𝟐  𝟏𝟓  𝟏𝟔  𝟐𝟑  𝟏𝟏
𝟗𝟖𝟏𝟐    𝟎   𝟎  𝟐𝟎   𝟎   𝟎  𝟏𝟕  𝟏𝟓   𝟎   𝟎   𝟎
𝟗𝟖𝟏𝟑    𝟎   𝟎   𝟎  𝟐𝟒   𝟎   𝟎  𝟏𝟑   𝟎   𝟎   𝟎
𝟗𝟖𝟏𝟒    𝟎   𝟎   𝟎   𝟎  𝟏𝟔   𝟎   𝟗   𝟎   𝟓   𝟎
𝟗𝟖𝟏𝟓    𝟎   𝟎   𝟎   𝟎   𝟎  𝟐𝟏   𝟎   𝟎   𝟎   𝟎
𝟗𝟖𝟏𝟔    𝟎   𝟎   𝟎   𝟎   𝟎   𝟎   𝟖   𝟎   𝟗   𝟎
𝟗𝟖𝟏𝟕    𝟎   𝟎   𝟎   𝟎   𝟎   𝟎   𝟎  𝟏𝟎  𝟏𝟏   𝟎
𝟗𝟖𝟏𝟖    𝟎   𝟎   𝟎   𝟎   𝟎   𝟎   𝟎   𝟎   𝟓  

The data is limited to a sample of 300 values by default (use `display=` to change this); `random_state` is set for reproducibility. The heatmap makes patterns easier to read than the heatmatrix.

In [6]:
# Same zipcode sample as a text heatmap, this time with the 'sansbold' charset;
# random_state=42 fixes the sample. The trailing ';' keeps Jupyter from
# echoing the call's return value below the printed table.
text_heatmap(df.zipcode, charset='sansbold', random_state=42);

Stem-and-leaf heatmap (9819.8 x 10 )
        𝟬   𝟭   𝟮   𝟯   𝟰   𝟱   𝟲   𝟳   𝟴   𝟵
stem                                         
𝟵𝟴𝟬𝟬       𝟭𝟮   𝟴  𝟭𝟭  𝟭𝟲  𝟭𝟬  𝟮𝟮  𝟭𝟬  𝟭𝟳    
𝟵𝟴𝟬𝟭    𝟱   𝟲           𝟴                   𝟲
𝟵𝟴𝟬𝟮           𝟭𝟱  𝟭𝟲   𝟭          𝟭𝟮   𝟲  𝟭𝟮
𝟵𝟴𝟬𝟯   𝟭𝟭   𝟵   𝟮  𝟭𝟮  𝟮𝟴              𝟮𝟬   𝟭
𝟵𝟴𝟬𝟰   𝟭𝟭      𝟮𝟬          𝟭𝟮                
𝟵𝟴𝟬𝟱           𝟮𝟯  𝟭𝟲      𝟭𝟭  𝟮𝟭      𝟮𝟰  𝟭𝟲
𝟵𝟴𝟬𝟲                       𝟭𝟰                
𝟵𝟴𝟬𝟳    𝟮      𝟭𝟯      𝟮𝟯  𝟭𝟯       𝟳        
𝟵𝟴𝟬𝟵           𝟭𝟱                            
𝟵𝟴𝟭𝟬            𝟮  𝟮𝟰       𝟲  𝟭𝟴   𝟴   𝟴   𝟳
𝟵𝟴𝟭𝟭           𝟭𝟭          𝟮𝟮  𝟭𝟱  𝟭𝟲  𝟮𝟯  𝟭𝟭
𝟵𝟴𝟭𝟮           𝟮𝟬          𝟭𝟳  𝟭𝟱            
𝟵𝟴𝟭𝟯               𝟮𝟰          𝟭𝟯            
𝟵𝟴𝟭𝟰                   𝟭𝟲       𝟵       𝟱    
𝟵𝟴𝟭𝟱                       𝟮𝟭                
𝟵𝟴𝟭𝟲                            𝟴       𝟵    
𝟵𝟴𝟭𝟳                               𝟭𝟬  𝟭𝟭    
𝟵𝟴𝟭𝟴                                    𝟱    
𝟵𝟴𝟭𝟵                                   𝟭𝟮  

## Tally chart

In [7]:
# Stem-and-leaf chart of the price column drawn with tally marks.
# NOTE(review): no random_state is passed here, unlike the heatmap cells;
# if stem_tally samples the data, the output may differ between runs. Confirm.
stem_tally(df.price)

75000
    ¡
  0 |𝍪
  1 |卌卌𝍬
  2 |卌卌卌卌卌卌卌卌卌卌𝍪
  3 |卌卌卌卌卌卌卌卌卌卌卌𝍩
  4 |卌卌卌卌卌卌卌卌卌卌卌卌
  5 |卌卌卌卌卌卌卌𝍪
  6 |卌卌卌卌𝍬
  7 |卌卌卌卌卌
  8 |卌卌𝍫
  9 |卌𝍪
 10 |卌𝍩
 11 |𝍪
 12 |𝍫
 13 |𝍬
 14 |𝍫
 15 |𝍫
 16 |𝍪
 17 |
 18 |𝍪
 19 |
 20 |
 21 |
 22 |
 23 |𝍪
 24 |
 25 |𝍪
 26 |
 27 |𝍪
    !
7700000
Key: 
27|0 => 27.0x100000.0 = 2700000.0 


## Dot plot

With the `flip_axes` (rotated 90°) and `symmetric` options.

In [8]:
# Dot plot of the price column, mirrored (symmetric=True) and with the axes
# flipped (flip_axes=True).
# NOTE(review): no random_state is passed here, unlike the heatmap and
# histogram cells; if text_dot samples the data, the output may differ
# between runs. Confirm.
text_dot(df.price, symmetric=True, flip_axes=True)


   ●                      
   ●                      
   ●                      
  ●●                      
  ●●●                     
  ●●●                     
  ●●●                     
  ●●●                     
  ●●●●                    
  ●●●●●                   
  ●●●●●                   
  ●●●●●                   
  ●●●●●                   
  ●●●●●                   
  ●●●●●                   
  ●●●●●                   
  ●●●●●                   
  ●●●●●                   
  ●●●●●                   
  ●●●●●                   
  ●●●●●●                  
  ●●●●●●                  
 ●●●●●●●●                 
 ●●●●●●●●                 
 ●●●●●●●●                 
 ●●●●●●●●●●  ●            
 ●●●●●●●●●●● ●            
●●●●●●●●●●●●●●  ●● ●   ●●●
 ●●●●●●●●●●● ●   ●        
 ●●●●●●●●●●  ●            
 ●●●●●●●●●●               
 ●●●●●●●●                 
 ●●●●●●●●                 
 ●●●●●●●●                 
  ●●●●●●                  
  ●●●●●●                  
  ●●●●●                   


## Histogram

A histogram whose binning follows the stem-and-leaf bins (decimal, break on 2, or break on 5); it can also be zoomed in (`zoom=-1`, `-2`) or out (`zoom=+1`, `+2`). `shade` can be 'none', 'light', 'medium', 'dark' or 'full'.

In [9]:
# Histogram of the bathrooms column: display=100 sets the sample size,
# zoom=1 zooms out one level, random_state=42 fixes the sample and
# shade='dark' selects the fill character.
text_hist(df.bathrooms, display=100, zoom=1, random_state=42, shade='dark')

  1 |▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
    |
  2 |▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
    |▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
  3 |▓▓▓▓▓▓
    |▓▓▓▓▓
  4 |▓▓▓▓
Scale: 
4|2 => 4.2x1.0 = 4.2 


## Charset support

Some of these render slightly differently in the console (python, ipython) versus in the notebook.

In [10]:
# List the charset names that can be passed as the charset= argument.
available_charsets()

['arabic',
 'arabic_r',
 'bold',
 'circled',
 'default',
 'doublestruck',
 'fullwidth',
 'gurmukhi',
 'mono',
 'nko',
 'rod',
 'roman',
 'sans',
 'sansbold',
 'square',
 'subscript',
 'tamil']

Alignment may not be perfect in the notebook, depending on the fonts available on your system. In a terminal, which is where most people will use these displays, alignment will be correct. The arabic and arabic_r charsets appear reversed (right to left versus left to right) in the console compared to the notebook.

In [11]:
# Render the same sampled stem-and-leaf plot of sqft_living once per charset,
# labelling each rendering and closing it with a horizontal rule.
for charset in available_charsets():
    print('Using charset: {}'.format(charset))
    stem_text(df.sqft_living, charset=charset, random_state=42)
    # One call emits: blank line, rule, blank line.
    print('', '____________________________________________________________________', '', sep='\n')

Using charset: arabic
٢٩٠
    ¡
  ٥ | ٢٢
  ٦ |
  ٧ | ١٣
  ٨ | ٣٤٥
  ٩ | ١١٢٤٧٧
 ١٠ | ١١٤٥٥٦٦٩
 ١١ | ١١١٢٢٢٣٣٤٤٤٤٦٨٨٨٩٩
 ١٢ | ٠٢٣٤٤٤٦٧٧٧٨٩٩
 ١٣ | ٠٠٢٣٣٣٣٤٤٥٦٦٧٩٩
 ١٤ | ١١١٢٢٣٣٤٥٦٦٦٦٨٩٩
 ١٥ | ٠١٢٢٢٣٣٣٤٤٤٦٨٩٩
 ١٦ | ٠٠١١١١٢٣٤٤٤٥٥٦٧٧٧٨٨
 ١٧ | ٠٠١١٢٣٤٥٥٥٥٧٨٨٩
 ١٨ | ٠٠٠١٣٣٣٤٥٥٧٩
 ١٩ | ١٢٣٤٤٥٥٦٦٦٧٧٨٩٩
 ٢٠ | ٠٠٤٥٥٦٦٧٧٧٨٨٩
 ٢١ | ٠١٢٢٣٦٦٦٨
 ٢٢ | ٠٠٠٢٣٣٤٥٥٦٦٧٨٨٩٩٩
 ٢٣ | ٠١٢٢٢٣٣٤٤٧٨٩
 ٢٤ | ٠٠١٢٤٥٦٨٨٩٩
 ٢٥ | ٠٠٣٤٥٥٦٦٧٩٩
 ٢٦ | ١٢٢٣٤٩
 ٢٧ | ٢٣٩٩٩
 ٢٨ | ٠٢٣٣٥
 ٢٩ | ٠٠٧٨٩
 ٣٠ | ٤٦٨
 ٣١ | ٢٥٨
 ٣٢ | ١٢٣٤٧
 ٣٣ | ١٣٦
 ٣٤ | ٠٢٣٦٨
 ٣٥ | ٤٥٩
 ٣٦ | ٠٠٨
 ٣٧ | ٢٧
 ٣٨ | ١
 ٣٩ | ٢
 ٤٠ | ٢٦
 ٤١ | ٢٣
 ٤٢ | ٨
 ٤٣ | ٦
 ٤٤ | ١
 ٤٥ | ٦
 ٤٦ | ٨
 ٤٧ | ٢
 ٤٨ | ٣
 ٤٩ |
 ٥٠ | ٢
 ٥١ |
 ٥٢ |
 ٥٣ | ٥
 ٥٤ | ٨
 ٥٥ |
 ٥٦ |
 ٥٧ | ١٣
 ٥٨ | ٦
 ٥٩ |
 ٦٠ | ٨
    !
١٣٥٤٠
Key: 
60|8 => 60.8x100.0 = 6080.0 

____________________________________________________________________

Using charset: arabic_r
¡    
٠٩٢
٢٢ | ٥  
 ٦  |
٣١ | ٧  
٥٤٣ | ٨  
٧٧٤٢١١ | ٩  
٩٦٦٥٥٤١١ | ٠١ 
٩٩٨٨٨٦٤٤٤٤٣٣٢٢٢١١١ | ١١ 
٩٩٨٧٧٧٦٤٤٤٣٢٠ | ٢١ 
٩٩٧٦٦٥٤٤٣٣٣٣٢٠٠ | ٣١ 
٩٩

₂₉₀
    ¡
  ₅ | ₂₂
  ₆ |
  ₇ | ₁₃
  ₈ | ₃₄₅
  ₉ | ₁₁₂₄₇₇
 ₁₀ | ₁₁₄₅₅₆₆₉
 ₁₁ | ₁₁₁₂₂₂₃₃₄₄₄₄₆₈₈₈₉₉
 ₁₂ | ₀₂₃₄₄₄₆₇₇₇₈₉₉
 ₁₃ | ₀₀₂₃₃₃₃₄₄₅₆₆₇₉₉
 ₁₄ | ₁₁₁₂₂₃₃₄₅₆₆₆₆₈₉₉
 ₁₅ | ₀₁₂₂₂₃₃₃₄₄₄₆₈₉₉
 ₁₆ | ₀₀₁₁₁₁₂₃₄₄₄₅₅₆₇₇₇₈₈
 ₁₇ | ₀₀₁₁₂₃₄₅₅₅₅₇₈₈₉
 ₁₈ | ₀₀₀₁₃₃₃₄₅₅₇₉
 ₁₉ | ₁₂₃₄₄₅₅₆₆₆₇₇₈₉₉
 ₂₀ | ₀₀₄₅₅₆₆₇₇₇₈₈₉
 ₂₁ | ₀₁₂₂₃₆₆₆₈
 ₂₂ | ₀₀₀₂₃₃₄₅₅₆₆₇₈₈₉₉₉
 ₂₃ | ₀₁₂₂₂₃₃₄₄₇₈₉
 ₂₄ | ₀₀₁₂₄₅₆₈₈₉₉
 ₂₅ | ₀₀₃₄₅₅₆₆₇₉₉
 ₂₆ | ₁₂₂₃₄₉
 ₂₇ | ₂₃₉₉₉
 ₂₈ | ₀₂₃₃₅
 ₂₉ | ₀₀₇₈₉
 ₃₀ | ₄₆₈
 ₃₁ | ₂₅₈
 ₃₂ | ₁₂₃₄₇
 ₃₃ | ₁₃₆
 ₃₄ | ₀₂₃₆₈
 ₃₅ | ₄₅₉
 ₃₆ | ₀₀₈
 ₃₇ | ₂₇
 ₃₈ | ₁
 ₃₉ | ₂
 ₄₀ | ₂₆
 ₄₁ | ₂₃
 ₄₂ | ₈
 ₄₃ | ₆
 ₄₄ | ₁
 ₄₅ | ₆
 ₄₆ | ₈
 ₄₇ | ₂
 ₄₈ | ₃
 ₄₉ |
 ₅₀ | ₂
 ₅₁ |
 ₅₂ |
 ₅₃ | ₅
 ₅₄ | ₈
 ₅₅ |
 ₅₆ |
 ₅₇ | ₁₃
 ₅₈ | ₆
 ₅₉ |
 ₆₀ | ₈
    !
₁₃₅₄₀
Key: 
60|8 => 60.8x100.0 = 6080.0 

____________________________________________________________________

Using charset: tamil
௨௯௦
    ¡
  ௫ | ௨௨
  ௬ |
  ௭ | ௧௩
  ௮ | ௩௪௫
  ௯ | ௧௧௨௪௭௭
 ௧௦ | ௧௧௪௫௫௬௬௯
 ௧௧ | ௧௧௧௨௨௨௩௩௪௪௪௪௬௮௮௮௯௯
 ௧௨ | ௦௨௩௪௪௪௬௭௭௭௮௯௯
 ௧௩ | ௦௦௨௩௩௩௩௪௪௫௬௬௭௯௯
 ௧௪ | ௧௧௧௨௨௩௩௪௫௬௬௬௬௮௯௯
 ௧௫ 

If the digit glyphs for your language are not available in stemgraphic but are available in Unicode, and you need them in stemgraphic, please open an issue on GitHub.