In [1]:
# Pandas Web User Guide Follow along
# Merge, join, concatenate and compare
# https://pandas.pydata.org/docs/user_guide/merging.html
# Created 11/28/20

%matplotlib notebook

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from io import StringIO

# Controls which statements of a cell get their value displayed automatically.
# The option is documented at:
# https://ipython.readthedocs.io/en/stable/config/options/terminal.html
# Allowed values: 'all', 'last', 'last_expr', 'none', 'last_expr_or_assign'
# Default: 'last_expr'

from IPython.core.interactiveshell import InteractiveShell
# Author's note: 'last_expr_or_assign' was the only non-default value that
# could be made to work here. With it, a cell that ends in an assignment
# also displays the assigned value.
InteractiveShell.ast_node_interactivity = "last_expr_or_assign"

In [2]:
def _show(obj):
    """Render ``obj`` with IPython's ``display``; fall back to ``print``."""
    try:
        display(obj)
    except Exception:
        # ``display`` is not defined outside IPython (NameError), or the
        # object could not be rendered: plain text is the best effort.
        print(obj)


def diag(*args):
    """Pandas diagnostics.

    For every positional argument, print a short report and then show the
    object itself.

    * DataFrames: the ``info()`` summary followed by the frame.
    * Anything else: a separator line, the type, the length (when the
      object has one), the ``info()`` summary (when the object offers such
      a method) and finally the object.

    Args:
        *args: Any number of objects to inspect.

    Returns:
        None. All output is written to stdout / the notebook display.
    """
    for item in args:
        if isinstance(item, pd.DataFrame):
            # ``info()`` writes its report itself and returns None, so it
            # must not be wrapped in print() (that emits a stray "None").
            item.info()
            _show(item)
            continue

        print(f'{"-"*40}')
        print(f'Type: {type(item)}')

        try:
            print(f'Length: {len(item)}')
        except TypeError:
            # Unsized object: there is simply no length to report.
            pass

        info = getattr(item, 'info', None)
        if callable(info):
            try:
                info()
            except Exception:
                # Best effort only: an ``info`` attribute with an unexpected
                # signature must not abort the remaining diagnostics.
                pass

        _show(item)
                
# Short aliases to save typing in the cells below:
#   z(...) -> diag(...)     verbose diagnostics
#   d(...) -> display(...)  IPython rich display
z = diag
d = display;

<function IPython.core.display.display(*objs, include=None, exclude=None, metadata=None, transient=None, display_id=None, **kwargs)>

In [3]:
def read_df(text):
    """Create a pandas dataframe from a string of a dataframe
    copied from the pandas website tutorial.

    Expected layout: the first non-blank line holds the whitespace-separated
    column names; every following non-blank line holds a row label followed
    by that row's values. Blank lines (for example a leading or trailing
    newline from a pasted triple-quoted string) are ignored.

    Args:
        text: The dataframe rendered as plain text.

    Returns:
        A DataFrame whose index labels and values are all strings; no dtype
        conversion is attempted.
    """
    # Drop blank lines up front: an empty line has no tokens and would make
    # the row-label lookup below fail with IndexError.
    lines = [line for line in text.splitlines() if line.strip()]
    cols = lines[0].split() if lines else []

    index, array = [], []
    for line in lines[1:]:
        label, *values = line.split()
        index.append(label)
        array.append(values)

    return pd.DataFrame(array, index=index, columns=cols)

In [17]:
# Three frames sharing the columns A-D, with consecutive, non-overlapping
# integer row labels. Every cell holds "<column><row label>", e.g. 'B5'.
df1, df2, df3 = (
    pd.DataFrame({col: [f'{col}{row}' for row in rows] for col in 'ABCD'},
                 index=rows)
    for rows in ([0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11])
)

# Default concat: stack the frames on top of each other (axis=0).
frames = [df1, df2, df3]
result = pd.concat(frames)
z(frames, result)

----------------------------------------
Type: <class 'list'>
Length: 3


[    A   B   C   D
 0  A0  B0  C0  D0
 1  A1  B1  C1  D1
 2  A2  B2  C2  D2
 3  A3  B3  C3  D3,
     A   B   C   D
 4  A4  B4  C4  D4
 5  A5  B5  C5  D5
 6  A6  B6  C6  D6
 7  A7  B7  C7  D7,
       A    B    C    D
 8    A8   B8   C8   D8
 9    A9   B9   C9   D9
 10  A10  B10  C10  D10
 11  A11  B11  C11  D11]

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   A       12 non-null     object
 1   B       12 non-null     object
 2   C       12 non-null     object
 3   D       12 non-null     object
dtypes: object(4)
memory usage: 288.0+ bytes
None


Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9


In [5]:
# Frame with columns B, D, F and row labels 2, 3, 6, 7; each cell holds
# "<column><row label>".
df4 = pd.DataFrame(
    {col: [f'{col}{row}' for row in (2, 3, 6, 7)] for col in 'BDF'},
    index=[2, 3, 6, 7],
)

Unnamed: 0,B,D,F
2,B2,D2,F2
3,B3,D3,F3
6,B6,D6,F6
7,B7,D7,F7


In [6]:
# A named Series: the name 'X' is what concat will use as the column label.
s1 = pd.Series([f'X{i}' for i in range(4)], name='X')

0    X0
1    X1
2    X2
3    X3
Name: X, dtype: object

In [7]:
# Column-wise concat of a frame and a Series: the Series becomes one extra
# column.
result = pd.concat(objs=[df1, s1], axis='columns')

Unnamed: 0,A,B,C,D,X
0,A0,B0,C0,D0,X0
1,A1,B1,C1,D1,X1
2,A2,B2,C2,D2,X2
3,A3,B3,C3,D3,X3


In [8]:
# The same Series passed twice: it is appended as two extra columns.
result = pd.concat(objs=[df1] + [s1] * 2, axis='columns')

Unnamed: 0,A,B,C,D,X,X.1
0,A0,B0,C0,D0,X0,X0
1,A1,B1,C1,D1,X1,X1
2,A2,B2,C2,D2,X2,X2
3,A3,B3,C3,D3,X3,X3


In [9]:
# Same values as before, but this time the Series carries no name.
s1 = pd.Series([f'X{i}' for i in range(4)])

0    X0
1    X1
2    X2
3    X3
dtype: object

In [10]:
# With s1 unnamed (as redefined in the preceding cell), the appended columns
# are numbered consecutively (0, 1) instead of being labelled by name.
result = pd.concat(objs=[df1, s1, s1], axis='columns')

Unnamed: 0,A,B,C,D,0,1
0,A0,B0,C0,D0,X0,X0
1,A1,B1,C1,D1,X1,X1
2,A2,B2,C2,D2,X2,X2
3,A3,B3,C3,D3,X3,X3


In [12]:
# Concatenating only Series along the columns: a named Series keeps its name
# as the column label, unnamed ones are numbered consecutively.
s3 = pd.Series([0, 1, 2, 3], name='foo')
s4 = pd.Series([0, 1, 2, 3])
s5 = pd.Series([0, 1, 4, 5])
# Deliberately NOT bound to ``df1``: rebinding that name would clobber the
# A-D frame that the later ``pieces = {'x': df1, ...}`` cell relies on, so the
# notebook would only work when cells are executed out of order.
series_concat = pd.concat([s3, s4, s5], axis=1)
d(s3, s4, s5, series_concat)

0    0
1    1
2    2
3    3
Name: foo, dtype: int64

0    0
1    1
2    2
3    3
dtype: int64

0    0
1    1
2    4
3    5
dtype: int64

Unnamed: 0,foo,0,1
0,0,0,0
1,1,1,1
2,2,2,4
3,3,3,5


In [18]:
# Map a label to each frame; concat can use the dict keys as the outer level
# of a hierarchical index.
pieces = dict(x=df1, y=df2, z=df3)

{'x':     A   B   C   D
 0  A0  B0  C0  D0
 1  A1  B1  C1  D1
 2  A2  B2  C2  D2
 3  A3  B3  C3  D3,
 'y':     A   B   C   D
 4  A4  B4  C4  D4
 5  A5  B5  C5  D5
 6  A6  B6  C6  D6
 7  A7  B7  C7  D7,
 'z':       A    B    C    D
 8    A8   B8   C8   D8
 9    A9   B9   C9   D9
 10  A10  B10  C10  D10
 11  A11  B11  C11  D11}

In [19]:
# Passing the dict directly: its keys become the outer index level.
result = pd.concat(objs=pieces, axis=0)

Unnamed: 0,Unnamed: 1,A,B,C,D
x,0,A0,B0,C0,D0
x,1,A1,B1,C1,D1
x,2,A2,B2,C2,D2
x,3,A3,B3,C3,D3
y,4,A4,B4,C4,D4
y,5,A5,B5,C5,D5
y,6,A6,B6,C6,D6
y,7,A7,B7,C7,D7
z,8,A8,B8,C8,D8
z,9,A9,B9,C9,D9


In [23]:
# ``keys`` selects (and orders) which dict entries are concatenated; here
# only 'z' and 'y'. ``levels`` spells out the full set of labels for the
# outer index level, including ones that end up unused.
result = pd.concat(
    objs=pieces,
    keys=['z', 'y'],
    levels=[['z', 'y', 'x', 'w']],
)

Unnamed: 0,Unnamed: 1,A,B,C,D
z,8,A8,B8,C8,D8
z,9,A9,B9,C9,D9
z,10,A10,B10,C10,D10
z,11,A11,B11,C11,D11
y,4,A4,B4,C4,D4
y,5,A5,B5,C5,D5
y,6,A6,B6,C6,D6
y,7,A7,B7,C7,D7


In [24]:
# Inspect the levels of the resulting MultiIndex: the outer level still lists
# every label passed via ``levels`` ('z', 'y', 'x', 'w'), although only 'z'
# and 'y' occur in the data.
result.index.levels

FrozenList([['z', 'y', 'x', 'w'], [4, 5, 6, 7, 8, 9, 10, 11]])