In [1]:
import pandas as pd
pd.__version__

'1.0.5'

### Creating Pandas DataFrames from Lists and Dictionaries

https://pbpython.com/pandas-list-dict.html

#### Row oriented with dicts
The “default” manner to create a DataFrame from python is to use a list of dictionaries. In this case each dictionary key is used for the column headings. A default index will be created automatically:

In [2]:
sales = [{'account': 'Jones LLC', 'Jan': 150, 'Feb': 200, 'Mar': 140},
         {'account': 'Alpha Co',  'Jan': 200, 'Feb': 210, 'Mar': 215},
         {'account': 'Blue Inc',  'Jan': 50,  'Feb': 90,  'Mar': 95 }]

df = pd.DataFrame(sales)

df

Unnamed: 0,account,Jan,Feb,Mar
0,Jones LLC,150,200,140
1,Alpha Co,200,210,215
2,Blue Inc,50,90,95


In [3]:
df.dtypes

account    object
Jan         int64
Feb         int64
Mar         int64
dtype: object

In [4]:
df['account'].astype('category')

0    Jones LLC
1     Alpha Co
2     Blue Inc
Name: account, dtype: category
Categories (3, object): [Alpha Co, Blue Inc, Jones LLC]

In [53]:
df

Unnamed: 0,account,Jan,Feb,Mar
0,Jones LLC,150,200,140
1,Alpha Co,200,210,215
2,Blue Inc,50,90,95


In [54]:
df.iloc[1, [2,3]]

Feb    210
Mar    215
Name: 1, dtype: object

In [70]:
df

Unnamed: 0,account,Jan,Feb,Mar,Apr
0,Jones LLC,150,200,140,200.0
1,Alpha Co,200,210,215,
2,Blue Inc,50,90,95,300.0


In [68]:
df[df['Apr'].notnull()]

Unnamed: 0,account,Jan,Feb,Mar,Apr
0,Jones LLC,150,200,140,200.0
2,Blue Inc,50,90,95,300.0


In [73]:
df.fillna(method='bfill')

Unnamed: 0,account,Jan,Feb,Mar,Apr
0,Jones LLC,150,200,140,200.0
1,Alpha Co,200,210,215,300.0
2,Blue Inc,50,90,95,300.0


In [76]:
df

Unnamed: 0,account,Jan,Feb,Mar,Apr
0,Jones LLC,150,200,140,200.0
1,Alpha Co,200,210,215,
2,Blue Inc,50,90,95,300.0


In [77]:
df.sort_values('Mar')

Unnamed: 0,account,Jan,Feb,Mar,Apr
2,Blue Inc,50,90,95,300.0
0,Jones LLC,150,200,140,200.0
1,Alpha Co,200,210,215,


#### Column oriented with dicts

If you would like to create a DataFrame in a “column oriented” manner, you would use __from_dict__

In [4]:
sales = {'account': ['Jones LLC', 'Alpha Co', 'Blue Inc'],
         'Jan': [150, 200, 50],
         'Feb': [200, 210, 90],
         'Mar': [140, 215, 95]}

df = pd.DataFrame.from_dict(sales)

df

Unnamed: 0,account,Jan,Feb,Mar
0,Jones LLC,150,200,140
1,Alpha Co,200,210,215
2,Blue Inc,50,90,95


#### Reorder the columns

In [4]:
df = df[['account', 'Jan', 'Feb', 'Mar']]
df

Unnamed: 0,account,Jan,Feb,Mar
0,Jones LLC,150,200,140
1,Alpha Co,200,210,215
2,Blue Inc,50,90,95


### Row oriented with lists

This approach is similar to the dictionary approach but you need to explicitly call out the column labels.

In [5]:
sales = [('Jones LLC', 150, 200, 50),
         ('Alpha Co', 200, 210, 90),
         ('Blue Inc', 140, 215, 95)]
labels = ['account', 'Jan', 'Feb', 'Mar']

df = pd.DataFrame.from_records(sales, columns=labels)

df

Unnamed: 0,account,Jan,Feb,Mar
0,Jones LLC,150,200,50
1,Alpha Co,200,210,90
2,Blue Inc,140,215,95


### Column oriented with lists

The second method is the from_items which is column oriented and actually looks similar to the OrderedDict example above.

In [6]:
sales = [('account', ['Jones LLC', 'Alpha Co', 'Blue Inc']),
         ('Jan', [150, 200, 50]),
         ('Feb', [200, 210, 90]),
         ('Mar', [140, 215, 95])]

# df = pd.DataFrame.from_items(sales)         # depricated

df = pd.DataFrame.from_dict(dict(sales))

df

Unnamed: 0,account,Jan,Feb,Mar
0,Jones LLC,150,200,140
1,Alpha Co,200,210,215
2,Blue Inc,50,90,95


### Exporation

In [5]:
def print_with_header(header, var):
    print('\n' + header + ':')
    print(var)
    
print_with_header('df.ndim', df.ndim)
print_with_header('df.size', df.size)
print_with_header('df.shape', df.shape)
print_with_header('df.axes', df.axes)
print_with_header('df.index', df.index)
print_with_header('df.columns', df.columns)
print_with_header('df.dtypes', df.dtypes)
print_with_header('df.count()', df.count())
print_with_header('df.describe()', df.describe())

print_with_header('df.head()', df.head())
print_with_header('df.tail()', df.tail())



df.ndim:
2

df.size:
12

df.shape:
(3, 4)

df.axes:
[RangeIndex(start=0, stop=3, step=1), Index(['account', 'Jan', 'Feb', 'Mar'], dtype='object')]

df.index:
RangeIndex(start=0, stop=3, step=1)

df.columns:
Index(['account', 'Jan', 'Feb', 'Mar'], dtype='object')

df.dtypes:
account    object
Jan         int64
Feb         int64
Mar         int64
dtype: object

df.count():
account    3
Jan        3
Feb        3
Mar        3
dtype: int64

df.describe():
              Jan         Feb         Mar
count    3.000000    3.000000    3.000000
mean   133.333333  166.666667  150.000000
std     76.376262   66.583281   60.621778
min     50.000000   90.000000   95.000000
25%    100.000000  145.000000  117.500000
50%    150.000000  200.000000  140.000000
75%    175.000000  205.000000  177.500000
max    200.000000  210.000000  215.000000

df.head():
     account  Jan  Feb  Mar
0  Jones LLC  150  200  140
1   Alpha Co  200  210  215
2   Blue Inc   50   90   95

df.tail():
     account  Jan  Feb  Mar
0 

In [8]:
df['account'].astype('category')

0    Jones LLC
1     Alpha Co
2     Blue Inc
Name: account, dtype: category
Categories (3, object): [Alpha Co, Blue Inc, Jones LLC]

In [9]:
df['account'].astype('string')

0    Jones LLC
1     Alpha Co
2     Blue Inc
Name: account, dtype: string

In [7]:
# df['account'].convert_dtypes()          # available in pandas 1.0+

0    Jones LLC
1     Alpha Co
2     Blue Inc
Name: account, dtype: category
Categories (3, object): [Alpha Co, Blue Inc, Jones LLC]