In [52]:
import numpy as np
import pandas as pd

### From dict of Series or dicts

In [53]:

# Two Series with different indexes: "one" covers labels a-c, while "two"
# additionally has a label "d".
d = dict(
    one=pd.Series([1.0, 2.0, 3.0], index=list("abc")),
    two=pd.Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")),
)
d

{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64, 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [54]:
# Build a frame from the dict of Series. The row index covers the labels of
# both Series; "one" has no value at "d", so that cell is missing (NaN).
df = pd.DataFrame(d)
df


Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [55]:
# An explicit index selects rows by label, in the order given.
pd.DataFrame(d, index=["d", "b", "a"])


Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [56]:
# columns= picks which dict keys to use; "three" is not a key of d, so it
# comes back as an all-missing column.
pd.DataFrame(d, index=["d", "b", "a"], columns=["two", "three"])


Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [57]:
# Row labels of df.
df.index


Index(['a', 'b', 'c', 'd'], dtype='object')

In [58]:
# Column labels of df.
df.columns

Index(['one', 'two'], dtype='object')

### From dict of ndarrays / lists

In [59]:
# Dict of equal-length lists; with no index given, rows are numbered from 0.
# NOTE: this rebinds d, which earlier held a dict of Series -- re-running the
# earlier cells that read d after this one will not reproduce their output.
d = {"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]}
pd.DataFrame(d)


Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [60]:
# With plain lists, index= supplies the row labels (four labels for the
# four-element lists).
pd.DataFrame(d, index=["a", "b", "c", "d"])


Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


### From structured or record array

In [61]:
# Structured array holding two zero-initialised records with fields
# A (32-bit int), B (32-bit float) and C (10-byte string).
# "S10" is the canonical spelling of the fixed-width bytes dtype; the "a10"
# alias used previously is deprecated as of NumPy 2.0.
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "S10")])
data


array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [62]:
# Overwrite both records in place; the str values land in the bytes field C
# (shown as b'...' in the output).
data[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
data


array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [63]:
# The field names of the structured array become the column labels.
pd.DataFrame(data)


Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


### From a list of dicts

In [64]:
# Two records with different key sets: only the second one has "c".
data2 = [{"a": 1, "b": 2}, {"a": 5, "b": 10, "c": 20}]
data2

[{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]

In [65]:
# Dict keys become columns; the first record has no "c", so that cell is
# missing.
pd.DataFrame(data2)


Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [66]:
# index= labels the rows, one label per record.
pd.DataFrame(data2, index=["first", "second"])


Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [67]:
# columns= keeps only the listed keys; "c" is left out.
pd.DataFrame(data2, columns=["a", "b"])


Unnamed: 0,a,b
0,1,2
1,5,10


### From a dict of tuples

In [68]:
# Tuple keys give two-level (MultiIndex) labels: the outer-dict keys become
# the column labels and the inner-dict keys become the row labels.
# Row/column combinations absent from an inner dict are missing in the result.
pd.DataFrame(
    {
        ("a", "b"): {("A", "B"): 1, ("A", "C"): 2},
        ("a", "a"): {("A", "C"): 3, ("A", "B"): 4},
        ("a", "c"): {("A", "B"): 5, ("A", "C"): 6},
        ("b", "a"): {("A", "C"): 7, ("A", "B"): 8},
        ("b", "b"): {("A", "D"): 9, ("A", "B"): 10},
    }
)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0


### From a list of namedtuples

In [69]:
from collections import namedtuple

# Lightweight 2-D point record. The field names are given as a list, which is
# equivalent to the space-separated string form "x y".
Point = namedtuple("Point", ["x", "y"])


In [70]:
# The namedtuple field names become the column labels; the plain tuple (2, 3)
# is accepted as a row as well.
pd.DataFrame([Point(0, 0), Point(0, 3), (2, 3)])


Unnamed: 0,x,y
0,0,0
1,0,3
2,2,3


In [72]:
# 3-D point record with fields x, y and z (field names given as a list).
Point3D = namedtuple("Point3D", ["x", "y", "z"])


In [73]:
# Point has only x and y, so its row has a missing z; the z column is shown
# as float in the output for that reason.
pd.DataFrame([Point3D(0, 0, 0), Point3D(0, 3, 5), Point(2, 3)])


Unnamed: 0,x,y,z
0,0,0,0.0
1,0,3,5.0
2,2,3,


In [78]:
# Dataclasses
# NOTE: this rebinds Point, which an earlier cell defined as a namedtuple;
# re-running the namedtuple cells after this one would pick up the dataclass.
# NOTE(review): the recorded output shows a single column of Point reprs rather
# than x/y columns, so the pandas version used here apparently did not expand
# dataclasses into columns -- confirm (support arrived in pandas 1.1).
from dataclasses import make_dataclass
Point = make_dataclass("Point", [("x", int), ("y", int)])
pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])


Unnamed: 0,0
0,"Point(x=0, y=0)"
1,"Point(x=0, y=3)"
2,"Point(x=2, y=3)"


### Alternate constructors
#### DataFrame.from_dict

In [79]:
# With no orient given, each dict key becomes a column.
pd.DataFrame.from_dict({"A": [1, 2, 3], "B": [4, 5, 6]})


Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [80]:
# orient="index" turns each dict key into a row label; the column names are
# given explicitly through columns=.
pd.DataFrame.from_dict(
    {"A": [1, 2, 3], "B": [4, 5, 6]},
    orient="index",
    columns=["one", "two", "three"],
)


Unnamed: 0,one,two,three
A,1,2,3
B,4,5,6


#### DataFrame.from_records

In [81]:
# The structured array filled in earlier, shown again for reference.
data


array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [82]:
# index="C" uses field C as the row index instead of keeping it as a column.
pd.DataFrame.from_records(data, index="C")


Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
b'Hello',1,2.0
b'World',2,3.0


### Column selection, addition, deletion


In [84]:
# Starting point for this section: df with columns "one" and "two".
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [85]:
# Selecting a single column by label gives a Series named after that column.
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [87]:
# Element-wise product stored as a new column; row "d" stays missing because
# "one" is missing there.
df["three"] = df["one"] * df["two"]
df


Unnamed: 0,one,two,three
a,1.0,1.0,1.0
b,2.0,2.0,4.0
c,3.0,3.0,9.0
d,,4.0,


In [89]:
# Boolean column from a comparison; the missing value in row "d" compares as
# False.
df["flag"] = df["one"] > 2
df


Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [90]:
# del removes the column from df in place.
del df["two"]
df

Unnamed: 0,one,three,flag
a,1.0,1.0,False
b,2.0,4.0,False
c,3.0,9.0,True
d,,,False


In [91]:
# pop removes the column from df and hands it back; it is kept here in
# `three`.
three = df.pop("three")
df

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


In [93]:
# Assigning a scalar fills every row of the new column with that value.
df["foo"] = "bar"
df


Unnamed: 0,one,flag,foo
a,1.0,False,bar
b,2.0,False,bar
c,3.0,True,bar
d,,False,bar
