In [1]:
import numpy as np
import pandas as pd

In [3]:
# A DataFrame can be built from a dict of Series: each key becomes a column.
# The two Series deliberately carry different indexes ('d' exists only in 'two').
d = dict(
    one=pd.Series([1., 2., 3.], index=list('abc')),
    two=pd.Series([1., 2., 3., 4.], index=list('abcd')),
)

In [4]:
# Build the frame from the dict of Series; the row labels are the union of
# the Series indexes.
df = pd.DataFrame(data=d)

In [5]:
# Display the frame: Series 'one' has no label 'd', so that cell shows as NaN.
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [6]:
# An explicit index picks those row labels, in the order given; 'c' is left out.
pd.DataFrame(data=d, index=list('dba'))

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [7]:
# `columns` selects and orders the columns as well; 'three' is not a key of
# the dict, so that column comes back entirely empty (NaN).
pd.DataFrame(data=d, index=list('dba'), columns=['two', 'three'])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [8]:
# A DataFrame can also be built from a dict of ndarrays or lists.
# All arrays must have the same length, and if an index is passed it must
# have that same length too.
d = dict(
    one=[1., 2., 3., 4.],
    two=[4., 3., 2., 1.],
)

In [9]:
# With no index argument the rows get the default integer labels 0..3.
pd.DataFrame(data=d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [12]:
# Same data, but with explicit row labels (one label per list element).
pd.DataFrame(data=d, index=list('abcd'))

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [16]:
# A DataFrame can be created from a single Series: the Series index is kept
# as the row index, and the Series name becomes the label of the only column.
# NOTE: the values come from an unseeded RNG, so they differ on every run.
pd.DataFrame(data=pd.Series(data=np.random.randn(5), name='gogo'))

Unnamed: 0,gogo
0,0.16555
1,-1.341058
2,-2.258443
3,-1.242039
4,0.200197


In [17]:
# Column selection is semantically the same as a dict lookup: indexing with
# the column label returns that column as a Series.
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [18]:
# New columns are assigned like dict entries. The element-wise product is
# NaN wherever an operand is NaN (row 'd').
df['three'] = df['one'].mul(df['two'])

In [19]:
# Boolean column from an element-wise comparison; a NaN compares as False.
df['flag'] = df['one'].gt(2)

In [20]:
# Display the frame with the derived columns: in row 'd', 'one' is NaN, so
# 'three' is NaN and 'flag' is False.
df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [24]:
# Columns can be deleted with `del`, just like dict entries.
# This mutates `df` in place, so the cell is not idempotent: running it a
# second time fails because column 'two' is already gone.
del df['two']
df

Unnamed: 0,one,three,flag
a,1.0,1.0,False
b,2.0,4.0,False
c,3.0,9.0,True
d,,,False


In [25]:
# Frame with one column per dtype. Scalars (B, C, D, F) are repeated for
# every row; the row count comes from the length-3 array and Series.
dft = pd.DataFrame(dict(
    A=np.random.rand(3),                            # float64, unseeded random values
    B=1,                                            # integer scalar
    C='foo',                                        # string scalar
    D=pd.Timestamp('20010102'),                     # timestamp scalar
    E=pd.Series([1.0, 1.0, 1.0]).astype('float32'),
    F=False,                                        # boolean scalar
    G=pd.Series([1, 1, 1], dtype='int8'),
))

In [26]:
# Display the mixed-dtype frame; the scalar inputs appear repeated in every row.
dft

Unnamed: 0,A,B,C,D,E,F,G
0,0.534928,1,foo,2001-01-02,1.0,False,1
1,0.093028,1,foo,2001-01-02,1.0,False,1
2,0.685201,1,foo,2001-01-02,1.0,False,1


In [27]:
# `dtypes` lists the dtype of every column as a Series indexed by column label.
dft.dtypes

A           float64
B             int64
C            object
D    datetime64[ns]
E           float32
F              bool
G              int8
dtype: object

In [28]:
# Mixing numbers with a string forces the ``object`` dtype, since no single
# numeric dtype can hold every element.
pd.Series(data=[1, 2, 3, 6.0, 'foo'])

0      1
1      2
2      3
3    6.0
4    foo
dtype: object

In [29]:
# 8x1 frame of unseeded random normals, stored as float32 via `dtype`.
df1 = pd.DataFrame(
    data=np.random.randn(8, 1),
    columns=['A'],
    dtype='float32',
)


In [30]:
# Confirm that column 'A' was created as float32.
df1.dtypes

A    float32
dtype: object

In [31]:
# Convert dtypes with `astype`: it returns a new frame, which is rebound to
# the same name here.
df1 = df1.astype(dtype='float64')

In [32]:
# After the conversion, column 'A' reports float64.
df1.dtypes

A    float64
dtype: object

In [33]:
# Small all-integer frame used to demonstrate per-column dtype conversion.
dft1 = pd.DataFrame(dict(
    a=[1, 0, 1],
    b=[4, 5, 6],
    c=[7, 8, 9],
))

In [34]:
# Convert selected columns by passing a {column: dtype} mapping to `astype`;
# columns not named in the mapping ('b') keep their dtype.
# The builtin `bool` is used instead of `np.bool`: that alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
dft1 = dft1.astype({
    'a': bool,
    'c': np.float64
})

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  'a': np.bool,


In [36]:
# Display the converted frame: 'a' is now boolean, 'c' is float, and 'b' is
# still integer.
dft1

Unnamed: 0,a,b,c
0,True,4,7.0
1,False,5,8.0
2,True,6,9.0
