In [1]:
import numpy as np
import pandas as pd

## Object creation

In [2]:
# Build a Series from a plain list; pandas supplies a default integer index.
# The np.nan entry stands in for a missing value.
s = pd.Series([1, 3, 5, np.nan, 6, 8])

In [3]:
# Display the Series; the recorded output shows the NaN entry and a float64 dtype.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Six consecutive timestamps beginning 2013-01-01; these become the row
# labels of the DataFrame created further down.
dates = pd.date_range(start='20130101', periods=6)

In [5]:
# Display the DatetimeIndex; the recorded output shows daily frequency (freq='D').
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# Draw the sample data from a seeded generator so that the values, and every
# output derived from them in later cells, are identical on each
# Restart & Run All. Re-run the notebook once to refresh the recorded outputs.
rng = np.random.default_rng(42)

# 6 rows (one per entry in `dates`) x 4 columns labelled A-D, filled with
# standard-normal floats.
df = pd.DataFrame(
    rng.standard_normal((6, 4)),
    index=dates,
    columns=list('ABCD'),
)

In [7]:
# Display the whole frame; at 6 rows x 4 columns it is small enough to show in full.
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.294524,-1.117774,1.440939,-1.265328
2013-01-02,-0.540434,0.120004,-0.755097,1.27275
2013-01-03,0.019273,-0.921255,0.391546,-0.626436
2013-01-04,-0.808458,1.22327,0.926827,-0.186761
2013-01-05,-1.056593,-1.357841,-1.03485,-0.636046
2013-01-06,0.407776,-1.534396,-1.18036,0.734435


Create a DataFrame by passing a dict whose values can each be converted to a Series-like column; scalar values are broadcast to every row.

In [16]:
# Each dict key becomes a column label and each value becomes that column.
# Scalars are repeated for every row; the length-4 values fix the row count.
df2 = pd.DataFrame(
    {
        'A': 1.0,                                                     # float scalar
        'B': pd.Timestamp('20150105'),                                # timestamp scalar
        'C': pd.Series(1, index=list(range(4)), dtype='complex'),     # complex Series
        'D': np.array([3] * 4, dtype='int32'),                        # int32 ndarray
        'E': pd.Categorical(['test', 'train', 'test', 'train']),      # categorical
    }
)
df2

Unnamed: 0,A,B,C,D,E
0,1.0,2015-01-05,1.000000+0.000000j,3,test
1,1.0,2015-01-05,1.000000+0.000000j,3,train
2,1.0,2015-01-05,1.000000+0.000000j,3,test
3,1.0,2015-01-05,1.000000+0.000000j,3,train


The columns of the resulting DataFrame have different dtypes.

In [17]:
# One dtype per column, returned as a Series indexed by column label.
df2.dtypes

A           float64
B    datetime64[ns]
C        complex128
D             int32
E          category
dtype: object

In [32]:
# NOTE(review): the output recorded below is a bound-method repr, which is what
# `df2.all` WITHOUT parentheses displays. It does not match this call, so
# re-run the cell to refresh the output.
df2.all()

<bound method DataFrame.all of      A          B                   C  D      E
0  1.0 2015-01-05  1.000000+0.000000j  3   test
1  1.0 2015-01-05  1.000000+0.000000j  3  train
2  1.0 2015-01-05  1.000000+0.000000j  3   test
3  1.0 2015-01-05  1.000000+0.000000j  3  train>

## Viewing data

In [34]:
# First two rows of the frame.
df.head(2)

Unnamed: 0,A,B,C,D
2013-01-01,-0.294524,-1.117774,1.440939,-1.265328
2013-01-02,-0.540434,0.120004,-0.755097,1.27275


In [35]:
# Last three rows of the frame.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.808458,1.22327,0.926827,-0.186761
2013-01-05,-1.056593,-1.357841,-1.03485,-0.636046
2013-01-06,0.407776,-1.534396,-1.18036,0.734435


Display the index and the columns:

In [41]:
# Row labels: the DatetimeIndex that was passed as `index=dates`.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [44]:
# Column labels: 'A' through 'D', taken from list('ABCD').
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

DataFrame.to_numpy() gives a NumPy representation of the underlying data. Note that this can be an expensive operation when your DataFrame has columns with different data types, which comes down to a fundamental difference between pandas and NumPy: NumPy arrays have one dtype for the entire array, while pandas DataFrames have one dtype per column. When you call DataFrame.to_numpy(), pandas will find the NumPy dtype that can hold all of the dtypes in the DataFrame. This may end up being object, which requires casting every value to a Python object.

For df, our DataFrame of all floating-point values, DataFrame.to_numpy() is fast and doesn’t require copying data.

In [45]:
# Every column of df is floating-point, so the result is one 2-D float array.
df.to_numpy()

array([[-0.29452393, -1.11777356,  1.44093868, -1.2653281 ],
       [-0.54043402,  0.12000428, -0.75509681,  1.27274992],
       [ 0.01927334, -0.92125511,  0.39154638, -0.62643643],
       [-0.80845756,  1.22326959,  0.92682675, -0.18676097],
       [-1.05659303, -1.35784102, -1.03484994, -0.63604552],
       [ 0.40777561, -1.53439602, -1.18036041,  0.73443502]])

For df2, the DataFrame with multiple dtypes, DataFrame.to_numpy() is relatively expensive.

In [46]:
# df2 mixes several column dtypes; the recorded output is an object-dtype array.
df2.to_numpy()

array([[1.0, Timestamp('2015-01-05 00:00:00'), (1+0j), 3, 'test'],
       [1.0, Timestamp('2015-01-05 00:00:00'), (1+0j), 3, 'train'],
       [1.0, Timestamp('2015-01-05 00:00:00'), (1+0j), 3, 'test'],
       [1.0, Timestamp('2015-01-05 00:00:00'), (1+0j), 3, 'train']],
      dtype=object)

describe() shows a quick statistical summary of your data:

In [47]:
# Per-column count, mean, std, min, quartiles (25%/50%/75%) and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.378827,-0.597999,-0.035166,-0.117898
std,0.539333,1.064087,1.105942,0.94961
min,-1.056593,-1.534396,-1.18036,-1.265328
25%,-0.741452,-1.297824,-0.964912,-0.633643
50%,-0.417479,-1.019514,-0.181775,-0.406599
75%,-0.059176,-0.140311,0.793007,0.504136
max,0.407776,1.22327,1.440939,1.27275


Transposing your data:

In [49]:
# Swap the axes: the dates become the column labels and A-D become the row labels.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-0.294524,-0.540434,0.019273,-0.808458,-1.056593,0.407776
B,-1.117774,0.120004,-0.921255,1.22327,-1.357841,-1.534396
C,1.440939,-0.755097,0.391546,0.926827,-1.03485,-1.18036
D,-1.265328,1.27275,-0.626436,-0.186761,-0.636046,0.734435
