# Viewing

In [3]:
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every top-level expression in a cell, not only the last one.
# Later cells rely on this to display several results from a single cell.
InteractiveShell.ast_node_interactivity = "all"

In [1]:
import numpy as np
import pandas as pd

### Creating a Series and a DataFrame

In [2]:
# A Series built from a plain list; the np.nan entry marks a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Six consecutive calendar days starting on 2021-06-29; used as a row index.
dates = pd.date_range(start="2021-06-29", periods=6, freq="D")
dates

DatetimeIndex(['2021-06-29', '2021-06-30', '2021-07-01', '2021-07-02',
               '2021-07-03', '2021-07-04'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# Draw the sample from a seeded generator so the frame (and every output derived
# from it) is identical on each Restart & Run All; an unseeded draw changes per run.
rng = np.random.default_rng(seed=0)
# 6 rows (one per date) x 4 columns labelled A-D of standard-normal values.
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2021-06-29,1.788012,-1.689508,1.566122,0.098571
2021-06-30,0.940526,0.138319,1.462119,-0.601579
2021-07-01,-0.944202,-0.097555,1.728288,0.861634
2021-07-02,-0.557406,1.04357,1.398022,-0.677568
2021-07-03,-0.217742,-0.103924,-0.426206,-0.653283
2021-07-04,1.302261,0.551452,1.069123,1.546181


In [9]:
# A frame built from a dict: each key becomes a column, and each column keeps its
# own dtype. Scalars ('A', 'B', 'F') are broadcast to the length of the other columns.
df2 = pd.DataFrame(
    data={
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.full(4, 3, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)
df2
# One dtype per column.
df2.dtypes

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [10]:
# Peek at both ends of the frame: head() defaults to the first five rows,
# tail(n=2) returns the last two.
df.head()
df.tail(n=2)

Unnamed: 0,A,B,C,D
2021-06-29,1.788012,-1.689508,1.566122,0.098571
2021-06-30,0.940526,0.138319,1.462119,-0.601579
2021-07-01,-0.944202,-0.097555,1.728288,0.861634
2021-07-02,-0.557406,1.04357,1.398022,-0.677568
2021-07-03,-0.217742,-0.103924,-0.426206,-0.653283


Unnamed: 0,A,B,C,D
2021-07-03,-0.217742,-0.103924,-0.426206,-0.653283
2021-07-04,1.302261,0.551452,1.069123,1.546181


In [11]:
# Row labels of the frame (the DatetimeIndex it was constructed with).
df.index
# Column labels of the frame.
df.columns

DatetimeIndex(['2021-06-29', '2021-06-30', '2021-07-01', '2021-07-02',
               '2021-07-03', '2021-07-04'],
              dtype='datetime64[ns]', freq='D')

Index(['A', 'B', 'C', 'D'], dtype='object')

In [13]:
# Convert the frame's values to a plain NumPy array; the row and column labels
# are not part of the result.
np_array = df.to_numpy()
np_array
# The whole array carries a single dtype.
np_array.dtype

array([[ 1.7880124 , -1.68950756,  1.56612216,  0.0985709 ],
       [ 0.94052554,  0.1383186 ,  1.46211864, -0.60157875],
       [-0.94420153, -0.0975547 ,  1.72828768,  0.86163426],
       [-0.55740586,  1.04357045,  1.39802221, -0.67756839],
       [-0.21774184, -0.10392406, -0.42620572, -0.65328316],
       [ 1.30226131,  0.55145161,  1.06912339,  1.54618069]])

dtype('float64')

> NumPy arrays hold a single dtype for the whole array, while pandas DataFrames hold one dtype per column.  
> If a DataFrame has columns of more than one dtype, `to_numpy()` picks a single dtype that can hold every value and converts all the data to it.  
> This can be expensive if everything has to be converted to the `object` dtype.

**Transpose Data**

In [15]:
# The original frame, followed by its transpose (row and column labels swapped).
df
df.transpose()

Unnamed: 0,A,B,C,D
2021-06-29,1.788012,-1.689508,1.566122,0.098571
2021-06-30,0.940526,0.138319,1.462119,-0.601579
2021-07-01,-0.944202,-0.097555,1.728288,0.861634
2021-07-02,-0.557406,1.04357,1.398022,-0.677568
2021-07-03,-0.217742,-0.103924,-0.426206,-0.653283
2021-07-04,1.302261,0.551452,1.069123,1.546181


Unnamed: 0,2021-06-29,2021-06-30,2021-07-01,2021-07-02,2021-07-03,2021-07-04
A,1.788012,0.940526,-0.944202,-0.557406,-0.217742,1.302261
B,-1.689508,0.138319,-0.097555,1.04357,-0.103924,0.551452
C,1.566122,1.462119,1.728288,1.398022,-0.426206,1.069123
D,0.098571,-0.601579,0.861634,-0.677568,-0.653283,1.546181


**Sort columns / rows**

In [19]:
# sort_index orders by axis *labels*, not by cell values, and returns a new frame.
# Column labels in descending order (D, C, B, A):
df.sort_index(axis="columns", ascending=False)
# Row labels in descending order (latest date first). "index" is the documented
# name for axis 0 in DataFrame.sort_index.
df.sort_index(axis="index", ascending=False)

Unnamed: 0,D,C,B,A
2021-06-29,0.098571,1.566122,-1.689508,1.788012
2021-06-30,-0.601579,1.462119,0.138319,0.940526
2021-07-01,0.861634,1.728288,-0.097555,-0.944202
2021-07-02,-0.677568,1.398022,1.04357,-0.557406
2021-07-03,-0.653283,-0.426206,-0.103924,-0.217742
2021-07-04,1.546181,1.069123,0.551452,1.302261


Unnamed: 0,A,B,C,D
2021-07-04,1.302261,0.551452,1.069123,1.546181
2021-07-03,-0.217742,-0.103924,-0.426206,-0.653283
2021-07-02,-0.557406,1.04357,1.398022,-0.677568
2021-07-01,-0.944202,-0.097555,1.728288,0.861634
2021-06-30,0.940526,0.138319,1.462119,-0.601579
2021-06-29,1.788012,-1.689508,1.566122,0.098571


In [27]:
# Reorder the rows by the values in column A, smallest first (the default order).
df.sort_values(by="A")

Unnamed: 0,A,B,C,D
2021-07-01,-0.944202,-0.097555,1.728288,0.861634
2021-07-02,-0.557406,1.04357,1.398022,-0.677568
2021-07-03,-0.217742,-0.103924,-0.426206,-0.653283
2021-06-30,0.940526,0.138319,1.462119,-0.601579
2021-07-04,1.302261,0.551452,1.069123,1.546181
2021-06-29,1.788012,-1.689508,1.566122,0.098571
