## Pandas Basic Introduction

In [1]:
import pandas as pd
import numpy  as np

### Create Pandas Series
* pd.Series : create a Series from a list
* pd.date_range : create a sequence of dates (a DatetimeIndex)

In [2]:
# Build a Series from a plain Python list; one entry is NaN to show how
# missing values are represented.
s = pd.Series(data=[1, 4, np.nan, 1])
print(s)

0    1.0
1    4.0
2    NaN
3    1.0
dtype: float64


In [3]:
# Four consecutive periods starting at 2017-01-01 (date_range defaults to
# a daily frequency, as the printed index shows).
dates = pd.date_range(start='20170101', periods=4)
print(dates)

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')


### Create Pandas Data Frame
* pd.DataFrame( *array* , *index* , *columns* )
* pd.DataFrame( *dict* )

In [4]:
# A 4x6 frame of standard-normal samples. With no index/columns given,
# pandas labels rows and columns with consecutive integers.
print("Without specifying the index and columns")
df = pd.DataFrame(data=np.random.randn(4, 6))
print(df)

Without specifying the index and columns
          0         1         2         3         4         5
0 -0.243915  1.924518 -0.392198  0.649233  0.126214 -0.217729
1 -1.466426  0.406014 -0.231933 -0.476125 -1.012357  0.723443
2  0.033098  1.021574  1.331821  0.717572 -1.613766  0.360405
3  0.389376 -0.896964 -1.158504  0.180991 -0.109169 -0.119331


In [5]:
# Same kind of random 4x6 data, but with explicit labels: rows are keyed
# by `dates` (created in an earlier cell) and columns by single letters.
print("Specifying the index with dates and columns with chars")
df = pd.DataFrame(
    data=np.random.randn(4, 6),
    index=dates,
    columns=["a", "b", "c", "d", "e", "f"],
)
print(df)

Specifying the index with dates and columns with chars
                   a         b         c         d         e         f
2017-01-01  1.066058 -0.021511 -0.307976 -0.908835 -0.032229 -0.229734
2017-01-02  0.308851  0.139270  0.085891 -1.400226 -0.901680  0.011989
2017-01-03 -0.192360  0.269176 -0.269251  1.305065 -0.933242 -0.514104
2017-01-04 -0.513891 -2.000638  0.575219 -1.090500  1.375195  0.334193


In [6]:
# Build a frame from a dict: each key becomes a column. Scalar values
# ("A", "B", "F") are broadcast to the length of the array-like columns.
print("Define data frame from dictionary")
df = pd.DataFrame({
    "A": 1,
    "B": pd.Timestamp('20130102'),
    "C": pd.Series(np.arange(4)),
    # `np.int` was only an alias for the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so use the builtin directly.
    "D": np.array([3] * 4, dtype=int),
    "E": pd.Categorical(["test", "main", "test", "main"]),
    "F": 'foo'
})
print(df)

Define data frame from dictionary
   A          B  C  D     E    F
0  1 2013-01-02  0  3  test  foo
1  1 2013-01-02  1  3  main  foo
2  1 2013-01-02  2  3  test  foo
3  1 2013-01-02  3  3  main  foo


### Get Data Frame's Attributes

* df.dtypes
* df.index
* df.columns
* df.values
* df.describe()

In [7]:
# Show the dtype of every column of the dict-built frame.
print(df.dtypes)

A             int64
B    datetime64[ns]
C             int64
D             int64
E          category
F            object
dtype: object


In [8]:
# Show the row labels (no index was passed when the frame was built).
print(df.index)

RangeIndex(start=0, stop=4, step=1)


In [9]:
# Show the column labels, which come from the dict keys.
print(df.columns)

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')


In [10]:
# Show the cell values as a 2-D array, one row per record.
print(df.values)

[[1 Timestamp('2013-01-02 00:00:00') 0 3 'test' 'foo']
 [1 Timestamp('2013-01-02 00:00:00') 1 3 'main' 'foo']
 [1 Timestamp('2013-01-02 00:00:00') 2 3 'test' 'foo']
 [1 Timestamp('2013-01-02 00:00:00') 3 3 'main' 'foo']]


In [11]:
# Summary statistics (count, mean, std, quartiles, min/max); the recorded
# output covers the numeric columns A, C and D.
df.describe()

Unnamed: 0,A,C,D
count,4.0,4.0,4.0
mean,1.0,1.5,3.0
std,0.0,1.290994,0.0
min,1.0,0.0,3.0
25%,1.0,0.75,3.0
50%,1.0,1.5,3.0
75%,1.0,2.25,3.0
max,1.0,3.0,3.0


### Transpose data frame
* df.T

In [12]:
# Transposed view: the original columns become rows and vice versa.
df.T

Unnamed: 0,0,1,2,3
A,1,1,1,1
B,2013-01-02 00:00:00,2013-01-02 00:00:00,2013-01-02 00:00:00,2013-01-02 00:00:00
C,0,1,2,3
D,3,3,3,3
E,test,main,test,main
F,foo,foo,foo,foo


### Sort by Index
* df.sort_index( *axis* , *ascending* )

In [13]:
# Order the rows by their index labels, highest label first.
df.sort_index(axis="index", ascending=False)

Unnamed: 0,A,B,C,D,E,F
3,1,2013-01-02,3,3,main,foo
2,1,2013-01-02,2,3,test,foo
1,1,2013-01-02,1,3,main,foo
0,1,2013-01-02,0,3,test,foo


In [14]:
# Order the columns by their labels in descending order (F ... A).
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,F,E,D,C,B,A
0,foo,test,3,0,2013-01-02,1
1,foo,main,3,1,2013-01-02,1
2,foo,test,3,2,2013-01-02,1
3,foo,main,3,3,2013-01-02,1


### Sort by Value
* df.sort_values( *by* )

In [15]:
# Order the rows by the values in column "E" (ascending by default).
df.sort_values(by=["E"])

Unnamed: 0,A,B,C,D,E,F
1,1,2013-01-02,1,3,main,foo
3,1,2013-01-02,3,3,main,foo
0,1,2013-01-02,0,3,test,foo
2,1,2013-01-02,2,3,test,foo
