In [1]:
from IPython.display import display

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from pandas.plotting import register_matplotlib_converters
# Register pandas' formatters/converters (e.g. for datetime-like values) with
# matplotlib. NOTE(review): no plot appears in the visible cells, so confirm
# this is still needed.
register_matplotlib_converters()

## Series

In [3]:
# One missing entry (NaN) is enough to make the whole Series float64.
s = pd.Series(data=[1, 2, 3, np.nan, 6, 9, 8, 4, 8])

# Preview the first three elements.
s.head(n=3)

0    1.0
1    2.0
2    3.0
dtype: float64

## DataFrame

In [4]:
# Seed a local generator so the random frame, and every output derived from it
# in the following cells, is reproducible under Restart & Run All.
rng = np.random.default_rng(42)

# The three Series carry different labels; the DataFrame constructor aligns
# them on the union of their indexes and fills the gaps with NaN.
df = pd.DataFrame({
    'one': pd.Series(rng.standard_normal(4), index=['a', 'b', 'c', 'e']),
    'two': pd.Series(rng.standard_normal(4), index=['a', 'b', 'c', 'i']),
    'three': pd.Series(rng.standard_normal(7), index=['b', 'c', 'd', 'e', 'f', 'g', 'h']),
})

# A bare last expression is rendered by the notebook; no display() call needed.
df

Unnamed: 0,one,two,three
a,0.670533,0.961305,
b,0.313373,-0.874673,-0.551033
c,-0.659191,0.065563,1.696877
d,,,1.710299
e,0.31606,,0.073299
f,,,0.23883
g,,,-1.115367
h,,,1.582213
i,,-2.236466,


In [5]:
# Structural overview: per-column dtypes, (rows, columns) shape, summary statistics.
display(df.dtypes, df.shape, df.describe())

# Underlying value array, then the column labels and the row labels.
display(df.values, df.columns, df.index)

# Distinct values found in column 'two'.
display(df['two'].unique())

one      float64
two      float64
three    float64
dtype: object

(9, 3)

Unnamed: 0,one,two,three
count,4.0,4.0,7.0
mean,0.160194,-0.521068,0.519303
std,0.57143,1.367381,1.156884
min,-0.659191,-2.236466,-1.115367
25%,0.070232,-1.215121,-0.238867
50%,0.314717,-0.404555,0.23883
75%,0.404678,0.289498,1.639545
max,0.670533,0.961305,1.710299


array([[ 0.6705326 ,  0.96130513,         nan],
       [ 0.31337331, -0.8746731 , -0.55103333],
       [-0.6591912 ,  0.06556285,  1.69687674],
       [        nan,         nan,  1.71029948],
       [ 0.31606025,         nan,  0.07329948],
       [        nan,         nan,  0.23882954],
       [        nan,         nan, -1.11536668],
       [        nan,         nan,  1.58221316],
       [        nan, -2.23646625,         nan]])

Index(['one', 'two', 'three'], dtype='object')

Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'], dtype='object')

array([ 0.96130513, -0.8746731 ,  0.06556285,         nan, -2.23646625])

In [6]:
# Boolean mask of the missing cells, followed by the missing-value count per column.
display(df.isna(), df.isna().sum())

Unnamed: 0,one,two,three
a,False,False,True
b,False,False,False
c,False,False,False
d,True,True,False
e,False,True,False
f,True,True,False
g,True,True,False
h,True,True,False
i,True,False,True


one      5
two      5
three    2
dtype: int64

#### Transpose

In [7]:
# Swap the axes: the column labels become the index and the row labels become columns.
df.transpose()

Unnamed: 0,a,b,c,d,e,f,g,h,i
one,0.670533,0.313373,-0.659191,,0.31606,,,,
two,0.961305,-0.874673,0.065563,,,,,,-2.236466
three,,-0.551033,1.696877,1.710299,0.073299,0.23883,-1.115367,1.582213,


#### Sort by axis

In [8]:
# Order the rows by index label, descending ('index' is the named form of axis 0).
df.sort_index(axis='index', ascending=False)

Unnamed: 0,one,two,three
i,,-2.236466,
h,,,1.582213
g,,,-1.115367
f,,,0.23883
e,0.31606,,0.073299
d,,,1.710299
c,-0.659191,0.065563,1.696877
b,0.313373,-0.874673,-0.551033
a,0.670533,0.961305,


In [9]:
# Order the columns by label, descending ('columns' is the named form of axis 1).
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,two,three,one
a,0.961305,,0.670533
b,-0.874673,-0.551033,0.313373
c,0.065563,1.696877,-0.659191
d,,1.710299,
e,,0.073299,0.31606
f,,0.23883,
g,,-1.115367,
h,,1.582213,
i,-2.236466,,


## List to df

In [10]:
# One inner list per book, in the order: title, author, price, quantity in stock.
book_details = [
    ['Harry Potter', 'J.K. Rowling', 101.77, 22],
    ['Competitive Programming 3', 'Steven halim', 30.05, 10]
]

In [11]:
# Wrap the nested list in a DataFrame, supplying one label per column.
# NOTE: this rebinds `df`, replacing the random frame built earlier in the notebook.
df = pd.DataFrame(
    data=book_details,
    columns=['Title', 'Author', 'Price', 'Quantity in Stock'],
)
df

Unnamed: 0,Title,Author,Price,Quantity in Stock
0,Harry Potter,J.K. Rowling,101.77,22
1,Competitive Programming 3,Steven halim,30.05,10


## Iterate df by rows

In [12]:
# iterrows() yields (index label, row-as-Series) pairs; only the first five
# rows are visited and just two fields of each are printed.
for label, book in df.head(5).iterrows():
    print(book['Title'], book['Price'])

Harry Potter 101.77
Competitive Programming 3 30.05


## Join df

```py
# Join the two frames on their shared 'id' column (pd.merge defaults to an inner join).
df_merged = pd.merge(df_1, df_2, on='id')
```

## Read/write
#### save/read json

```python
# Write one JSON object per row, one per line (JSON Lines format) ...
df.to_json(json_file, orient='records', lines=True)
# ... and read the same line-delimited file back into a DataFrame.
df = pd.read_json(json_file, lines=True)
```

#### save/read csv

```python
# index=False leaves the row labels out of the file.
df.to_csv(csv_file, index=False)
# read_csv is a module-level pandas function (DataFrame has no read_csv
# method) and returns a new frame, so the result must be assigned.
df = pd.read_csv(csv_file, delimiter=',')
```

#### save to HTML table

```python
# Render the frame as an HTML table and write it to the file temp.html.
df.to_html('temp.html')
```