In [1]:
from IPython.display import display

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from pandas.plotting import register_matplotlib_converters
# Explicitly register pandas' date/time converters and formatters with
# matplotlib so datetime-like pandas values can be placed on plot axes.
# NOTE(review): no date plot is visible in this part of the notebook —
# confirm the registration is still needed.
register_matplotlib_converters()

## Series

In [3]:
# A Series is a one-dimensional labelled array; with no explicit index the
# labels default to 0..n-1. The NaN entry makes pandas store every value as
# float64.
s = pd.Series(data=[1, 2, 3, np.nan, 6, 9, 8, 4, 8])

# Preview only the first three entries.
s.head(n=3)

0    1.0
1    2.0
2    3.0
dtype: float64

## DataFrame

In [4]:
# Each column is a Series of random values over its own, only partially
# overlapping, set of row labels. The DataFrame constructor aligns the
# columns on the union of those labels and leaves NaN wherever a column has
# no value for a row.
df = pd.DataFrame({
    column: pd.Series(np.random.randn(len(labels)), index=list(labels))
    for column, labels in (
        ('one', 'abce'),
        ('two', 'abci'),
        ('three', 'bcdefgh'),
    )
})

display(df)

Unnamed: 0,one,two,three
a,0.8721,0.252313,
b,-1.248452,-0.915259,-0.28228
c,0.804611,1.884097,0.42804
d,,,1.221379
e,2.004291,,-0.936982
f,,,-1.275059
g,,,0.008727
h,,,0.069121
i,,1.158641,


In [5]:
# Structural summary: dtype of every column, (rows, columns) shape, and
# descriptive statistics of the numeric columns.
display(df.dtypes, df.shape, df.describe())

# Raw contents and the labels of both axes.
display(
    df.values,   # cell values as a NumPy array
    df.columns,  # column labels
    df.index,    # row labels
)

# Distinct values of column 'two'; NaN is reported once.
display(df['two'].unique())

one      float64
two      float64
three    float64
dtype: object

(9, 3)

Unnamed: 0,one,two,three
count,4.0,4.0,7.0
mean,0.608138,0.594948,-0.109579
std,1.354554,1.207998,0.834583
min,-1.248452,-0.915259,-1.275059
25%,0.291345,-0.03958,-0.609631
50%,0.838355,0.705477,0.008727
75%,1.155148,1.340005,0.248581
max,2.004291,1.884097,1.221379


array([[ 0.87210017,  0.25231323,         nan],
       [-1.2484518 , -0.91525921, -0.28227954],
       [ 0.8046108 ,  1.88409694,  0.4280401 ],
       [        nan,         nan,  1.2213789 ],
       [ 2.00429129,         nan, -0.93698169],
       [        nan,         nan, -1.27505893],
       [        nan,         nan,  0.00872712],
       [        nan,         nan,  0.06912103],
       [        nan,  1.15864075,         nan]])

Index(['one', 'two', 'three'], dtype='object')

Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'], dtype='object')

array([ 0.25231323, -0.91525921,  1.88409694,         nan,  1.15864075])

In [6]:
# Boolean mask of the missing cells, then the number of missing values in
# each column (`isna` is the same method as `isnull`).
display(df.isna(), df.isna().sum())

Unnamed: 0,one,two,three
a,False,False,True
b,False,False,False
c,False,False,False
d,True,True,False
e,False,True,False
f,True,True,False
g,True,True,False
h,True,True,False
i,True,False,True


one      5
two      5
three    2
dtype: int64

#### Transpose

In [7]:
# Swap the axes: row labels become column labels and vice versa.
df.transpose()

Unnamed: 0,a,b,c,d,e,f,g,h,i
one,0.8721,-1.248452,0.804611,,2.004291,,,,
two,0.252313,-0.915259,1.884097,,,,,,1.158641
three,,-0.28228,0.42804,1.221379,-0.936982,-1.275059,0.008727,0.069121,


#### Sort by axis

In [8]:
# Order the rows by their index labels, descending (axis 0 == 'index').
df.sort_index(axis='index', ascending=False)

Unnamed: 0,one,two,three
i,,1.158641,
h,,,0.069121
g,,,0.008727
f,,,-1.275059
e,2.004291,,-0.936982
d,,,1.221379
c,0.804611,1.884097,0.42804
b,-1.248452,-0.915259,-0.28228
a,0.8721,0.252313,


In [9]:
# Order the columns by their labels, descending (axis 1 == 'columns').
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,two,three,one
a,0.252313,,0.8721
b,-0.915259,-0.28228,-1.248452
c,1.884097,0.42804,0.804611
d,,1.221379,
e,,-0.936982,2.004291
f,,-1.275059,
g,,0.008727,
h,,0.069121,
i,1.158641,,


## List to df

In [None]:
# One inner list per book; the field order matches the column names used when
# the list is turned into a DataFrame.
book_details = [
    [
        'Harry Potter',               # title
        'J.K. Rowling',               # author
        101.77,                       # price
        22,                           # quantity in stock
    ],
    [
        'Competitive Programming 3',  # title
        'Steven halim',               # author
        30.05,                        # price
        10,                           # quantity in stock
    ],
]

In [None]:
# Build a DataFrame from the list of book records: every inner list becomes
# one row, and the column names are supplied explicitly.
df = pd.DataFrame(
    data=book_details,
    columns=['Title', 'Author', 'Price', 'Quantity in Stock'],
)
df

## Read/write
#### save/read json

```python
df.to_json(json_file, orient='records', lines=True)
df = pd.read_json(json_file, lines=True)
```

#### save/read csv

```python
df.to_csv(csv_file, index=False)
df = pd.read_csv(csv_file)
```