In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed NumPy's global RNG so that a fresh Restart & Run All reproduces the same draws.
np.random.seed(0)
# Ten standard-normal samples wrapped in a Series with the default 0..9 index.
t = pd.Series(np.random.randn(10))

In [3]:
# Display the series: ten float64 values on the default 0-9 index.
t

0    0.295602
1    1.025389
2    0.295315
3   -0.031107
4   -0.556406
5   -0.943724
6   -1.557968
7   -1.424643
8   -0.056233
9   -0.039387
dtype: float64

In [4]:
# Blank out the middle of the series by position: everything except the
# first two and the last two entries becomes NaN.
t.iloc[2:-2] = np.nan

In [5]:
# Display again: positions 2 through 7 are now NaN; the first two and last two values are untouched.
t

0    0.295602
1    1.025389
2         NaN
3         NaN
4         NaN
5         NaN
6         NaN
7         NaN
8   -0.056233
9   -0.039387
dtype: float64

In [6]:
# Convert to a sparse representation. The BlockIndex in the repr shows where the
# stored (non-NaN) values sit: "Block locations" are the start positions of each
# contiguous run and "Block lengths" are how many values each run holds.
# NOTE(review): Series.to_sparse() was deprecated in pandas 0.25 and removed in 1.0;
# on current pandas the equivalent is t.astype(pd.SparseDtype("float64", np.nan)).
t.to_sparse()

0    0.295602
1    1.025389
2         NaN
3         NaN
4         NaN
5         NaN
6         NaN
7         NaN
8   -0.056233
9   -0.039387
dtype: float64
BlockIndex
Block locations: array([0, 8])
Block lengths: array([2, 2])

In [7]:
# Show a copy of t with every NaN replaced by 0 (the result is not assigned, so t keeps its NaNs).
t.fillna(0)

0    0.295602
1    1.025389
2    0.000000
3    0.000000
4    0.000000
5    0.000000
6    0.000000
7    0.000000
8   -0.056233
9   -0.039387
dtype: float64

In [8]:
# With the default fill value (NaN) the zeros count as ordinary data, so nothing is
# compressed: the result is a single block covering all 10 positions.
t.fillna(0).to_sparse()

0    0.295602
1    1.025389
2    0.000000
3    0.000000
4    0.000000
5    0.000000
6    0.000000
7    0.000000
8   -0.056233
9   -0.039387
dtype: float64
BlockIndex
Block locations: array([0])
Block lengths: array([10])

In [10]:
# Unlike the previous cell, fill_value=0 tells to_sparse that 0 is the filler value
# we put in ourselves, so the zeros are not stored and only the two blocks of real
# data (positions 0-1 and 8-9) remain.
t.fillna(0).to_sparse(fill_value = 0)

0    0.295602
1    1.025389
2    0.000000
3    0.000000
4    0.000000
5    0.000000
6    0.000000
7    0.000000
8   -0.056233
9   -0.039387
dtype: float64
BlockIndex
Block locations: array([0, 8])
Block lengths: array([2, 2])

In [11]:
# density is the fraction of entries that are actually stored (i.e. not the fill value);
# here 4 of the 10 values are non-NaN. Sparse structures are mostly used to save memory.
t.to_sparse().density

0.4

In [13]:
# A sparse object can be converted back to dense with to_dense().
# Note that this rebinds t to its sparse form, so any cell that uses t after this
# point (or is re-run after it) works with the sparse object, not the dense Series.
t = t.to_sparse()

In [14]:
# Convert the sparse t back to a regular dense Series; the NaNs reappear as ordinary entries.
t.to_dense()

0    0.295602
1    1.025389
2         NaN
3         NaN
4         NaN
5         NaN
6         NaN
7         NaN
8   -0.056233
9   -0.039387
dtype: float64

In [20]:
# Draw a fresh vector of ten standard-normal samples as a plain NumPy array.
b = np.random.standard_normal(10)

In [21]:
# Mark positions 2 and 3 as missing.
b[2:4] = np.nan

In [22]:
# Mark positions 7 and 8 as missing.
b[7:9] = np.nan

In [23]:
# Display the array: NaN at positions 2, 3, 7 and 8.
b

array([ 1.52402831,  1.79171735,         nan,         nan,  0.57483907,
       -0.70694718,  1.23719924,         nan,         nan,  0.55096329])

In [24]:
# Deliberate failure: a plain NumPy array has no to_sparse method.
# Catch the error and print it so the demonstration is kept but the notebook
# still runs from top to bottom without stopping here.
try:
    b.to_sparse()
except AttributeError as err:
    print("AttributeError:", err)

AttributeError: 'numpy.ndarray' object has no attribute 'to_sparse'

In [None]:
# A NumPy ndarray has no to_sparse method, so it cannot be converted directly (see the AttributeError above).
# Wrap the array in a pd.SparseArray instead.

In [26]:
# Wrap the NumPy array in a SparseArray; NaN is the default fill value, so only the
# non-NaN entries are stored.
# NOTE(review): the top-level pd.SparseArray alias was deprecated in pandas 1.0 and
# removed in 2.0; on current pandas use pd.arrays.SparseArray(b).
sparr = pd.SparseArray(b)

In [29]:
# Display the sparse array: its values, the fill value, and an IntIndex listing the
# positions of the stored (non-NaN) entries.
sparr

[1.52402830626, 1.79171734929, nan, nan, 0.574839073209, -0.706947178984, 1.23719924295, nan, nan, 0.550963290048]
Fill: nan
IntIndex
Indices: array([0, 1, 4, 5, 6, 9])

In [28]:
# Convert the sparse array back to a dense NumPy array.
sparr.to_dense()

array([ 1.52402831,  1.79171735,         nan,         nan,  0.57483907,
       -0.70694718,  1.23719924,         nan,         nan,  0.55096329])

In [None]:
# You can also build up a sparse array incrementally: start with an empty SparseList and append arrays to it.

In [31]:
# Start an empty SparseList to collect sparse arrays.
# NOTE(review): SparseList was deprecated in pandas 0.19 and is gone from later
# releases; the truncated text printed under this cell looks like the tail of that
# deprecation warning — confirm before relying on this class.
spl = pd.SparseList()

  """Entry point for launching an IPython kernel.


In [34]:
# SparseList is so named because it holds a collection of sparse arrays.
# Appending is cumulative: re-running this cell adds both arrays again. The output
# displayed for spl further down lists the first array twice, which suggests t was
# appended more than once in the original session.
spl.append(t)
spl.append(sparr)

In [35]:
# Display the list: each appended array is printed with its fill value and sparse index.
spl

<pandas.core.sparse.list.SparseList object at 0x0000024B56803080>
[0.295602238929, 1.02538931936, nan, nan, nan, nan, nan, nan, -0.0562331854322, -0.0393869099645]
Fill: nan
BlockIndex
Block locations: array([0, 8])
Block lengths: array([2, 2])
[0.295602238929, 1.02538931936, nan, nan, nan, nan, nan, nan, -0.0562331854322, -0.0393869099645]
Fill: nan
BlockIndex
Block locations: array([0, 8])
Block lengths: array([2, 2])
[1.52402830626, 1.79171734929, nan, nan, 0.574839073209, -0.706947178984, 1.23719924295, nan, nan, 0.550963290048]
Fill: nan
IntIndex
Indices: array([0, 1, 4, 5, 6, 9])

In [36]:
# Concatenate every appended array, in order, into one single sparse array.
spl.to_array()

[0.295602238929, 1.02538931936, nan, nan, nan, nan, nan, nan, -0.0562331854322, -0.0393869099645, 0.295602238929, 1.02538931936, nan, nan, nan, nan, nan, nan, -0.0562331854322, -0.0393869099645, 1.52402830626, 1.79171734929, nan, nan, 0.574839073209, -0.706947178984, 1.23719924295, nan, nan, 0.550963290048]
Fill: nan
IntIndex
Indices: array([ 0,  1,  8,  9, 10, 11, 18, 19, 20, 21, 24, 25, 26, 29])

In [None]:
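# There are two kinds of SparseIndex, block and integer; the pandas documentation recommends the block format over the integer format.
# The block format stores only the start location and length of each run of stored values, whereas the integer format stores the position of every stored value, so block is preferred.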