# Introduction to Pandas Data Structures

__Import libraries__

In [2]:
import numpy as np
import pandas as pd

Two primary data structures: 
1. Series
2. DataFrame 

The Series, as you will see, is the data structure designed to hold a sequence of one-dimensional data, while the DataFrame, a more complex data structure, is designed to hold data with several dimensions.

---

## The Series

The Series is the object of the pandas library designed to represent one-dimensional data structures, similarly to an array but with some additional features.

The structure of a Series object:
![image.png](attachment:image.png)

### Declaring a Series

In [2]:
# Build a Series from a plain Python list. No index is passed, so the
# output below shows the default integer labels 0..3.
s = pd.Series(data=[3, 5, 2, 7])
s

0    3
1    5
2    2
3    7
dtype: int64

In [5]:
# Same constructor, this time with explicit string labels in place of the
# default integer index.
s = pd.Series(data=[4, 6, 3, 7], index=list('abcd'))

In [8]:
s.name = 'sekar'

In [9]:
s.name

'sekar'

In [10]:
s

a    4
b    6
c    3
d    7
Name: sekar, dtype: int64

In [11]:
s.values

array([4, 6, 3, 7], dtype=int64)

In [12]:
s.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [13]:
list(s.index)

['a', 'b', 'c', 'd']

In [14]:
np.array(s.index)

array(['a', 'b', 'c', 'd'], dtype=object)

In [17]:
np.array(s.index, dtype='str')

array(['a', 'b', 'c', 'd'], dtype='<U1')

In [25]:
# Ten random digits in [0, 10), labelled 1..10, stored as 16-bit integers
# and carrying the name 'series'.
ser = pd.Series(
    data=np.random.randint(0, 10, size=10),
    index=np.arange(1, 11),
    dtype='int16',
    name='series',
)

In [26]:
ser

1     7
2     8
3     5
4     5
5     6
6     5
7     0
8     6
9     7
10    6
Name: series, dtype: int16

### Selecting the Internal Elements

In [27]:
# `ser` carries the integer labels 1..10, so plain [] looks the key up as a
# label: this returns the value stored under label 2 (8 in the output
# above), not the element at position 2.
ser[2]

8

In [28]:
ser.iloc[0]

7

In [29]:
ser.loc[1]

7

In [30]:
# Rebind `s` to ten random digits in [0, 10), labelled with the letters
# 'a'..'j'.
s = pd.Series(
    data=np.random.randint(0, 10, size=10),
    index=list('abcdefghij'),
)

In [31]:
s

a    9
b    2
c    6
d    9
e    5
f    3
g    4
h    5
i    1
j    5
dtype: int32

In [32]:
s['d']

9

In [33]:
s.loc['d']

9

In [34]:
s.iloc[4]

5

In [35]:
s.iloc[2:8]

c    6
d    9
e    5
f    3
g    4
h    5
dtype: int32

In [36]:
# Label-based slice: unlike the positional s.iloc[2:8] above (which stops
# before position 8), the end label 'e' is part of the result.
s.loc['a':'e']

a    9
b    2
c    6
d    9
e    5
dtype: int32

In [37]:
# Plain [] with label bounds slices by label here as well: the end label
# 'f' is included in the output below.
s['a':'f']

a    9
b    2
c    6
d    9
e    5
f    3
dtype: int32

### Assigning Values to the Elements

In [38]:
s

a    9
b    2
c    6
d    9
e    5
f    3
g    4
h    5
i    1
j    5
dtype: int32

In [39]:
s['e'] = 9

In [40]:
s

a    9
b    2
c    6
d    9
e    9
f    3
g    4
h    5
i    1
j    5
dtype: int32

In [41]:
# Positional access on a string-labelled Series: ask for it explicitly with
# .iloc. Plain s[0] relies on the integer-position fallback of [], which
# pandas has deprecated (FutureWarning since 2.1) in favour of treating
# integer keys as labels.
s.iloc[0]

9

In [42]:
# Second element by position. .iloc is used instead of s[1] because integer
# keys passed to [] on a string-labelled Series are a deprecated positional
# fallback.
s.iloc[1]

2

In [43]:
# Overwrite the element at position 4 (label 'e'). .iloc keeps this a
# positional assignment; s[4] = 5 depends on the deprecated fallback that
# treats an integer key as a position when the index holds strings.
s.iloc[4] = 5

In [44]:
s

a    9
b    2
c    6
d    9
e    5
f    3
g    4
h    5
i    1
j    5
dtype: int32

### Defining Series from NumPy Arrays and Other Series

In [45]:
ser = pd.Series(np.random.randint(0, 10, 10))

In [46]:
ser

0    8
1    6
2    4
3    2
4    4
5    8
6    5
7    7
8    2
9    6
dtype: int32

In [47]:
arr = np.random.randint(0, 10, 5)
# copy=False asks pandas to wrap `arr` without copying it, so the in-place
# edit of `arr` a few cells below is visible through `ser`. Stating it
# explicitly keeps this demonstration working on pandas versions whose
# constructors copy NumPy input by default (Copy-on-Write).
ser = pd.Series(arr, copy=False)

In [48]:
ser

0    3
1    9
2    0
3    4
4    8
dtype: int32

In [49]:
arr[2] = 5

In [50]:
arr

array([3, 9, 5, 4, 8])

In [51]:
ser

0    3
1    9
2    5
3    4
4    8
dtype: int32

### Filtering Values

In [52]:
ser

0    3
1    9
2    5
3    4
4    8
dtype: int32

In [53]:
ser[ser > 5]

1    9
4    8
dtype: int32

### Operations and Mathematical Functions

In [54]:
ser

0    3
1    9
2    5
3    4
4    8
dtype: int32

In [55]:
ser / 2

0    1.5
1    4.5
2    2.5
3    2.0
4    4.0
dtype: float64

In [56]:
np.log(ser)

0    1.098612
1    2.197225
2    1.609438
3    1.386294
4    2.079442
dtype: float64

In [57]:
np.cos(ser)

0   -0.989992
1   -0.911130
2    0.283662
3   -0.653644
4   -0.145500
dtype: float64

### Evaluating Values

In [58]:
# Colour labels repeat on purpose ('white' and 'green' each appear twice);
# the output below shows the duplicated labels kept as-is.
serd = pd.Series(
    data=[1, 0, 2, 1, 2, 3],
    index=['white', 'white', 'blue', 'green', 'green', 'yellow'],
)

In [60]:
serd

white     1
white     0
blue      2
green     1
green     2
yellow    3
dtype: int64

In [61]:
serd.index.unique()

Index(['white', 'blue', 'green', 'yellow'], dtype='object')

In [62]:
serd.unique()

array([1, 0, 2, 3], dtype=int64)

In [63]:
serd.value_counts()

2    2
1    2
3    1
0    1
dtype: int64

In [66]:
ser = pd.Series(np.random.randint(0, 2, 10))

In [67]:
ser

0    0
1    0
2    1
3    1
4    0
5    0
6    1
7    0
8    1
9    0
dtype: int32

In [69]:
ser.value_counts()

0    6
1    4
dtype: int64

In [70]:
ser = pd.Series(np.random.randint(0, 5, 100))

In [71]:
ser.value_counts()

1    27
4    24
3    17
0    17
2    15
dtype: int64

In [74]:
type(pd.Series(np.random.randint(0, 2, 10)))

pandas.core.series.Series

In [75]:
serd

white     1
white     0
blue      2
green     1
green     2
yellow    3
dtype: int64

In [76]:
serd.isin([0, 1])

white      True
white      True
blue      False
green      True
green     False
yellow    False
dtype: bool

In [77]:
serd[serd.isin([0, 1])]

white    1
white    0
green    1
dtype: int64

In [78]:
# Plain NumPy array holding 0..4; the next cell indexes it with a list of
# booleans.
r = np.arange(5)
r

array([0, 1, 2, 3, 4])

In [80]:
r[[True, False, True, False, True]]

array([0, 2, 4])

In [3]:
ser = pd.Series(np.random.randint(0, 3, 10))

In [4]:
ser

0    1
1    0
2    2
3    0
4    1
5    2
6    2
7    1
8    1
9    0
dtype: int32

In [5]:
ser.value_counts()

1    4
2    3
0    3
dtype: int64

In [7]:
ser.unique()

array([1, 0, 2], dtype=int64)

In [8]:
ser.isin([0])

0    False
1     True
2    False
3     True
4    False
5    False
6    False
7    False
8    False
9     True
dtype: bool

In [9]:
ser[ser.isin([0])]

1    0
3    0
9    0
dtype: int32

### NaN Values

In [14]:
a = np.random.randn(10)

In [15]:
a

array([ 0.02903468, -1.00152734,  1.08837905, -1.34607445, -1.38000634,
        1.11811987, -1.72101372,  1.57959331,  0.51788796,  0.12427373])

In [16]:
# `a` contains negative samples (see its printout above); the result below
# has nan at exactly those positions.
# NOTE(review): the stray docstring-like text above the result looks like
# the tail of a RuntimeWarning emitted by this call — confirm and clear it.
np.log(a)

  """Entry point for launching an IPython kernel.


array([-3.53926429,         nan,  0.08468947,         nan,         nan,
        0.11164859,         nan,  0.45716741, -0.65799636, -2.08526869])

In [19]:
# np.nan marks the two missing entries (positions 3 and 6); the output
# below shows the Series stored as float64.
s = pd.Series(data=[5, 9, -3, np.nan, 6, -2, np.nan])

In [20]:
s

0    5.0
1    9.0
2   -3.0
3    NaN
4    6.0
5   -2.0
6    NaN
dtype: float64

In [21]:
s.isnull()

0    False
1    False
2    False
3     True
4    False
5    False
6     True
dtype: bool

In [22]:
s.notnull()

0     True
1     True
2     True
3    False
4     True
5     True
6    False
dtype: bool

In [23]:
s[s.notnull()]

0    5.0
1    9.0
2   -3.0
4    6.0
5   -2.0
dtype: float64

In [24]:
s[s.isnull()]

3   NaN
6   NaN
dtype: float64

In [25]:
# Only the five non-missing values are tallied in the output below; the two
# NaN entries of `s` do not appear in the counts.
s.value_counts()

-2.0    1
 6.0    1
-3.0    1
 9.0    1
 5.0    1
dtype: int64

In [26]:
s.isin([np.nan])

0    False
1    False
2    False
3     True
4    False
5    False
6     True
dtype: bool

### Series as Dictionaries

In [28]:
pd.Series({'a':2, 'b':5, 'c': 7})

a    2
b    5
c    7
dtype: int64

In [29]:
# A dict maps straight onto a Series: its keys become the index labels and
# its values become the data (see the output below).
mydict = dict(red=2000, blue=1000, yellow=500, orange=1000)
myseries = pd.Series(data=mydict)

In [30]:
myseries

red       2000
blue      1000
yellow     500
orange    1000
dtype: int64

In [31]:
colors = ['red','yellow','orange','blue','green']

In [32]:
# Build the Series from `mydict` but labelled by `colors`: rows follow the
# order of that list, and 'green', which is not a key of `mydict`, shows up
# as NaN in the output below (where the values are displayed as float64).
myseries = pd.Series(mydict, index=colors)

In [33]:
myseries

red       2000.0
yellow     500.0
orange    1000.0
blue      1000.0
green        NaN
dtype: float64

### Operations between Series

In [35]:
# Second colour Series; it shares only 'red' and 'yellow' with `mydict`
# and adds the new label 'black'.
mydict2 = dict(red=400, yellow=1000, black=700)
myseries2 = pd.Series(data=mydict2)

In [36]:
# Addition pairs entries by label, not by position. In the output below
# only 'red' and 'yellow', which hold a value in both operands, get a sum;
# every other label comes out as NaN.
myseries + myseries2

black        NaN
blue         NaN
green        NaN
orange       NaN
red       2400.0
yellow    1500.0
dtype: float64

In [38]:
# Two random Series of values in [100, 200) whose integer labels are
# shifted by one: `ser` is labelled 0..9 and `ser2` is labelled 1..10.
ser = pd.Series(
    data=np.random.randint(100, 200, size=10),
    index=np.arange(10),
)
ser2 = pd.Series(
    data=np.random.randint(100, 200, size=10),
    index=np.arange(1, 11),
)

In [41]:
ser2

1     117
2     143
3     173
4     159
5     177
6     167
7     143
8     163
9     139
10    194
dtype: int32

In [43]:
# Labels 0 and 10 each exist in only one of the two operands, so the sum,
# and therefore its logarithm, is NaN at those labels in the output below.
np.log(ser + ser2)

0          NaN
1     5.476464
2     5.525453
3     5.777652
4     5.564520
5     5.886104
6     5.849325
7     5.703782
8     5.602119
9     5.552960
10         NaN
dtype: float64

---

# Important Points

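- A Series consists of values and an index.
- We can give a name to a Series object.
- The index and values behave like the keys and values of a dictionary.
- A Series can be created by specifying values and an index, or by passing a dictionary.
- Series indexing and slicing works much like NumPy, or explicitly with `loc` (labels) and `iloc` (positions); note that label-based slices include the end label.
- Explicit index (the labels) and implicit index (the integer positions).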
- object.value_counts: returns how many times each unique value occurs (NaN values are not counted)
- object.isin: takes a list of the values to look for, returns a boolean Series
- object.isnull: checks for null values, returns a boolean Series
- object.notnull: checks for non-null values, returns a boolean Series
- object.unique: returns the unique values as an array
- Operations between Series align on the index; any operation involving a null value (or a label missing from one operand) returns null