## Pandas is used for most data analysis in Python
It is well suited to handling tabular data that is heterogeneous in nature

In [31]:
#importing pandas library
import pandas as pd

Two key data structures provided by pandas: <br>
    Series - used to represent one-dimensional data <br>
    DataFrame - used to represent two-dimensional (tabular) data <br>

#### Series

In [32]:
#A Series pairs two aligned arrays: the index (labels) and the values.
#When no index is given, default integer labels starting at 0 are used (see the output below).
s = pd.Series([4,6,5,3])
s

0    4
1    6
2    5
3    3
dtype: int64

In [33]:
#you can supply your own index labels through the `index` argument (one label per value)
s = pd.Series([4,6,5,3],index=['a','b','c','d'])
s

a    4
b    6
c    5
d    3
dtype: int64

In [34]:
#view the index labels; the string labels are held in an Index with dtype 'object'
s.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [35]:
#view the values, returned as a NumPy array
s.values

array([4, 6, 5, 3])

In [36]:
#dtype of the underlying values array (all values are integers at this point)
s.values.dtype

dtype('int64')

In [37]:
#Slicing, selecting and assigning elements of a Series follow rules similar to NumPy arrays
print(s[['a','b']])     #select several labels at once
#s is indexed by the labels 'a'..'d', so integer *positions* must go through .iloc;
#plain s[2] on a labelled index is deprecated and emits a FutureWarning.
print(s.iloc[2])
print(s.iloc[1:3])      #positional slice (end position excluded)
s.iloc[2] = 5
#A Series holding mixed types needs dtype 'object'. Recent pandas versions no longer
#upcast silently when an incompatible value is assigned, so cast explicitly first.
s = s.astype(object)
s['b'] = 'apple'
print(s)
#notice that the dtype is now 'object', the dtype used once heterogeneity is involved

a    4
b    6
dtype: int64
5
b    6
c    5
dtype: int64
a        4
b    apple
c        5
d        3
dtype: object


  print(s[2])
  s[2] = 5
  s['b'] = 'apple'


In [38]:
#s is still a pandas Series; only the dtype of its values has become 'object'
print(type(s))
print(s.values.dtype)

<class 'pandas.core.series.Series'>
object


In [39]:
#the underlying array now holds the integers and the string together with dtype=object
s.values

array([4, 'apple', 5, 3], dtype=object)

In [40]:
#the container type is still numpy.ndarray; only its dtype differs
print(type(s.values))

<class 'numpy.ndarray'>


In [41]:
#replace the string with an integer again; the values container is still a numpy.ndarray
s['b'] = 3
print(type(s.values))

<class 'numpy.ndarray'>


In [42]:
print(s.values.dtype)
#Notice that the dtype stays 'object' even though every value is an integer again:
#the dtype changed when the string was assigned, and it is not converted back automatically.

object


#### Converting other data structures to Series and assigning values

In [43]:
import numpy as np
n1 = np.array([5,6,7,8])
#copy=False makes the Series wrap n1's data instead of copying it, so the two objects
#share memory. It is stated explicitly because pandas with Copy-on-Write enabled copies
#NumPy input in the constructor by default, which would hide the effect shown below.
s1 = pd.Series(n1, copy=False)
s1

0    5
1    6
2    7
3    8
dtype: int64

In [44]:
#assign a new value through the Series; the following cells display n1 and s1 to show the effect
s1[2] = -4

In [45]:
#the original NumPy array shows the value that was assigned through the Series
n1

array([ 5,  6, -4,  8])

In [46]:
#the Series shows the same updated value
s1

0    5
1    6
2   -4
3    8
dtype: int64

In [47]:
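#notice that here the Series and the NumPy array share the same underlying memory.
#Therefore, a change made through either the Series or the NumPy array is also reflected in the other.
#(With Copy-on-Write enabled, the default from pandas 3.0, pd.Series(array) copies the data unless copy=False is passed.)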

In [48]:
#filtering values: s1>3 builds a boolean mask, and indexing with it keeps only the True entries
s1[s1>3]

0    5
1    6
3    8
dtype: int64

In [49]:
#Mathematical operations are applied element-wise; division returns a float64 Series
s1/2

0    2.5
1    3.0
2   -2.0
3    4.0
dtype: float64

In [50]:
#NumPy functions can be applied to a Series element-wise.
#The logarithm of a negative number is undefined, so the -4 entry becomes NaN
#(NumPy typically reports this with a RuntimeWarning, as in the output below).
np.log(s1)

  result = getattr(ufunc, method)(*inputs, **kwargs)


0    1.609438
1    1.791759
2         NaN
3    2.079442
dtype: float64

In [51]:
#a Series containing repeated values, used in the examples that follow
s2 = pd.Series([3,4,4,6,3,5,1])
s2

0    3
1    4
2    4
3    6
4    3
5    5
6    1
dtype: int64

#### picking Unique elements

In [53]:
#distinct values, returned as a NumPy array in order of first appearance
s2.unique()

array([3, 4, 6, 5, 1])

#### getting frequency counts

In [54]:
#number of occurrences of each distinct value, most frequent first
s2.value_counts()

3    2
4    2
6    1
5    1
1    1
Name: count, dtype: int64

#### check if certain values are present

In [55]:
#element-wise membership test: True where the value is 3 or 4
s2.isin([3,4])

0     True
1     True
2     True
3    False
4     True
5    False
6    False
dtype: bool

In [56]:
#use the boolean result as a mask to keep only the matching elements
s2[s2.isin([3,4])]

0    3
1    4
2    4
4    3
dtype: int64

#### NaN values
NaN is short for "Not a Number" <br>
These values are generated whenever there is missing data (or) there is an issue while reading from a data source (or) when exceptions occur during calculations

In [57]:
#you can define a NaN value in a Series. NaN can be retrieved from the "numpy" library as np.nan
#(the capitalised alias np.NaN was removed in NumPy 2.0, so the lowercase name is used here).
#NaN is a floating-point value, which is why the resulting Series has dtype float64.
s3 = pd.Series([3,6,np.nan,7])
s3

0    3.0
1    6.0
2    NaN
3    7.0
dtype: float64

In [58]:
#To check whether your Series has NaN values, use isnull() (True where a value is missing)
#or notnull() (True where a value is present)
s3.isnull()

0    False
1    False
2     True
3    False
dtype: bool

In [59]:
#notnull() gives the opposite mask: False only at the NaN entry
s3.notnull()

0     True
1     True
2    False
3     True
dtype: bool

In [60]:
#What would you do if you want to retrieve the values that are not null?
#Use the notnull() mask as a filter: only the entries where it is True are kept.
s3[s3.notnull()]

0    3.0
1    6.0
3    7.0
dtype: float64

#### Series as dictionaries

In [64]:
#a dictionary can be passed directly: its keys become the index and its values the data
s4 = pd.Series({1:'apple',2:'banana',3:"lemon"})
s4

1     apple
2    banana
3     lemon
dtype: object

In [65]:
#you can also reassign the indices by setting .index to a list of new labels,
#one per element; the values keep their order and only the labels change
l2 = [6,7,8]
s4.index = l2
s4

6     apple
7    banana
8     lemon
dtype: object

#### Series operations

In [66]:
#a second Series whose labels (7 and 8) overlap only partly with those of s4 (6, 7, 8)
s5 = pd.Series({7:"peach",8:"grape"})
s5

7    peach
8    grape
dtype: object

In [67]:
#operations between two Series align on index labels: strings at matching labels are
#concatenated, and a label present in only one Series (6 here) yields NaN
s4 + s5

6            NaN
7    bananapeach
8     lemongrape
dtype: object

In [None]:
#What will happen if the values in both the Series are integers?