# Important Series methods
- astype
- between
- clip
- drop_duplicates
- isnull
- dropna
- fillna
- isin
- apply
- copy  

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load Kohli's per-match IPL runs, indexed by match number.
# squeeze('columns') turns the single remaining column into a Series and,
# unlike a bare squeeze(), still returns a Series when the file has only one row.
vk = pd.read_csv('kohli_ipl.csv', index_col='match_no').squeeze('columns')

In [3]:
# Load the single-column subscribers file as a Series.
# squeeze('columns') collapses only the column axis, so a one-row file
# still yields a Series rather than a scalar (which a bare squeeze() would give).
subs = pd.read_csv('subs.csv').squeeze('columns')

In [4]:
# Load the lead actor of each movie, indexed by movie title.
# squeeze('columns') turns the single remaining column into a Series and,
# unlike a bare squeeze(), still returns a Series when the file has only one row.
movies = pd.read_csv('bollywood.csv', index_col='movie').squeeze('columns')

In [5]:
type(vk)

pandas.core.series.Series

In [6]:
import sys

In [7]:
sys.getsizeof(vk)

3472

In [8]:
# Measure the size again after converting the runs to a 16-bit integer dtype:
# the result is smaller than the size of the original int64 Series.
# NOTE(review): 'Int16' (capital I) selects pandas' nullable integer dtype —
# confirm whether plain 'int16' was intended, since vk has no missing values.
sys.getsizeof(vk.astype('Int16'))

2397

In [9]:
# In how many matches did vk score more than 50 but fewer than 100 runs (50 < runs < 100)?
vk.between(51,99)  # both bounds are inclusive; returns a boolean Series

match_no
1      False
2      False
3      False
4      False
5      False
       ...  
211    False
212    False
213     True
214    False
215    False
Name: runs, Length: 215, dtype: bool

In [10]:
# Number of matches whose score lies in the inclusive range 51..99
len(vk[vk.between(51, 99)])

43

In [11]:
# clip(lower, upper) limits the values to a range: values below 100 become 100,
# values above 200 become 200, and values already inside the range are unchanged.
subs.clip(100,200)

0      100
1      100
2      100
3      100
4      100
      ... 
360    200
361    200
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [12]:
movies

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

In [13]:
# Drop repeated lead actors: only the first movie of each actor is kept
# (1500 entries shrink to 566 distinct leads).
movies.drop_duplicates()

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Sssshhh...                              Tanishaa Mukerji
Rules: Pyaar Ka Superhit Formula                  Tanuja
Right Here Right Now (film)                        Ankit
Talaash: The Hunt Begins...                Rakhee Gulzar
The Pink Mirror                          Edwin Fernandes
Name: lead, Length: 566, dtype: object

In [14]:
temp = pd.Series([1,1,2,3,3,4,4])
temp

0    1
1    1
2    2
3    3
4    3
5    4
6    4
dtype: int64

In [15]:
# keep='last' retains the last occurrence of each value;
# by default (keep='first') the first occurrence is retained instead.
temp.drop_duplicates(keep = 'last')

1    1
2    2
4    3
6    4
dtype: int64

In [16]:
# Flags repeats: True where the value has already appeared earlier in the Series,
# False for the first occurrence of each value.
temp.duplicated()

0    False
1     True
2    False
3    False
4     True
5    False
6     True
dtype: bool

In [17]:
temp.duplicated().sum()

3

In [18]:
temp = pd.Series([np.nan,1,2,3,np.nan,np.nan])
temp

0    NaN
1    1.0
2    2.0
3    3.0
4    NaN
5    NaN
dtype: float64

In [19]:
temp.size

6

In [20]:
temp.count() # count only non-missing values

3

In [21]:
temp.isnull()

0     True
1    False
2    False
3    False
4     True
5     True
dtype: bool

In [22]:
temp.isnull().sum() # 3 missing values

3

In [23]:
vk.isnull().sum()

0

In [24]:
temp.dropna()

1    1.0
2    2.0
3    3.0
dtype: float64

In [25]:
temp.fillna(77)

0    77.0
1     1.0
2     2.0
3     3.0
4    77.0
5    77.0
dtype: float64

In [26]:
temp.fillna(temp.mean())

0    2.0
1    1.0
2    2.0
3    3.0
4    2.0
5    2.0
dtype: float64

In [27]:
# Manual membership test: one comparison per value, combined with | (element-wise OR).
# This gets unwieldy as the list of values grows.
vk[(vk == 55) | (vk == 99)]

match_no
82     99
137    55
Name: runs, dtype: int64

In [28]:
# isin() builds a boolean mask that is True where the value is one of the listed values,
# so any number of values can be tested in a single call.
vk[vk.isin([55,99,0])]

match_no
8       0
82     99
87      0
91      0
93      0
130     0
135     0
137    55
206     0
207     0
211     0
Name: runs, dtype: int64

In [29]:
# apply() runs a custom function on every element of the Series
movies

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

In [30]:
def first_name(actor):
    """Return the first whitespace-separated word of an actor's name."""
    words = actor.split()
    return words[0]

movies.apply(first_name)

movie
Uri: The Surgical Strike                  Vicky
Battalion 609                             Vicky
The Accidental Prime Minister (film)     Anupam
Why Cheat India                          Emraan
Evening Shadows                            Mona
                                         ...   
Hum Tumhare Hain Sanam                     Shah
Aankhen (2002 film)                     Amitabh
Saathiya (film)                           Vivek
Company (film)                             Ajay
Awara Paagal Deewana                     Akshay
Name: lead, Length: 1500, dtype: object

In [31]:
def first_name_upper(actor):
    """Return the first whitespace-separated word of an actor's name in upper case."""
    words = actor.split()
    return words[0].upper()

movies.apply(first_name_upper)

movie
Uri: The Surgical Strike                  VICKY
Battalion 609                             VICKY
The Accidental Prime Minister (film)     ANUPAM
Why Cheat India                          EMRAAN
Evening Shadows                            MONA
                                         ...   
Hum Tumhare Hain Sanam                     SHAH
Aankhen (2002 film)                     AMITABH
Saathiya (film)                           VIVEK
Company (film)                             AJAY
Awara Paagal Deewana                     AKSHAY
Name: lead, Length: 1500, dtype: object

In [32]:
vk

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Name: runs, Length: 215, dtype: int64

In [33]:
# Keep every non-zero score as it is and replace a score of 0 with the label 'OUT'.
# Mixing strings and integers makes the resulting Series dtype object.
vk.apply(lambda runs: runs if runs != 0 else 'OUT')

match_no
1        1
2       23
3       13
4       12
5        1
      ... 
211    OUT
212     20
213     73
214     25
215      7
Name: runs, Length: 215, dtype: object

In [34]:
# Label each day 'Good' when it gained more subscribers than the average day, else 'Bad'.
# The mean is computed once up front; calling subs.mean() inside the lambda would
# recompute it for every single element.
subs_mean = subs.mean()
subs.apply(lambda x: 'Good' if x > subs_mean else 'Bad')

0       Bad
1       Bad
2       Bad
3       Bad
4       Bad
       ... 
360    Good
361    Good
362    Good
363    Good
364    Good
Name: Subscribers gained, Length: 365, dtype: object

In [35]:
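# head()/tail() return a preview of the data that can share memory with the original Series,
# so a change made to the preview may also show up in the original data
# (whether it does depends on the pandas version and its Copy-on-Write setting).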

In [36]:
vk

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Name: runs, Length: 215, dtype: int64

In [37]:
n = vk.head()
n

match_no
1     1
2    23
3    13
4    12
5     1
Name: runs, dtype: int64

In [38]:
# NOTE(review): match 1 already holds the value 1, so this write cannot visibly change vk;
# assign a different value here to actually observe whether the edit propagates.
n[1] = 1

In [39]:
vk

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Name: runs, Length: 215, dtype: int64

In [40]:
# To avoid accidentally modifying the original Series, work on an explicit copy of the preview

In [41]:
# copy() gives n its own data, so later writes to n leave vk untouched
n = vk.head().copy()

In [42]:
n[1] = 2000

In [43]:
n

match_no
1    2000
2      23
3      13
4      12
5       1
Name: runs, dtype: int64

In [44]:
vk

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Name: runs, Length: 215, dtype: int64