## Some Important Series Methods

In [1]:
# Series methods covered in this notebook:
# astype          - convert a Series to another dtype
# between         - boolean mask for values inside a range
# clip            - cap values at a lower and an upper bound
# drop_duplicates - remove repeated values
# isnull          - boolean mask of missing values
# dropna          - remove missing values
# fillna          - replace missing values
# isin            - boolean mask for membership in a list of values
# apply           - run a function on every value
# copy            - make an independent copy of a Series

In [2]:
import sys

import numpy as np
import pandas as pd

In [3]:
# Load the subscribers dataset. read_csv returns a DataFrame, not a Series,
# so the single column of interest is selected to obtain a Series.
# Forward slashes are used, as in the other load cells: the backslash form
# relied on '\d' and '\s', which are not valid string escapes, and only
# resolved as a path on Windows.
subs_series = pd.read_csv('Datasets/datasets-session-16/subs.csv')  # this gives dataframe, not series
subs = subs_series['Subscribers gained']
subs

0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [4]:
# Load the Kohli IPL data and build a Series of runs indexed by match number
vk_series = pd.read_csv('Datasets/datasets-session-16/kohli_ipl.csv')
runs_scored = vk_series['runs'].values
match_numbers = vk_series['match_no']
vk = pd.Series(data=runs_scored, index=match_numbers)
vk

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Length: 215, dtype: int64

In [5]:
# Load the Bollywood data and build a Series of lead names indexed by movie title
movie_series = pd.read_csv('Datasets/datasets-session-16/bollywood.csv')
lead_names = movie_series['lead'].values
movie_titles = movie_series['movie']
bw = pd.Series(data=lead_names, index=movie_titles)
bw

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Length: 1500, dtype: object

In [8]:
# astype
# Compare the in-memory size of vk (int64) with the same data cast to int16.
# `sys` is imported in the notebook's import cell rather than here.
print(sys.getsizeof(vk))

sys.getsizeof(vk.astype('int16'))

3472


2182

In [9]:
# between
# Boolean mask: True where the runs lie in the range 51..99
in_range_mask = vk.between(51, 99)
# Innings selected by that mask
in_range_innings = vk[in_range_mask]
# Number of such innings
in_range_innings.size

43

In [10]:
# clip
# Values below 100 are raised to 100 and values above 200 are lowered to 200;
# values already between 100 and 200 stay as they are.
subs.clip(lower=100, upper=200)

0      100
1      100
2      100
3      100
4      100
      ... 
360    200
361    200
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [21]:
# drop_duplicates - removes repeated values
temp = pd.Series([1, 1, 2, 2, 3, 3, 4, 4])
print(temp, end='\n\n')

# keep='last' retains the final occurrence of each value and drops the earlier ones
last_kept = temp.drop_duplicates(keep='last')
print(last_kept, end='\n\n')

# duplicated() flags each repeat of an already-seen value as True
dup_mask = temp.duplicated()
print(dup_mask, end='\n\n')

# Number of entries flagged as duplicates
print(dup_mask.sum())

0    1
1    1
2    2
3    2
4    3
5    3
6    4
7    4
dtype: int64

1    1
3    2
5    3
7    4
dtype: int64

0    False
1     True
2    False
3     True
4    False
5     True
6    False
7     True
dtype: bool

4


In [20]:
# Keep only the first movie listed for each lead name (keep='first' is the default)
bw.drop_duplicates(keep='first')

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Sssshhh...                              Tanishaa Mukerji
Rules: Pyaar Ka Superhit Formula                  Tanuja
Right Here Right Now (film)                        Ankit
Talaash: The Hunt Begins...                Rakhee Gulzar
The Pink Mirror                          Edwin Fernandes
Length: 566, dtype: object

In [22]:
# Dealing with missing values
# Sample data with three NaN entries to practise on
values_with_gaps = [1, 2, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]
temp = pd.Series(values_with_gaps)
temp

0     1.0
1     2.0
2     3.0
3     NaN
4     5.0
5     6.0
6     NaN
7     8.0
8     NaN
9    10.0
dtype: float64

In [24]:
# size is the total number of entries, missing values included
n_entries = temp.size
# count() is the number of non-missing entries only
n_present = temp.count()
print(n_entries)
print(n_present)

10
7


In [28]:
# isnull
# Boolean mask: True wherever the value is missing
missing_mask = temp.isnull()
print(missing_mask)
# Summing the mask gives the number of missing values
print(missing_mask.sum())

0    False
1    False
2    False
3     True
4    False
5    False
6     True
7    False
8     True
9    False
dtype: bool
3


In [32]:
# Two ways to handle missing values

# 1. Remove them with dropna
without_missing = temp.dropna()
print(without_missing)

# 2. Replace them with fillna
zero_filled = temp.fillna(value=0)  # missing values become 0
print(zero_filled)
mean_filled = temp.fillna(value=temp.mean())  # missing values become the mean of the present values
print(mean_filled)

0     1.0
1     2.0
2     3.0
4     5.0
5     6.0
7     8.0
9    10.0
dtype: float64
0     1.0
1     2.0
2     3.0
3     0.0
4     5.0
5     6.0
6     0.0
7     8.0
8     0.0
9    10.0
dtype: float64
0     1.0
1     2.0
2     3.0
3     5.0
4     5.0
5     6.0
6     5.0
7     8.0
8     5.0
9    10.0
dtype: float64


In [34]:
# isin
# Long form: one equality test per value, OR-ed together
vk[vk.eq(49) | vk.eq(99)]

# Shortcut: isin checks against the whole list of values at once
target_scores = [49, 99]
vk[vk.isin(target_scores)]

match_no
82    99
86    49
dtype: int64

In [35]:
# apply
def first_name_upper(full_name):
    """Return the first whitespace-separated word of full_name in upper case."""
    name_parts = full_name.split()
    return name_parts[0].upper()


bw.apply(first_name_upper)

movie
Uri: The Surgical Strike                  VICKY
Battalion 609                             VICKY
The Accidental Prime Minister (film)     ANUPAM
Why Cheat India                          EMRAAN
Evening Shadows                            MONA
                                         ...   
Hum Tumhare Hain Sanam                     SHAH
Aankhen (2002 film)                     AMITABH
Saathiya (film)                           VIVEK
Company (film)                             AJAY
Awara Paagal Deewana                     AKSHAY
Length: 1500, dtype: object

In [36]:
# Label each day relative to the average number of subscribers gained.
# The mean is computed once up front: calling subs.mean() inside the lambda
# recomputes it for every element of the Series.
avg_subs = subs.mean()
subs.apply(lambda x: 'good day' if x > avg_subs else 'bad day')  # above average -> 'good day', otherwise 'bad day'

0       bad day
1       bad day
2       bad day
3       bad day
4       bad day
         ...   
360    good day
361    good day
362    good day
363    good day
364    good day
Name: Subscribers gained, Length: 365, dtype: object

In [None]:
'''
When a Series is taken from another one without copy() (for example new = vk.head(5)),
it can be a 'view' of the original data: changing a value in 'new' also changes the original values in vk.
So if the earlier cell that creates 'new' is executed again, the change is still visible.

With copy(), the new Series is independent. After changing 'cnew', re-executing the earlier cell
that creates 'cnew' does not reflect the change, because the original vk was never modified.
'''

In [37]:
# copy
# Display the original Series before slices of it are taken and modified
vk

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Length: 215, dtype: int64

In [39]:
# First five entries of vk, taken WITHOUT .copy()
# NOTE(review): whether `new` shares its data with vk depends on the pandas
# version and its Copy-on-Write setting - confirm for the environment in use.
new = vk.head(5)
new

match_no
1     1
2    23
3    13
4    12
5     1
dtype: int64

In [41]:
# Overwrite the entry with index label 1 (match_no 1) in `new`.
# In the recorded run, a later vk.head(5) shows 20 at match_no 1,
# so the write reached vk as well.
new[1] = 20
new

match_no
1    20
2    23
3    13
4    12
5     1
dtype: int64

In [46]:
# Independent (deep) copy of the first five entries of vk
first_five = vk.head(5)
cnew = first_five.copy(deep=True)
cnew

match_no
1    20
2    23
3    13
4    12
5     1
dtype: int64

In [44]:
# Overwrite the entry with index label 1 (match_no 1) in the copy
cnew.loc[1] = 200
cnew

match_no
1    200
2     23
3     13
4     12
5      1
dtype: int64

In [45]:
# Display cnew again to confirm it still holds the modified value
cnew

match_no
1    200
2     23
3     13
4     12
5      1
dtype: int64