# **Important Methods for Pandas Series**

In [6]:
import sys
import numpy as np
import pandas as pd
movies = pd.read_csv('/content/bollywood.csv')
vk = pd.read_csv('/content/kohli_ipl.csv')
subs = pd.read_csv('/content/subs.csv')

In [3]:
movies.head(5)

Unnamed: 0,movie,lead
0,Uri: The Surgical Strike,Vicky Kaushal
1,Battalion 609,Vicky Ahuja
2,The Accidental Prime Minister (film),Anupam Kher
3,Why Cheat India,Emraan Hashmi
4,Evening Shadows,Mona Ambegaonkar


In [4]:
vk.head(5)

Unnamed: 0,match_no,runs
0,1,1
1,2,23
2,3,13
3,4,12
4,5,1


In [5]:
subs.head()

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44


## **1. `astype`**

In [7]:
# Change the datatype
sys.getsizeof(vk)

3604

In [8]:
sys.getsizeof(vk.astype('int16'))

1024

## **2. `between`**

In [14]:
print("Virat Kohli runs in between: 51 to 99")
print(vk['runs'].between(51,99))
print()
print("Total Scores: ", vk[vk['runs'].between(51,99)].size)

Virat Kohli runs in between: 51 to 99
0      False
1      False
2      False
3      False
4      False
       ...  
210    False
211    False
212     True
213    False
214    False
Name: runs, Length: 215, dtype: bool

Total Scores:  86


## **3. `clip`**

In [16]:
print(subs)

     Subscribers gained
0                    48
1                    57
2                    40
3                    43
4                    44
..                  ...
360                 231
361                 226
362                 155
363                 144
364                 172

[365 rows x 1 columns]


In [17]:
print("Clipping my subs dataset: ")
subs.clip(100,200)
# The values lower than 100, will be changed to 100
# The values higher than 200, will be changed to 200
# The values that are in between 100 and 200, will be remain as it is

Clipping my subs dataset: 


Unnamed: 0,Subscribers gained
0,100
1,100
2,100
3,100
4,100
...,...
360,200
361,200
362,155
363,144


## **4. `drop_duplicates`**

In [23]:
temp = pd.Series([1,1,2,2,3,3,4,4])
print("Original Series: ")
print(temp)
print()
print("After Dropping: ")
print(temp.drop_duplicates())
print()
print("Dropping the duplicates (keeping the last one): ")
print(temp.drop_duplicates(keep='last'))

Original Series: 
0    1
1    1
2    2
3    2
4    3
5    3
6    4
7    4
dtype: int64

After Dropping: 
0    1
2    2
4    3
6    4
dtype: int64

Dropping the duplicates (keeping the last one): 
1    1
3    2
5    3
7    4
dtype: int64


In [26]:
print("Total number of duplicates: ", temp.duplicated().sum())
print("Total number of duplicates (in vk dataset): ", vk.duplicated().sum())

Total number of duplicates:  4
Total number of duplicates (in vk dataset):  0


In [29]:
temp = pd.Series([1,2,3,np.nan,5,6,np.nan,8,np.nan,10])
print("Original Dataset: ")
print(temp)
print()
print("Size of the temp dataset: ", temp.size)
print()
print("Count of temp dataset: ", temp.count())

Original Dataset: 
0     1.0
1     2.0
2     3.0
3     NaN
4     5.0
5     6.0
6     NaN
7     8.0
8     NaN
9    10.0
dtype: float64

Size of the temp dataset:  10

Count of temp dataset:  7


## **5. `isnull`**

In [35]:
print("Total number of Null Values (temp dataset): ", temp.isnull().sum())
print()
print("Total number of Null Values (vk dataset): ", vk.isnull().sum())
print()
print("Total number of Null Values (subs dataset): ", subs.isnull().sum())

Total number of Null Values (temp dataset):  3

Total number of Null Values (vk dataset):  match_no    0
runs        0
dtype: int64

Total number of Null Values (subs dataset):  Subscribers gained    0
dtype: int64


## **6. `dropna`**

In [37]:
print("Original Dataset: ")
print(temp)
print()
print("After Dropping Null Values: ")
print(temp.dropna())

Original Dataset: 
0     1.0
1     2.0
2     3.0
3     NaN
4     5.0
5     6.0
6     NaN
7     8.0
8     NaN
9    10.0
dtype: float64

After Dropping Null Values: 
0     1.0
1     2.0
2     3.0
4     5.0
5     6.0
7     8.0
9    10.0
dtype: float64


## **7. `fillna`**

In [39]:
print("Original Dataset: ")
print(temp)
print()
print("Replacing the NaN values using 0: ")
print(temp.fillna(0))

Original Dataset: 
0     1.0
1     2.0
2     3.0
3     NaN
4     5.0
5     6.0
6     NaN
7     8.0
8     NaN
9    10.0
dtype: float64

Replacing the NaN values using 0: 
0     1.0
1     2.0
2     3.0
3     0.0
4     5.0
5     6.0
6     0.0
7     8.0
8     0.0
9    10.0
dtype: float64


In [42]:
print("Original Dataset: ")
print(temp)
print()
print("Replcaing the NaN values using mean: ")
print(temp.fillna(temp.mean()))

Original Dataset: 
0     1.0
1     2.0
2     3.0
3     NaN
4     5.0
5     6.0
6     NaN
7     8.0
8     NaN
9    10.0
dtype: float64

Replcaing the NaN values using mean: 
0     1.0
1     2.0
2     3.0
3     5.0
4     5.0
5     6.0
6     5.0
7     8.0
8     5.0
9    10.0
dtype: float64


## **8. `isin`**

In [52]:
print("Original Dataset: ")
print(vk)
print()
print("Has Virat Kohli been get dismissed at 49 or 99 runs: ")
print(vk[(vk['runs'] == 49) | (vk['runs'] == 99)])

Original Dataset: 
     match_no  runs
0           1     1
1           2    23
2           3    13
3           4    12
4           5     1
..        ...   ...
210       211     0
211       212    20
212       213    73
213       214    25
214       215     7

[215 rows x 2 columns]

Has Virat Kohli been get dismissed at 49 or 99 runs: 
    match_no  runs
81        82    99
85        86    49


In [57]:
print(vk[vk['runs'].isin([49, 99])])

    match_no  runs
81        82    99
85        86    49


## **9. `apply`**

In [59]:
print("Original Dataset: ")
print(movies)

Original Dataset: 
                                     movie              lead
0                 Uri: The Surgical Strike     Vicky Kaushal
1                            Battalion 609       Vicky Ahuja
2     The Accidental Prime Minister (film)       Anupam Kher
3                          Why Cheat India     Emraan Hashmi
4                          Evening Shadows  Mona Ambegaonkar
...                                    ...               ...
1495                Hum Tumhare Hain Sanam    Shah Rukh Khan
1496                   Aankhen (2002 film)  Amitabh Bachchan
1497                       Saathiya (film)      Vivek Oberoi
1498                        Company (film)        Ajay Devgn
1499                  Awara Paagal Deewana      Akshay Kumar

[1500 rows x 2 columns]


In [61]:
print(movies['movie'].str.split().str[0].str.upper())

0            URI:
1       BATTALION
2             THE
3             WHY
4         EVENING
          ...    
1495          HUM
1496      AANKHEN
1497     SAATHIYA
1498      COMPANY
1499        AWARA
Name: movie, Length: 1500, dtype: object


In [62]:
print("Original subs dataset: ")
print(subs)

Original subs dataset: 
     Subscribers gained
0                    48
1                    57
2                    40
3                    43
4                    44
..                  ...
360                 231
361                 226
362                 155
363                 144
364                 172

[365 rows x 1 columns]


In [65]:
print(subs['Subscribers gained'].apply(lambda x: 'Good Day' if x > subs['Subscribers gained'].mean() else 'Bad Day'))

0       Bad Day
1       Bad Day
2       Bad Day
3       Bad Day
4       Bad Day
         ...   
360    Good Day
361    Good Day
362    Good Day
363    Good Day
364    Good Day
Name: Subscribers gained, Length: 365, dtype: object


## **10. `copy`**

In [67]:
print("Original Dataset: ")
print(vk)
print()
new = vk.head() # view
new

Original Dataset: 
     match_no  runs
0           1     1
1           2    23
2           3    13
3           4    12
4           5     1
..        ...   ...
210       211     0
211       212    20
212       213    73
213       214    25
214       215     7

[215 rows x 2 columns]



Unnamed: 0,match_no,runs
0,1,1
1,2,23
2,3,13
3,4,12
4,5,1


In [70]:
new[1] = 100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new[1] = 100


In [71]:
new

Unnamed: 0,match_no,runs,1
0,1,1,100
1,2,23,100
2,3,13,100
3,4,12,100
4,5,1,100


In [72]:
new = vk.head().copy() #copy
new[1] = 264
new

Unnamed: 0,match_no,runs,1
0,1,1,264
1,2,23,264
2,3,13,264
3,4,12,264
4,5,1,264
