### What is Pandas

Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
built on top of the Python programming language.

https://pandas.pydata.org/about/index.html


### Pandas Series

A Pandas Series is like a column in a table. It is a 1-D array holding data of any type.

## Import Pandas

In [1]:
import pandas as pd
import numpy as np


Series from list

In [2]:
country =['India', 'Pakistan', 'England', 'Nepal', 'Srilanka']
pd.Series(country)

0       India
1    Pakistan
2     England
3       Nepal
4    Srilanka
dtype: object

In [3]:
runs =[30, 70, 40, 90]
pd.Series(runs)

0    30
1    70
2    40
3    90
dtype: int64

In [4]:
# custom index
marks =[67, 89,100,76]
subject=['math', 'hindi' ,'english', 'physics']
pd.Series(marks, index=subject)


math        67
hindi       89
english    100
physics     76
dtype: int64

In [5]:
marks =pd.Series(marks, index=subject, name='Mishra Ke Marks ')

In [6]:
marks

math        67
hindi       89
english    100
physics     76
Name: Mishra Ke Marks , dtype: int64

Series from dict

In [7]:
marks ={
    'math':80,
    'english':60,
    'hindi':90,
    'physics':70,
}
marks_series=pd.Series(marks, name='Mishra Ke Marks ')
marks_series

math       80
english    60
hindi      90
physics    70
Name: Mishra Ke Marks , dtype: int64

Attributes of series

In [8]:
marks_series.size

4

In [9]:
marks_series.is_unique

True

In [10]:
marks_series.name

'Mishra Ke Marks '

In [11]:
marks_series.index

Index(['math', 'english', 'hindi', 'physics'], dtype='object')

series usnig read csv

In [12]:
subs = pd.read_csv('subs.csv')  # Read the CSV file into a DataFrame
subs = subs.squeeze()  # Convert the DataFrame to a Series if it has only one column


In [13]:
subs

0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [14]:
# with 2 cols
vk =pd.read_csv('kohli_ipl.csv', index_col='match_no')
vk=vk.squeeze()
vk.head()

match_no
1     1
2    23
3    13
4    12
5     1
Name: runs, dtype: int64

In [15]:
movies =pd.read_csv('bollywood.csv', index_col='movie')
movies=movies.squeeze()
movies.head()

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
Name: lead, dtype: object

Series methods

In [16]:
# head or tail
subs.tail()

360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, dtype: int64

In [17]:
subs.head()

0    48
1    57
2    40
3    43
4    44
Name: Subscribers gained, dtype: int64

In [18]:
vk.head()

match_no
1     1
2    23
3    13
4    12
5     1
Name: runs, dtype: int64

In [19]:
vk.tail(10)

match_no
206     0
207     0
208     9
209    58
210    30
211     0
212    20
213    73
214    25
215     7
Name: runs, dtype: int64

In [20]:
movies.sample(10)

movie
P Se Pyaar F Se Faraar       Bhavesh Kumar
Paathshaala                   Nana Patekar
Bang Bang!                  Hrithik Roshan
Aashayein                     John Abraham
Pink (2016 film)             Taapsee Pannu
Pal Pal Dil Ke Paas             Karan Deol
Aurangzeb (film)              Arjun Kapoor
Aankhen (2002 film)       Amitabh Bachchan
John Day (film)           Naseeruddin Shah
Tahaan                     Victor Banerjee
Name: lead, dtype: object

In [21]:
# values cou
movies.value_counts()

lead
Akshay Kumar            48
Amitabh Bachchan        45
Ajay Devgn              38
Salman Khan             31
Sanjay Dutt             26
                        ..
Tanishaa Mukerji         1
Tanuja                   1
Ankit                    1
Rakhee Gulzar            1
Geetika Vidya Ohlyan     1
Name: count, Length: 566, dtype: int64

In [22]:
# sort values
vk.sort_values(ascending=False).head()


match_no
128    113
126    109
123    108
120    100
164    100
Name: runs, dtype: int64

In [23]:
vk.sort_values(ascending=False).head(1).values[0]

np.int64(113)

In [24]:
vk = vk.copy()  # Create a copy of the Series
vk.sort_values(inplace=True)  # Sort the Series in-place


In [25]:
vk

match_no
8        0
87       0
93       0
91       0
206      0
      ... 
164    100
120    100
123    108
126    109
128    113
Name: runs, Length: 215, dtype: int64

In [26]:
movies.sort_index()

movie
1920 (film)                   Rajniesh Duggall
1920: London                     Sharman Joshi
1920: The Evil Returns             Vicky Ahuja
1971 (2007 film)                Manoj Bajpayee
2 States (2014 film)              Arjun Kapoor
                                   ...        
Zindagi 50-50                      Veena Malik
Zindagi Na Milegi Dobara        Hrithik Roshan
Zindagi Tere Naam           Mithun Chakraborty
Zokkomon                       Darsheel Safary
Zor Lagaa Ke...Haiya!            Meghan Jadhav
Name: lead, Length: 1500, dtype: object

In [27]:
vk.sort_values(inplace=True)

In [28]:
vk

match_no
8        0
87       0
93       0
91       0
206      0
      ... 
164    100
120    100
123    108
126    109
128    113
Name: runs, Length: 215, dtype: int64

In [29]:
vk.count()

np.int64(215)

In [30]:
subs.sum()

np.int64(49510)

In [31]:
subs.product()

np.int64(0)

In [32]:
# mean -> median -> mode -> std -> var 
subs.mean()
print(vk.median())
print(movies.mode())
print(subs.std())

24.0
0    Akshay Kumar
Name: lead, dtype: object
62.6750230372527


In [33]:
subs.min()

np.int64(33)

In [34]:
subs.max()

np.int64(396)

In [35]:
vk.describe()

count    215.000000
mean      30.855814
std       26.229801
min        0.000000
25%        9.000000
50%       24.000000
75%       48.000000
max      113.000000
Name: runs, dtype: float64

series Indexing

In [36]:
x =pd.Series([12,13,14, 70, 84, 7])
x

0    12
1    13
2    14
3    70
4    84
5     7
dtype: int64

In [37]:
x[1]

np.int64(13)

In [38]:
movies

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

In [39]:
movies[0]

  movies[0]


'Vicky Kaushal'

In [40]:
vk[5:16]

match_no
207    0
135    0
130    0
211    0
106    1
204    1
113    1
77     1
1      1
5      1
75     1
Name: runs, dtype: int64

In [41]:
vk[-5::]

match_no
164    100
120    100
123    108
126    109
128    113
Name: runs, dtype: int64

Editing Series

In [44]:
marks_series[1] =100
marks_series

  marks_series[1] =100


math        80
english    100
hindi       90
physics     70
Name: Mishra Ke Marks , dtype: int64

In [45]:
marks_series['sst'] =90

In [46]:
marks_series

math        80
english    100
hindi       90
physics     70
sst         90
Name: Mishra Ke Marks , dtype: int64

In [49]:
movies['2 states (2014 film)'] ='Alia Bhatt'
movies

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
2 states (2014 film)                          Alia Bhatt
Name: lead, Length: 1501, dtype: object

series  with python

In [54]:
# len/type/dir/sorted/max/min
print(len(subs))
print(type(subs))
print(dir(subs))
print(sorted(subs))
print(min(subs))
print(max(subs))

365
<class 'pandas.core.series.Series'>
['T', '_AXIS_LEN', '_AXIS_ORDERS', '_AXIS_TO_AXIS_NUMBER', '_HANDLED_TYPES', '__abs__', '__add__', '__and__', '__annotations__', '__array__', '__array_priority__', '__array_ufunc__', '__bool__', '__class__', '__column_consortium_standard__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__divmod__', '__doc__', '__eq__', '__finalize__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__', '__imul__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pandas_priority__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce

In [55]:
 # types conversion 
list(marks_series)

[80, 100, 90, 70, 90]

In [None]:
# member ship opreater



In [56]:
for i in movies:
    print(i)

Vicky Kaushal
Vicky Ahuja
Anupam Kher
Emraan Hashmi
Mona Ambegaonkar
Geetika Vidya Ohlyan
Arshad Warsi
Radhika Apte
Kangana Ranaut
Nawazuddin Siddiqui
Ali Asgar
Ranveer Singh
Prit Kamani
Ajay Devgn
Sushant Singh Rajput
Amitabh Bachchan
Abhimanyu Dasani
Talha Arshad Reshi
Nawazuddin Siddiqui
Garima Agarwal
Rasika Agashe
Barun Sobti
Akshay Kumar
Zaheer Iqbal
Vidyut Jammwal
Deepika Amin
Manav Kaul
Naseeruddin Shah
Varun Dhawan
Shreyas Talpade
Tiger Shroff
Boman Irani
Ajay Devgn
Arjun Kapoor
Gavie Chahal
Prabhu Deva
Shahid Kapoor
Ayushmann Khurrana
Anupam Kher
Karanvir Bohra
Hrithik Roshan
Jimmy Sheirgill
John Abraham
Rishi Kapoor
Kangana Ranaut
Natalia Janoszek
Diljit Dosanjh
Sidharth Malhotra
Rajeev Khandelwal
Zaira Wasim
Akshay Kumar
Jacqueline Fernandez
Ayushmann Khurrana
Akshaye Khanna
Sonam Kapoor
Karan Deol
Sanjay Dutt
Bhavesh Kumar
Sanaya Irani
Ayushmann Khurrana
Siddhanth Kapoor
Akshay Kumar
Taapsee Pannu
Rajkummar Rao
Sunny Singh Nijjar
Neil Nitin Mukesh
Suraj Pancholi
Boman Iran

In [57]:
# Arithmetic Opreaters 
100 - marks_series

math       20
english     0
hindi      10
physics    30
sst        10
Name: Mishra Ke Marks , dtype: int64

In [None]:
# Relational opreaters 