# Introduction to Series in Pandas

In [3]:
import pandas as pd   # importing pandas library

# Creating a Series

In [18]:
# Build a Series from a plain Python list. No labels are supplied, so pandas
# assigns the default integer index 0..n-1.
d1 = [1, 2, 3, 4, 5, 6, 7]
df = pd.Series(data=d1)
print(df)  # shows the index/value pairs followed by the dtype

0    1
1    2
2    3
3    4
4    5
5    6
6    7
dtype: int64


In [20]:
# Build a Series from a dictionary: the keys become the index labels and the
# values become the data.
d2 = {"a": 11, "b": 22, "c": 33, "d": 55}
df1 = pd.Series(data=d2)
print(df1)  # shows the label/value pairs followed by the dtype

a    11
b    22
c    33
d    55
dtype: int64


In [152]:
# Exam marks, one per student. `None` stands for a missing (null) mark; pandas
# stores it as NaN, which is why the resulting dtype is float64.
marks = [
    66, 55, 88, 45, 76, 98, 67, 87, 51, 89, 56, 86,
    43, 65, 44, None, 88, None, None, None, 35, 69, None,
]
# Student names, matched to `marks` by position and used as index labels.
# "balaji" appears twice, so the index is not unique.
name = [
    "rahul", "riya", "sham", "kunal", "ram", "arjun", "datta", "batti",
    "mira", "nanda", "switi", "manohar", "atul", "ashwin", "balaji",
    "kavita", "balaji", "kumar", "nayan", "praghya", "sanyukt",
    "ramkishan", "krishna",
]
# `name` is an attribute (not a function) that labels the whole Series; it is
# printed in the footer of the Series and can be read back with `df.name`.
df = pd.Series(
    data=marks,
    index=name,
    name="Student exam detail Mark and Name",
)
print(df, df.name, sep="\n")

rahul        66.0
riya         55.0
sham         88.0
kunal        45.0
ram          76.0
arjun        98.0
datta        67.0
batti        87.0
mira         51.0
nanda        89.0
switi        56.0
manohar      86.0
atul         43.0
ashwin       65.0
balaji       44.0
kavita        NaN
balaji       88.0
kumar         NaN
nayan         NaN
praghya       NaN
sanyukt      35.0
ramkishan    69.0
krishna       NaN
Name: Student exam detail Mark and Name, dtype: float64
Student exam detail Mark and Name


In [173]:
""" df.head(n): Returns the first n elements.
df.tail(n): Returns the last n elements.
df.shape: Returns the shape of the Series.
df.size: Returns the number of elements in the Series.
df.empty: Checks if the Series is empty.
df.ndim: Returns the number of dimensions (always 1 for Series). """
print(df.head())
print()
print(df.tail())
print()
print(df.shape)
print()
print(df.size)
print()
print(df.empty)
print()
print(df.ndim)



rahul    66.0
riya     55.0
sham     88.0
kunal    45.0
ram      76.0
dtype: float64

ashwin       65.000000
balaji       44.000000
kavita       67.111111
sanyukt      35.000000
ramkishan    69.000000
dtype: float64

(18,)

18

False

1


# Accessing Series elements using the "loc" and "iloc" indexers

In [153]:
# 1. `loc` accesses elements by index label.
marks = [
    66, 55, 88, 45, 76, 98, 67, 87, 51, 89, 56, 86,
    43, 65, 44, None, 88, None, None, None, 35, 69, None,
]
name = [
    "rahul", "riya", "sham", "kunal", "ram", "arjun", "datta", "batti",
    "mira", "nanda", "switi", "manohar", "atul", "ashwin", "balaji",
    "kavita", "balaji", "kumar", "nayan", "praghya", "sanyukt",
    "ramkishan", "krishna",
]
df = pd.Series(data=marks, index=name)
# A single label returns one value. A label slice returns a sub-Series and
# includes the end label ("nanda") as well as the start label.
print(df.loc["ram"], df.loc["ram":"nanda"], sep="\n\n")



76.0

ram      76.0
arjun    98.0
datta    67.0
batti    87.0
mira     51.0
nanda    89.0
dtype: float64


In [154]:
# 2. `iloc` accesses elements by integer position (0-based).
marks = [
    66, 55, 88, 45, 76, 98, 67, 87, 51, 89, 56, 86,
    43, 65, 44, None, 88, None, None, None, 35, 69, None,
]
name = [
    "rahul", "riya", "sham", "kunal", "ram", "arjun", "datta", "batti",
    "mira", "nanda", "switi", "manohar", "atul", "ashwin", "balaji",
    "kavita", "balaji", "kumar", "nayan", "praghya", "sanyukt",
    "ramkishan", "krishna",
]
df = pd.Series(data=marks, index=name)
# A single position returns one value. A positional slice returns a sub-Series
# and excludes the end position, so 6:9 yields positions 6, 7 and 8.
print(df.iloc[8], df.iloc[6:9], sep="\n\n")



51.0

datta    67.0
batti    87.0
mira     51.0
dtype: float64


# Getting information about a Series using the "info()" function

In [155]:
# Print a concise summary of the Series: index range, Series name, non-null
# count, dtype and memory usage.
df.info()

<class 'pandas.core.series.Series'>
Index: 23 entries, rahul to krishna
Series name: None
Non-Null Count  Dtype  
--------------  -----  
18 non-null     float64
dtypes: float64(1)
memory usage: 368.0+ bytes


# Getting statistical information about a Series using the "describe()" function

In [156]:
# Descriptive statistics of the marks (count, mean, std, min, quartiles, max).
# NaN entries are excluded, so `count` is the number of non-null values.
df.describe()

count    18.000000
mean     67.111111
std      19.268654
min      35.000000
25%      52.000000
50%      66.500000
75%      86.750000
max      98.000000
dtype: float64

In [157]:
# The three building blocks of a Series, printed with a blank line in between.
print(
    df.dtype,   # the data type of the Series
    df.values,  # the data (values) of the Series
    df.index,   # the index (labels) of the Series
    sep="\n\n",
)

float64

[66. 55. 88. 45. 76. 98. 67. 87. 51. 89. 56. 86. 43. 65. 44. nan 88. nan
 nan nan 35. 69. nan]

Index(['rahul', 'riya', 'sham', 'kunal', 'ram', 'arjun', 'datta', 'batti',
       'mira', 'nanda', 'switi', 'manohar', 'atul', 'ashwin', 'balaji',
       'kavita', 'balaji', 'kumar', 'nayan', 'praghya', 'sanyukt', 'ramkishan',
       'krishna'],
      dtype='object')


# Checking for null (NaN) values in a Series using the "isnull()" function

In [158]:
df.isnull().sum()  # Returns the total number of null (NaN) values

5

In [159]:
df.isnull()  # Boolean mask: True where the value is null/NaN, False where it is not

rahul        False
riya         False
sham         False
kunal        False
ram          False
arjun        False
datta        False
batti        False
mira         False
nanda        False
switi        False
manohar      False
atul         False
ashwin       False
balaji       False
kavita        True
balaji       False
kumar         True
nayan         True
praghya       True
sanyukt      False
ramkishan    False
krishna       True
dtype: bool

In [160]:
# Show only the entries whose value is null/NaN by selecting with the boolean
# mask returned by isnull().
df.loc[df.isnull()]

kavita    NaN
kumar     NaN
nayan     NaN
praghya   NaN
krishna   NaN
dtype: float64

# Fill NaN or null Values

In [161]:
""" 1. Fill with a Constant Value - df.fillna(0)
    2. Forward Fill (method='ffill')
    3. Backward Fill (method='bfill')
    4. Fill Using Interpolation (method='interpolate')
    5. Fill with a Value Dict or Series
    6. Fill with Mean, Median, or Mode - df.fillna(df.mean())
    7. Limiting the Number of Fills - filled = s.fillna(method='ffill', limit=1)
    8. Fill in Place - df.fillna(0, inplace=True) """
df.fillna(df.mean(),inplace=True)

In [162]:
# After filling, re-check the NaN count; it should now be 0 (zero).
df.isnull().sum()

0

# Checking for duplicate values in a Series using the "duplicated()" function

In [163]:
# Duplicates are judged by value, not by index label: an entry counts as a
# duplicate when the same mark already appeared earlier in the Series.
df.duplicated().sum()  # Returns the total number of duplicated entries

5

In [164]:
df.duplicated()  # Boolean mask: True where the value repeats an earlier one, False otherwise

rahul        False
riya         False
sham         False
kunal        False
ram          False
arjun        False
datta        False
batti        False
mira         False
nanda        False
switi        False
manohar      False
atul         False
ashwin       False
balaji       False
kavita       False
balaji        True
kumar         True
nayan         True
praghya       True
sanyukt      False
ramkishan    False
krishna       True
dtype: bool

In [165]:
# Show only the entries flagged as duplicates by selecting with the boolean
# mask returned by duplicated().
print(df.loc[df.duplicated()])

balaji     88.000000
kumar      67.111111
nayan      67.111111
praghya    67.111111
krishna    67.111111
dtype: float64


# Drop Duplicated rows using "drop_duplicates()"

In [166]:
# Keep only the first occurrence of each mark. The result is assigned back to
# `df` instead of using inplace=True, so the cell does not mutate the object
# that earlier cells displayed.
# NOTE(review): duplicates are detected on the values only, so students who
# merely share a mark with an earlier student (e.g. the second "balaji" with
# 88, and the mean-filled entries) are removed - confirm this is intended.
df = df.drop_duplicates()

In [167]:
# After dropping duplicates, check again: the duplicate count should also be 0.
df.duplicated().sum()

0

# Arrange marks in ascending order using the "sort_values()" function

In [168]:
# sort_values() returns a new Series ordered from the lowest to the highest
# mark; `df` itself keeps its original order.
print(df.sort_values())

sanyukt      35.000000
atul         43.000000
balaji       44.000000
kunal        45.000000
mira         51.000000
riya         55.000000
switi        56.000000
ashwin       65.000000
rahul        66.000000
datta        67.000000
kavita       67.111111
ramkishan    69.000000
ram          76.000000
manohar      86.000000
batti        87.000000
sham         88.000000
nanda        89.000000
arjun        98.000000
dtype: float64


# Aggregate Functions

In [169]:
# Python's built-in functions work on a Series because it is iterable.
# Unlike Series.sum() / min() / max(), the built-ins do not skip NaN values,
# so they are only meaningful here because the missing marks were filled
# earlier.
print(
    len(df),     # number of elements
    sum(df),     # total of all marks
    min(df),     # lowest mark
    max(df),     # highest mark
    sorted(df),  # marks in ascending order, as a plain Python list
    sep="\n",
)

18
1187.111111111111
35.0
98.0
[35.0, 43.0, 44.0, 45.0, 51.0, 55.0, 56.0, 65.0, 66.0, 67.0, 67.11111111111111, 69.0, 76.0, 86.0, 87.0, 88.0, 89.0, 98.0]


In [170]:
# Distinct marks as a NumPy array, in order of first appearance.
df.unique()

array([66.        , 55.        , 88.        , 45.        , 76.        ,
       98.        , 67.        , 87.        , 51.        , 89.        ,
       56.        , 86.        , 43.        , 65.        , 44.        ,
       67.11111111, 35.        , 69.        ])

In [171]:
# How many times each distinct mark occurs. Every count is 1 here because the
# duplicated marks were dropped in an earlier cell.
df.value_counts()

66.000000    1
55.000000    1
35.000000    1
67.111111    1
44.000000    1
65.000000    1
43.000000    1
86.000000    1
56.000000    1
89.000000    1
51.000000    1
87.000000    1
67.000000    1
98.000000    1
76.000000    1
45.000000    1
88.000000    1
69.000000    1
Name: count, dtype: int64