In [7]:
import pandas as pd
import numpy as np

# Creating series

In [70]:
# series : 1D 
e=pd.Series([10,40,80,60,20,30]) # we want to keep capiatal S
e

0    10
1    40
2    80
3    60
4    20
5    30
dtype: int64

In [17]:
e=pd.Series([1,2,3,4,5,6,'apple'])
e

0        1
1        2
2        3
3        4
4        5
5        6
6    apple
dtype: object

In [23]:
e.describe()

count    6.000000
mean     3.500000
std      1.870829
min      1.000000
25%      2.250000
50%      3.500000
75%      4.750000
max      6.000000
dtype: float64

# Data Manipulation

In [31]:
'''  map(func): Applies a function to each element in the Series.   '''

s=e.map(lambda x: x*2)

In [33]:
s

0     20
1     40
2     80
3    120
dtype: int64

In [35]:
''' .apply(func): Similar to map, but more flexible. (Can be used Data Frames as well,
where as map is only for Series)   '''

s=e.apply(lambda x:x*2)
s

0     20
1     40
2     80
3    120
dtype: int64

In [41]:
''' sorting the series'''
s=e.sort_values()    # change the incdicing alues
s

0    10
4    20
5    30
1    40
3    60
2    80
dtype: int64

In [66]:
''' drop the values'''
s=e.drop(1, inplace=True)         # inplace will  replace the series to e
s

In [72]:
e

0    10
1    40
2    80
3    60
4    20
5    30
dtype: int64

# Handling Missing Data

In [78]:
# Create a Series with missing values
d = pd.Series([1, 2, np.nan, 4, np.nan])

# Print the Series
print(d)

0    1.0
1    2.0
2    NaN
3    4.0
4    NaN
dtype: float64


In [80]:
d.isnull()

0    False
1    False
2     True
3    False
4     True
dtype: bool

In [82]:
d.notnull()

0     True
1     True
2    False
3     True
4    False
dtype: bool

In [88]:

''' filling the values where there is no values insted of np.nan'''
f=d.fillna(5)
f

0    1.0
1    2.0
2    5.0
3    4.0
4    5.0
dtype: float64

In [92]:
'''dropna removing the empty values of np.nan'''
g=d.dropna()
g

0    1.0
1    2.0
3    4.0
dtype: float64

# Indexing, Slicing, and Filtering

In [94]:
'''s.iloc[ ]:** Purely integer-location based indexing'''
# Indexing by position
s = pd.Series([1, 3, 5, 7, 9])
print(s.iloc[0])  # First element
print(s.iloc[-1])  # Last element

1
9


In [98]:
#**s.loc[ ]:** Label-based indexing.
print(s.loc[0])
print(s.loc[1])

1
3


In [104]:
#difference between iloc and loc
n=pd.Series([1,5,8,10,20],index=['A','B','C','D','E'])
n

A     1
B     5
C     8
D    10
E    20
dtype: int64

In [108]:
print(n.iloc[2],'by using iloc')
print(n.iloc[0:4],'by using iloc integer') #here its doesnt print  4 elemnet:20

8 by using iloc
A     1
B     5
C     8
D    10
dtype: int64 by using iloc integer


In [116]:
print(n.loc['B'])   #its is based on index
print(n.loc['A':'E'])  # it will print  E index also :20

5
A     1
B     5
C     8
D    10
E    20
dtype: int64


In [118]:
# Filtering elements greater than 5
filtered = n[n > 8]
print(filtered)

D    10
E    20
dtype: int64


# Aggregation

In [138]:
a=pd.Series([10,20,30,40,50,50])
a

0    10
1    20
2    30
3    40
4    50
5    50
dtype: int64

In [140]:
a.sum()

200

In [142]:
a.cumsum()

0     10
1     30
2     60
3    100
4    150
5    200
dtype: int64

In [144]:
agg=a.aggregate(['sum','mean','median','mode'])
agg

sum       200.000000
mean       33.333333
median     35.000000
dtype: float64