In [1]:
import pandas as pd
import numpy as np

In [2]:
# pd.Series converts an array into a pandas Series

array = np.arange(5)
series = pd.Series(array)

In [3]:
#name is optional
pd.Series(np.arange(5),name = 'Test array')

0    0
1    1
2    2
3    3
4    4
Name: Test array, dtype: int32

In [4]:
series.values

array([0, 1, 2, 3, 4])

In [5]:
series.values.mean()

2.0

In [6]:
# assigning new index values
series.index = [10,20,30,40,50]
series

10    0
20    1
30    2
40    3
50    4
dtype: int32

In [7]:
series.name = 'duck'
series

10    0
20    1
30    2
40    3
50    4
Name: duck, dtype: int32

In [8]:
series.dtype

dtype('int32')

In [9]:
pd.Series(range(5))

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [10]:
#astype() is used to convert the data type
pd.Series(range(5)).astype('float')

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
dtype: float64

In [11]:
pd.Series(range(5)).astype('bool')

0    False
1     True
2     True
3     True
4     True
dtype: bool

In [12]:
pd.Series(range(5)).astype('object')

0    0
1    1
2    2
3    3
4    4
dtype: object

In [13]:
pd.Series(range(5)).astype('string')

0    0
1    1
2    2
3    3
4    4
dtype: string

In [14]:
pd.Series(['a','b','c'])

0    a
1    b
2    c
dtype: object

In [15]:
my_series = pd.Series(range(5))
my_series

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [16]:
#indexing
my_series[3]

3

In [17]:
#stop is exclusive
my_series[2:4]


2    2
3    3
dtype: int64

In [18]:
#indexing with stepsize
my_series[1::2]

1    1
3    3
dtype: int64

In [19]:
days = ['day1','day2','day3','day4','day5']
my_series.index = days
my_series

day1    0
day2    1
day3    2
day4    3
day5    4
dtype: int64

In [20]:
my_series['day1']


0

In [21]:
my_series["day2":"day4"]

day2    1
day3    2
day4    3
dtype: int64

In [22]:
my_series[::2]

day1    0
day3    2
day5    4
dtype: int64

In [23]:
#iloc[] method
new_series = pd.Series([0,1,2,3,4], index=['day1','day2','day3','day4','day5'])
new_series

day1    0
day2    1
day3    2
day4    3
day5    4
dtype: int64

In [24]:
new_series.iloc[1]

1

In [25]:
new_series.iloc[[1,3,4]]

day2    1
day4    3
day5    4
dtype: int64

In [26]:
new_series.iloc[1:3]  

day2    1
day3    2
dtype: int64

In [27]:
#loc[] used with index values
new_series.loc['day2']

1

In [28]:
new_series.loc['day1':'day3']

day1    0
day2    1
day3    2
dtype: int64

In [29]:
new_series.index =[0,2,3,100,5]

In [30]:
# The filter compares values, not index labels: the row whose value is 2 is dropped, while the row labelled 2 stays
new_series[new_series != 2]


0      0
2      1
100    3
5      4
dtype: int64

In [31]:
new_series.loc[0:5]

0      0
2      1
3      2
100    3
5      4
dtype: int64

In [32]:
new_series[100]

3

In [33]:
#reset index
new_series = pd.Series([0,1,2,3,4], index=['day0','day0','day0','day2','day2'])
new_series.index

Index(['day0', 'day0', 'day0', 'day2', 'day2'], dtype='object')

In [34]:
 new_series['day0']

day0    0
day0    1
day0    2
dtype: int64

In [35]:
new_series.reset_index()

Unnamed: 0,index,0
0,day0,0
1,day0,1
2,day0,2
3,day2,3
4,day2,4


In [36]:
new_series.reset_index(drop=True).loc[2:4]

2    2
3    3
4    4
dtype: int64

In [37]:
#logical operator 
new_series != 2

day0     True
day0     True
day0    False
day2     True
day2     True
dtype: bool

In [38]:
new_series[new_series != 2 ]

day0    0
day0    1
day2    3
day2    4
dtype: int64

In [39]:
# ~ inverts a boolean mask (True <-> False), so this selects the rows where the condition is False
new_series[~(new_series != 2) ]

day0    2
dtype: int64

In [40]:
my_series.loc[my_series.isin([1,2])]

day2    1
day3    2
dtype: int64

In [41]:
my_series.loc[~my_series.isin([1,2])]

day1    0
day4    3
day5    4
dtype: int64

In [42]:
my_series.loc[my_series > 2]

day4    3
day5    4
dtype: int64

In [43]:
mask = (my_series.isin([1,2])) | (my_series > 2)
my_series.loc[mask]

day2    1
day3    2
day4    3
day5    4
dtype: int64

In [44]:
my_series.isin([1,2])

day1    False
day2     True
day3     True
day4    False
day5    False
dtype: bool

In [45]:
my_series[my_series.isin([1,2])]

day2    1
day3    2
dtype: int64

In [46]:
#sorting series
new_series = pd.Series([0,1,2,3,4], index=['day1','day2','day3','day4','day5'])
new_series

day1    0
day2    1
day3    2
day4    3
day5    4
dtype: int64

In [47]:
new_series.sort_values()

day1    0
day2    1
day3    2
day4    3
day5    4
dtype: int64

In [48]:
#sorting in descending order
new_series.sort_values(ascending = False)

day5    4
day4    3
day3    2
day2    1
day1    0
dtype: int64

In [49]:
new_series.sort_index()

day1    0
day2    1
day3    2
day4    3
day5    4
dtype: int64

In [50]:
new_series_sort = new_series.sort_index(ascending = False)

In [51]:
new_series_sort


day5    4
day4    3
day3    2
day2    1
day1    0
dtype: int64

In [52]:
# arithmetic operators & methods
new_series = pd.Series([0,pd.NA,2,3,4], index=['day1','day2','day3','day4','day5'])
new_series

day1       0
day2    <NA>
day3       2
day4       3
day5       4
dtype: object

In [53]:
new_series +1

day1       1
day2    <NA>
day3       3
day4       4
day5       5
dtype: object

In [54]:
new_series2 = new_series.add(1,fill_value=0).astype('int')

In [55]:
new_series2

day1    1
day2    1
day3    3
day4    4
day5    5
dtype: int32

In [56]:
new_series2 /2

day1    0.5
day2    0.5
day3    1.5
day4    2.0
day5    2.5
dtype: float64

In [57]:
# // is floor division: the quotient is rounded down (toward negative infinity), not to the nearest whole number
new_series2//2

day1    0
day2    0
day3    1
day4    2
day5    2
dtype: int32

In [58]:
new_series + my_series

day1       0
day2    <NA>
day3       4
day4       6
day5       8
dtype: object

In [59]:
new_series.add(my_series,fill_value=0) #to remove nan 

day1    0
day2    1
day3    4
day4    6
day5    8
dtype: object

In [60]:
#string methods
string_series = pd.Series(['day1','day2','day3','day4','day5'])
string_series

0    day1
1    day2
2    day3
3    day4
4    day5
dtype: object

In [61]:
string_series.str.contains('1')

0     True
1    False
2    False
3    False
4    False
dtype: bool

In [62]:
string_series.str.upper()

0    DAY1
1    DAY2
2    DAY3
3    DAY4
4    DAY5
dtype: object

In [63]:
# each .str method returns a new Series, so chaining a second string method requires .str again
string_series.str.upper().str.contains('DAY1')

0     True
1    False
2    False
3    False
4    False
dtype: bool

In [64]:
# removeprefix drops only the literal leading 'day'; strip('day') would instead
# strip any of the characters 'd', 'a', 'y' from BOTH ends of each string.
string_series.str.removeprefix('day').astype('int')

0    1
1    2
2    3
3    4
4    5
dtype: int32

In [65]:
string_series.str[1:3]

0    ay
1    ay
2    ay
3    ay
4    ay
dtype: object

In [66]:
string_series.str.split(' ',expand =True)

Unnamed: 0,0
0,day1
1,day2
2,day3
3,day4
4,day5


In [67]:
# aggregate numerical series


In [68]:
# categorical series aggregation
string_series2 = pd.Series(['day0','day0','day2','day2','day4'])
string_series2

0    day0
1    day0
2    day2
3    day2
4    day4
dtype: object

In [69]:
string_series2.unique()

array(['day0', 'day2', 'day4'], dtype=object)

In [70]:
string_series2.nunique()

3

In [71]:
string_series2.value_counts()

day0    2
day2    2
day4    1
Name: count, dtype: int64

In [72]:
string_series2.value_counts(normalize = True)

day0    0.4
day2    0.4
day4    0.2
Name: proportion, dtype: float64

In [73]:
# Missing data and how to handle it.
# NaN is a float, so an all-NaN Series gets dtype float64 and cannot be cast
# to a plain NumPy integer dtype.
# Use np.nan: the np.NaN alias was removed in NumPy 2.0.
my_series2 = pd.Series([np.nan] * 5)
my_series2

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
dtype: float64

In [74]:
# sum() adds up the True flags, i.e. the number of missing values.
# count() on the boolean mask would only return the number of rows, whatever they contain.
my_series2.isna().sum()

5

In [75]:
my_series2.isna()

0    True
1    True
2    True
3    True
4    True
dtype: bool

In [76]:
my_series2.value_counts()

Series([], Name: count, dtype: int64)

In [77]:
my_series2.value_counts(dropna=False)

NaN    5
Name: count, dtype: int64

In [78]:
my_series2 = pd.Series([pd.NA] * 5)
my_series2

0    <NA>
1    <NA>
2    <NA>
3    <NA>
4    <NA>
dtype: object

In [79]:
# The nullable 'Int64' dtype (capital I) can hold missing values as <NA>; NumPy's plain 'int64' cannot
my_series2.astype('Int64')

0    <NA>
1    <NA>
2    <NA>
3    <NA>
4    <NA>
dtype: Int64

In [80]:
my_series3 = pd.Series(range(5))

In [81]:
my_series3.loc[1:2] = pd.NA

In [82]:
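# Not a bug: an int64 Series cannot hold pd.NA, so pandas upcasts it to float64 and stores NaN (use the nullable 'Int64' dtype to keep <NA>)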
my_series3

0    0.0
1    NaN
2    NaN
3    3.0
4    4.0
dtype: float64

In [83]:
my_series3.value_counts(dropna=False)

NaN    2
0.0    1
3.0    1
4.0    1
Name: count, dtype: int64

In [84]:
my_series3.fillna(my_series3.mean())

0    0.000000
1    2.333333
2    2.333333
3    3.000000
4    4.000000
dtype: float64

In [85]:
my_series.fillna(0)

day1    0
day2    1
day3    2
day4    3
day5    4
dtype: int64

In [86]:
my_series.dropna()

day1    0
day2    1
day3    2
day4    3
day5    4
dtype: int64

In [87]:
my_series.dropna().reset_index(drop=True).astype('int')

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [88]:
# .apply() method
# First approach: pass a named function

def discount(price, threshold=20, rate=0.9):
    """Return ``price`` after applying a discount to expensive items.

    Prices strictly greater than ``threshold`` are multiplied by ``rate`` and
    rounded to 2 decimal places; every other price is returned unchanged.

    Parameters
    ----------
    price : float
        Original price.
    threshold : float, default 20
        Prices must exceed this value to be discounted.
    rate : float, default 0.9
        Multiplier applied to discounted prices (0.9 means 10% off).

    Returns
    -------
    float
        The discounted price, or ``price`` itself when no discount applies.
    """
    if price > threshold:
        return round(price * rate, 2)
    return price

In [89]:
clean_wholesale = pd.Series(['3.99','5.99','22.99','7.99','33.99']).astype('float')
clean_wholesale

0     3.99
1     5.99
2    22.99
3     7.99
4    33.99
dtype: float64

In [90]:
clean_wholesale.apply(discount)

0     3.99
1     5.99
2    20.69
3     7.99
4    30.59
dtype: float64

In [91]:
# Second approach: pass a lambda
clean_wholesale.apply(lambda x: round(x*0.9,2) if x>20 else x)

0     3.99
1     5.99
2    20.69
3     7.99
4    30.59
dtype: float64

In [92]:
string_series2

0    day0
1    day0
2    day2
3    day2
4    day4
dtype: object

In [95]:
def search(string, looking_for):
    """Tell whether ``looking_for`` occurs inside ``string``.

    Returns 'found it' when ``looking_for in string`` is true, otherwise 'nope'.
    """
    return 'found it' if looking_for in string else 'nope'

In [109]:
# `args` must be a tuple of extra positional arguments for `search`.
# A bare string is unpacked character by character, so it only works
# by accident for a one-character search term.
string_series2.apply(search, args=('2',))

0        nope
1        nope
2    found it
3    found it
4        nope
dtype: object

In [113]:
# .where(cond, other) keeps the values where cond is True and replaces the rest with `other`.
# First pass: rows that do not contain '2' become 'nope'.
# Second pass: the condition is inverted, so rows that do contain '2' become 'found it'.
string_series2.where(string_series2.str.contains('2'),'nope').where(~string_series2.str.contains('2'), 'found it')

0        nope
1        nope
2    found it
3    found it
4        nope
dtype: object

In [118]:
# Chaining .where() twice is clumsy, which is why np.where is used instead: it picks between two values in a single call
pd.Series(np.where(string_series2.str.contains('2'), 'found it','nope'))

0        nope
1        nope
2    found it
3    found it
4        nope
dtype: object