# Pandas Series from NumPy arrays

In [54]:
import pandas as pd
import numpy as np

# Display the installed pandas version.
pd.__version__

'2.2.2'

In [56]:
# Build a 1-D NumPy array holding the integers 0 through 5.
array = np.arange(0, 6)

array

array([0, 1, 2, 3, 4, 5])

In [58]:
# Wrap the NumPy array in a named pandas Series; pandas supplies a
# default 0..n-1 integer index.
new_series = pd.Series(data=array, name="new_series")

new_series

0    0
1    1
2    2
3    3
4    4
5    5
Name: new_series, dtype: int64

In [60]:
# Arithmetic mean of the Series values.
new_series.mean()

2.5

In [62]:
    new_series.dtype

dtype('int64')

In [64]:
# A Series is built on top of a NumPy array; .values returns that underlying
# array. (The pandas docs recommend .to_numpy() for new code.)
new_series.values

array([0, 1, 2, 3, 4, 5])

In [66]:
# Replace the default 0..5 labels with custom integer labels.
# Assigning to .index relabels the Series in place; the values are unchanged.
new_series.index=[10,20,30,40,50,60]

new_series

10    0
20    1
30    2
40    3
50    4
60    5
Name: new_series, dtype: int64

In [68]:
# A Series is one-dimensional and has no .reshape method, so this call raises
# AttributeError. Catch and print the error so the demonstration does not
# abort a Restart & Run All.
try:
    new_series.reshape(3,2)
except AttributeError as err:
    print(f"AttributeError: {err}")

# To get an actual 3x2 array, reshape the underlying NumPy data, e.g.
# new_series.to_numpy().reshape(3, 2). The Series itself is left untouched.
new_series

AttributeError: 'Series' object has no attribute 'reshape'

# Type conversion

In [71]:
# Fresh Series holding the integers 0-9 for the type-conversion examples.
new_series = pd.Series(data=np.arange(0, 10))

new_series

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [73]:
# Cast to integer; the data is already integer, so the displayed result is unchanged.
new_series.astype('int')

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [75]:
# Cast to float: every value gains a fractional part and the dtype becomes float64.
new_series.astype('float')

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
5    5.0
6    6.0
7    7.0
8    8.0
9    9.0
dtype: float64

In [77]:
# Cast to bool: 0 becomes False, every non-zero value becomes True.
new_series.astype('bool')

0    False
1     True
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9     True
dtype: bool

In [79]:
# Cast to pandas' dedicated string dtype (reported as "dtype: string").
new_series.astype('string')

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: string

In [81]:
# Cast to the generic object dtype; the values print the same but the dtype is object.
new_series.astype('object')

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: object

# Pandas Series Slicing and Indexing

In [84]:
# Small Series of the integers 0-4 used by the slicing and indexing examples.
series = pd.Series(data=range(0, 5))

series

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [86]:
# Integer slice with []: selects positions 1 and 2; the stop position 3 is excluded.
series[1:3]

1    1
2    2
dtype: int64

In [88]:
# Relabel the rows with string labels (assigning to .index modifies the Series in place).
series.index=['day 0','day 1','day 2','day 3','day 4']

series

day 0    0
day 1    1
day 2    2
day 3    3
day 4    4
dtype: int64

In [90]:
# Label slice with []: unlike a positional slice, BOTH endpoints
# ('day 1' and 'day 3') are included in the result.
series['day 1':'day 3']

day 1    1
day 2    2
day 3    3
dtype: int64

## .iloc accessor

In [93]:
# .iloc selects by integer position: the element at position 2.
series.iloc[2]

2

In [95]:
# A list of positions returns those rows as a Series, labels preserved.
series.iloc[[0,2,4]]

day 0    0
day 2    2
day 4    4
dtype: int64

In [97]:
# Positional slice: the stop position 4 is excluded, so positions 1-3 are returned.
series.iloc[1:4]

day 1    1
day 2    2
day 3    3
dtype: int64

## .loc accessor (custom labels)

In [100]:
# .loc selects by index label: the value stored under 'day 1'.
series.loc['day 1']

1

In [102]:
# A list of labels returns those rows as a Series.
series.loc[['day 1','day 3']]

day 1    1
day 3    3
dtype: int64

In [104]:
# Label slice with .loc: both endpoints are included.
series.loc['day 1':'day 3']

day 1    1
day 2    2
day 3    3
dtype: int64

## Duplicate indexes and resetting indexes

In [107]:
# Index labels do not have to be unique: assign repeated labels.
series.index=['day 0','day 0','day 0','day 2','day 2']

series

day 0    0
day 0    1
day 0    2
day 2    3
day 2    4
dtype: int64

In [109]:
# With duplicate labels, .loc returns every row carrying the label,
# so the result is a Series rather than a single scalar.
series.loc['day 0']

day 0    0
day 0    1
day 0    2
dtype: int64

In [111]:
# reset_index() moves the current labels into a column named 'index' and
# returns a DataFrame with a fresh 0..n-1 index.
series.reset_index()

Unnamed: 0,index,0
0,day 0,0
1,day 0,1
2,day 0,2
3,day 2,3
4,day 2,4


In [113]:
# drop=True discards the old labels rather than keeping them as a column,
# so the result is still a Series.
series.reset_index(drop=True)

0    0
1    1
2    2
3    3
4    4
dtype: int64

# Filtering series

In [116]:
# Assign unique string labels again before the filtering examples.
series.index=['day 0','day 1','day 2','day 3','day 4']

series

day 0    0
day 1    1
day 2    2
day 3    3
day 4    4
dtype: int64

In [118]:
# Boolean mask built from the index: keep rows whose label equals "day 1".
series.loc[series.index=="day 1"]

day 1    1
dtype: int64

In [120]:
# isin() builds a boolean mask over the values: keep rows whose value is 0, 2 or 4.
series.loc[series.isin([0,2,4])]

day 0    0
day 2    2
day 4    4
dtype: int64

In [122]:
# ~ inverts the boolean mask: keep rows whose value is NOT 0, 2 or 4.
series.loc[~series.isin([0,2,4])]

day 1    1
day 3    3
dtype: int64

# Sorting series

In [125]:
# Seeded generator so the random draws that follow are reproducible.
rng = np.random.default_rng(seed=2022)

In [127]:
# Draw five random floats from the seeded generator.
new_array = rng.random(size=5)

new_array

array([0.24742606, 0.09299006, 0.61176337, 0.06066207, 0.66103343])

In [129]:
# Scale the samples by 10, round to two decimals, and attach day labels
# that are out of order so the sorting examples have something to do.
my_series = pd.Series(
    data=(new_array * 10).round(decimals=2),
    index=['day 1', 'day 3', 'day 2', 'day 0', 'day 4'],
)

my_series

day 1    2.47
day 3    0.93
day 2    6.12
day 0    0.61
day 4    6.61
dtype: float64

In [131]:
# Sort by value (ascending by default); each label travels with its value.
my_series.sort_values()

day 0    0.61
day 3    0.93
day 1    2.47
day 2    6.12
day 4    6.61
dtype: float64

In [133]:
# Sort by index label rather than by value, in ascending label order.
my_series.sort_index(ascending=True)

day 0    0.61
day 1    2.47
day 2    6.12
day 3    0.93
day 4    6.61
dtype: float64

# Series arithmetic operations

In [144]:
# Rebind my_series to the integers 0-9 for the arithmetic examples.
my_series = pd.Series(data=np.arange(0, 10))

my_series

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [150]:
# Scalar arithmetic is applied element-wise: add 2 to every value.
my_series+2
# or my_series.add(2)

0     2
1     3
2     4
3     5
4     6
5     7
6     8
7     9
8    10
9    11
dtype: int64

In [154]:
# Element-wise floor division by 2; the result stays integer.
my_series//2

0    0
1    0
2    1
3    1
4    2
5    2
6    3
7    3
8    4
9    4
dtype: int64

In [156]:
# Element-wise true division by 2; the result is float64.
my_series/2

0    0.0
1    0.5
2    1.0
3    1.5
4    2.0
5    2.5
6    3.0
7    3.5
8    4.0
9    4.5
dtype: float64

# Series string methods

In [164]:
# Series of text labels used by the .str accessor examples.
string_series = pd.Series(
    data=['day 0', 'day 1', 'day 2'],
)

string_series

0    day 0
1    day 1
2    day 2
dtype: object

In [168]:
# .str exposes vectorised string methods: does each element contain 'day'?
string_series.str.contains('day')

0    True
1    True
2    True
dtype: bool

In [172]:
# .str[-1] takes the last character of each string, which is then cast to int.
# NOTE: only a single character is captured, so a label such as 'day 10'
# would yield 0 rather than 10.
string_series.str[-1].astype('int')

0    0
1    1
2    2
dtype: int64

In [180]:
# Split each string on a space; expand=True spreads the pieces across
# the columns of a DataFrame instead of returning lists.
string_series.str.split(' ',expand=True)

Unnamed: 0,0,1
0,day,0
1,day,1
2,day,2


# Aggregation

In [4]:
import numpy as np
import pandas as pd

In [13]:
# Re-create the seeded generator, draw ten random floats, and round them
# to two decimals for the aggregation examples.
rng = np.random.default_rng(seed=2022)

my_series = pd.Series(data=np.round(rng.random(size=10), decimals=2))

my_series

0    0.25
1    0.09
2    0.61
3    0.06
4    0.66
5    0.76
6    0.11
7    0.04
8    0.41
9    0.99
dtype: float64

In [15]:
# Arithmetic mean of the values.
my_series.mean()

0.39799999999999996

In [17]:
# Median of the values.
my_series.median()

0.32999999999999996

In [23]:
# Number of non-null observations in the Series.
my_series.count()

10