# Pandas Series (built on NumPy arrays)

In [3]:
import pandas as pd
import numpy as np

pd.__version__

'2.2.2'

In [4]:
# Six consecutive integers, 0 through 5, used to build the first Series.
array = np.arange(0, 6)

array

array([0, 1, 2, 3, 4, 5])

In [5]:
new_series=pd.Series(array,name="new_series")

new_series

0    0
1    1
2    2
3    3
4    4
5    5
Name: new_series, dtype: int64

In [6]:
new_series.mean()

2.5

In [7]:
    new_series.dtype

dtype('int64')

In [8]:
new_series.values # a Series is built on top of a NumPy array; .values exposes that array

array([0, 1, 2, 3, 4, 5])

In [9]:
new_series.index=[10,20,30,40,50,60]

new_series

10    0
20    1
30    2
40    3
50    4
60    5
Name: new_series, dtype: int64

In [10]:
# A Series exposes no `reshape` method, so this call raises AttributeError.
# Catch and print the error so the failure is still shown, but
# "Restart & Run All" can continue past this cell. The bare `new_series`
# expression that used to follow the call could never run and was dropped.
try:
    new_series.reshape(3,2)
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")

AttributeError: 'Series' object has no attribute 'reshape'

# Type conversion

In [38]:
new_series=pd.Series(np.arange(10))

new_series

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [39]:
new_series.astype('int')

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [40]:
new_series.astype('float')

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
5    5.0
6    6.0
7    7.0
8    8.0
9    9.0
dtype: float64

In [41]:
new_series.astype('bool')

0    False
1     True
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9     True
dtype: bool

In [42]:
new_series.astype('string')

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: string

In [43]:
new_series.astype('object')

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: object

# Pandas Series Slicing and Indexing

In [45]:
series=pd.Series(range(5))

series

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [46]:
series[1:3]

1    1
2    2
dtype: int64

In [47]:
series.index=['day 0','day 1','day 2','day 3','day 4']

series

day 0    0
day 1    1
day 2    2
day 3    3
day 4    4
dtype: int64

In [48]:
series['day 1':'day 3']

day 1    1
day 2    2
day 3    3
dtype: int64

## .iloc accessor

In [50]:
series.iloc[2]

2

In [51]:
series.iloc[[0,2,4]]

day 0    0
day 2    2
day 4    4
dtype: int64

In [52]:
series.iloc[1:4]

day 1    1
day 2    2
day 3    3
dtype: int64

## .loc accessor (custom labels)

In [54]:
series.loc['day 1']

1

In [55]:
series.loc[['day 1','day 3']]

day 1    1
day 3    3
dtype: int64

In [56]:
series.loc['day 1':'day 3']

day 1    1
day 2    2
day 3    3
dtype: int64

## duplicate indexes and resetting indexes

In [58]:
series.index=['day 0','day 0','day 0','day 2','day 2']

series

day 0    0
day 0    1
day 0    2
day 2    3
day 2    4
dtype: int64

In [59]:
series.loc['day 0']

day 0    0
day 0    1
day 0    2
dtype: int64

In [60]:
series.reset_index() # generates a dataframe by default.

Unnamed: 0,index,0
0,day 0,0
1,day 0,1
2,day 0,2
3,day 2,3
4,day 2,4


In [61]:
series.reset_index(drop=True)

0    0
1    1
2    2
3    3
4    4
dtype: int64

# Filtering series

In [63]:
series.index=['day 0','day 1','day 2','day 3','day 4']

series

day 0    0
day 1    1
day 2    2
day 3    3
day 4    4
dtype: int64

In [64]:
series.loc[series.index=="day 1"]

day 1    1
dtype: int64

In [65]:
series.loc[series.isin([0,2,4])]

day 0    0
day 2    2
day 4    4
dtype: int64

In [66]:
series.loc[~series.isin([0,2,4])]

day 1    1
day 3    3
dtype: int64

# Sorting series

In [68]:
rng=np.random.default_rng(2022)

In [69]:
new_array=rng.random(5)

new_array

array([0.24742606, 0.09299006, 0.61176337, 0.06066207, 0.66103343])

In [70]:
# Scale the random draws by 10, keep two decimals, and attach unordered day
# labels that the sorting cells below rearrange.
my_series = pd.Series(
    np.round(new_array * 10, decimals=2),
    index=['day 1', 'day 3', 'day 2', 'day 0', 'day 4'],
)

my_series

day 1    2.47
day 3    0.93
day 2    6.12
day 0    0.61
day 4    6.61
dtype: float64

In [71]:
my_series.sort_values()

day 0    0.61
day 3    0.93
day 1    2.47
day 2    6.12
day 4    6.61
dtype: float64

In [72]:
my_series.sort_index(ascending=True)

day 0    0.61
day 1    2.47
day 2    6.12
day 3    0.93
day 4    6.61
dtype: float64

# Series arithmetic operations

In [74]:
my_series=pd.Series(np.arange(10))

my_series

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [75]:
my_series+2
# or my_series.add(2)

0     2
1     3
2     4
3     5
4     6
5     7
6     8
7     9
8    10
9    11
dtype: int64

In [76]:
my_series//2

0    0
1    0
2    1
3    1
4    2
5    2
6    3
7    3
8    4
9    4
dtype: int64

In [77]:
my_series/2

0    0.0
1    0.5
2    1.0
3    1.5
4    2.0
5    2.5
6    3.0
7    3.5
8    4.0
9    4.5
dtype: float64

# Series string methods

In [79]:
string_series=pd.Series(['day 0','day 1','day 2'])

string_series

0    day 0
1    day 1
2    day 2
dtype: object

In [80]:
string_series.str.contains('day')

0    True
1    True
2    True
dtype: bool

In [81]:
string_series.str[-1].astype('int')

0    0
1    1
2    2
dtype: int64

In [82]:
string_series.str.split(' ',expand=True)

Unnamed: 0,0,1
0,day,0
1,day,1
2,day,2


# Aggregation

In [84]:
import numpy as np
import pandas as pd

In [85]:
# Fresh generator with a fixed seed so the ten draws below are reproducible.
rng = np.random.default_rng(seed=2022)

# Ten uniform draws from [0, 1), rounded to two decimals.
my_series = pd.Series(np.round(rng.random(size=10), decimals=2))

my_series

0    0.25
1    0.09
2    0.61
3    0.06
4    0.66
5    0.76
6    0.11
7    0.04
8    0.41
9    0.99
dtype: float64

In [86]:
my_series.mean()

0.39799999999999996

In [87]:
my_series.median()

0.32999999999999996

In [88]:
my_series.count()

10

In [89]:
my_series=pd.Series(['day 0','day 0','day 0','day 2','day 2','day 4'])

my_series

0    day 0
1    day 0
2    day 0
3    day 2
4    day 2
5    day 4
dtype: object

In [90]:
my_series.unique()

array(['day 0', 'day 2', 'day 4'], dtype=object)

In [91]:
my_series.nunique()

3

In [92]:
my_series.value_counts(normalize=True).round(2)

day 0    0.50
day 2    0.33
day 4    0.17
Name: proportion, dtype: float64

# Missing data in series

In [143]:
# NumPy's documentation asks for `np.nan` rather than the legacy alias
# `np.NAN`, and the alias is no longer available in NumPy 2.x, so use the
# canonical spelling to keep this cell runnable on current NumPy.
my_series=pd.Series([np.nan]*5)

my_series

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
dtype: float64

In [147]:
my_series.isna().sum()

5

In [149]:
# NaN cannot be stored in a NumPy integer dtype, so this cast raises
# IntCastingNaNError. Catch and print the error so the failure is still
# demonstrated without halting "Restart & Run All".
try:
    my_series.astype('int')
except pd.errors.IntCastingNaNError as err:
    print(f"{type(err).__name__}: {err}")

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

In [151]:
my_series=pd.Series(range(5))

my_series

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [157]:
# Writing NaN into the int64 Series upcasts it to float64 (see the dtype in
# the output). `np.nan` replaces the legacy alias `np.NAN`, which is no longer
# available in NumPy 2.x.
my_series[1:3]=np.nan

my_series

0    0.0
1    NaN
2    NaN
3    3.0
4    4.0
dtype: float64

In [159]:
my_series.fillna(-1)

0    0.0
1   -1.0
2   -1.0
3    3.0
4    4.0
dtype: float64

In [169]:
my_series.dropna().reset_index(drop=True)

0    0.0
1    3.0
2    4.0
dtype: float64

In [171]:
def search(string,to_find):
    """Report whether ``to_find`` occurs inside ``string``.

    Parameters
    ----------
    string : the text to look in.
    to_find : the value tested with the ``in`` operator against ``string``.

    Returns
    -------
    str
        "Found!" when ``to_find in string`` is true, otherwise
        "Could not find.".
    """
    return "Found!" if to_find in string else "Could not find."

In [173]:
my_series=pd.Series(['day 0','day 0','day 2','day 3','day 4'])

my_series

0    day 0
1    day 0
2    day 2
3    day 3
4    day 4
dtype: object

In [181]:
# `args` takes a tuple of extra positional arguments for `search`.
# ('0') is just the string '0'; it only worked because a one-character string
# unpacks to a single argument. The trailing comma makes it a real
# one-element tuple, so longer search terms work too.
my_series.apply(search,args=('0',))

0             Found!
1             Found!
2    Could not find.
3    Could not find.
4    Could not find.
dtype: object

# Numpy where vs Pandas where

In [190]:
# Series.where keeps values where the condition is True and replaces the rest,
# so two chained calls are needed to relabel both outcomes.
contains_zero = my_series.str.contains("0")
my_series.where(contains_zero, "Could not find!").where(~contains_zero, "Found!")

0             Found!
1             Found!
2    Could not find!
3    Could not find!
4    Could not find!
dtype: object