# Pandas Series Operations & Aggregations

In [1]:
import pandas as pd
import numpy as np

In [2]:
series = pd.Series(np.random.randint(1,11,10))

In [3]:
series

0    6
1    1
2    4
3    4
4    2
5    3
6    2
7    5
8    6
9    1
dtype: int32

In [4]:
series + series*2

0    18
1     3
2    12
3    12
4     6
5     9
6     6
7    15
8    18
9     3
dtype: int32

In [5]:
series / series

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
5    1.0
6    1.0
7    1.0
8    1.0
9    1.0
dtype: float64

In [6]:
series//series

0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
dtype: int32

In [7]:
series.iloc[[0,2,5]] = np.nan

In [8]:
series + 5

0     NaN
1     6.0
2     NaN
3     9.0
4     7.0
5     NaN
6     7.0
7    10.0
8    11.0
9     6.0
dtype: float64

In [9]:
series.add(5,fill_value=1).astype("int")

0     6
1     6
2     6
3     9
4     7
5     6
6     7
7    10
8    11
9     6
dtype: int32

In [10]:
complex_strings = [
    "Hello, World! 123",
    "Python@OpenAI.com",
    "Data-Science_101",
    "AI*Milano#2024",
    "openAI_for*Life!",
    "GPT-4_is.Amazing",
    "String$Methods%Exercises",
    "Pandas.Series@Python",
    "Machine-Learning&Deep",
    "2024.AI_Revolution!"
]

# Convert the list to a pandas Series
series = pd.Series(complex_strings)

In [11]:
series

0           Hello, World! 123
1           Python@OpenAI.com
2            Data-Science_101
3              AI*Milano#2024
4            openAI_for*Life!
5            GPT-4_is.Amazing
6    String$Methods%Exercises
7        Pandas.Series@Python
8       Machine-Learning&Deep
9         2024.AI_Revolution!
dtype: object

In [12]:
series.str.strip()

0           Hello, World! 123
1           Python@OpenAI.com
2            Data-Science_101
3              AI*Milano#2024
4            openAI_for*Life!
5            GPT-4_is.Amazing
6    String$Methods%Exercises
7        Pandas.Series@Python
8       Machine-Learning&Deep
9         2024.AI_Revolution!
dtype: object

In [13]:
series.str.upper()

0           HELLO, WORLD! 123
1           PYTHON@OPENAI.COM
2            DATA-SCIENCE_101
3              AI*MILANO#2024
4            OPENAI_FOR*LIFE!
5            GPT-4_IS.AMAZING
6    STRING$METHODS%EXERCISES
7        PANDAS.SERIES@PYTHON
8       MACHINE-LEARNING&DEEP
9         2024.AI_REVOLUTION!
dtype: object

In [14]:
series.str.lower()

0           hello, world! 123
1           python@openai.com
2            data-science_101
3              ai*milano#2024
4            openai_for*life!
5            gpt-4_is.amazing
6    string$methods%exercises
7        pandas.series@python
8       machine-learning&deep
9         2024.ai_revolution!
dtype: object

In [15]:
series.str.slice(2,6)

0    llo,
1    thon
2    ta-S
3    *Mil
4    enAI
5    T-4_
6    ring
7    ndas
8    chin
9    24.A
dtype: object

In [16]:
series.str.slice(step=2)

0       Hlo ol!13
1       Pto@pnIcm
2        Dt-cec_0
3         A*iao22
4        oeA_o*ie
5        GT4i.mzn
6    Srn$ehd%xrie
7      Pna.eisPto
8     McieLann&ep
9      22.IRvlto!
dtype: object

In [17]:
series.str.count("a")

0    0
1    0
2    2
3    1
4    0
5    1
6    0
7    2
8    2
9    0
dtype: int64

In [18]:
# "." is treated as regex thats why you have to escape it using \\
series.str.count("\\.a")

0    0
1    0
2    0
3    0
4    0
5    0
6    0
7    0
8    0
9    0
dtype: int64

In [19]:
series.str.contains("\\.")

0    False
1     True
2    False
3    False
4    False
5     True
6    False
7     True
8    False
9     True
dtype: bool

In [20]:
series.str.replace("a","bbbbbb").replace("A","bbbbbbb")

0                  Hello, World! 123
1                  Python@OpenAI.com
2         Dbbbbbbtbbbbbb-Science_101
3                AI*Milbbbbbbno#2024
4                   openAI_for*Life!
5              GPT-4_is.Ambbbbbbzing
6           String$Methods%Exercises
7     Pbbbbbbndbbbbbbs.Series@Python
8    Mbbbbbbchine-Lebbbbbbrning&Deep
9                2024.AI_Revolution!
dtype: object

In [21]:
series.str.split(" ",expand = True)

Unnamed: 0,0,1,2
0,"Hello,",World!,123.0
1,Python@OpenAI.com,,
2,Data-Science_101,,
3,AI*Milano#2024,,
4,openAI_for*Life!,,
5,GPT-4_is.Amazing,,
6,String$Methods%Exercises,,
7,Pandas.Series@Python,,
8,Machine-Learning&Deep,,
9,2024.AI_Revolution!,,


In [22]:
series.str.len()

0    17
1    17
2    16
3    14
4    16
5    16
6    24
7    20
8    21
9    19
dtype: int64

In [23]:
series.str.startswith("Hello")

0     True
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
dtype: bool

Course assignment: series operations

In [24]:
import pandas as pd
import numpy as np

In [25]:
oil = pd.read_csv("../pandas-course/Pandas Course Resources/retail/oil.csv").dropna()
dates = pd.Series(oil["date"]).iloc[1000:1100]
oil_array = np.array(oil["dcoilwtico"].iloc[1000:1100])
oil_series = pd.Series(oil_array)
oil_series.index = dates

In [26]:
oil_change = oil_series*1.1+2
oil_change

date
2016-12-20    59.442
2016-12-21    58.584
2016-12-22    59.178
2016-12-23    59.211
2016-12-27    60.102
               ...  
2017-05-09    52.424
2017-05-10    54.008
2017-05-11    54.591
2017-05-12    54.613
2017-05-15    55.746
Length: 100, dtype: float64

In [27]:
max_oil_price = oil_series.max()

In [44]:
test = oil_series.reset_index().loc[:,"date"].str.split("-",expand=True)

In [52]:
test.columns = [str(col) for col in test.columns]

Aggregations

In [58]:
oil_series.count()

100

In [59]:
oil_series.max()

54.48

In [61]:
oil_series.min()

45.55

In [63]:
oil_series.argmax()

43

In [64]:
oil_series.median()

52.19

In [65]:
oil_series.mean()

51.128299999999996

In [66]:
series

0           Hello, World! 123
1           Python@OpenAI.com
2            Data-Science_101
3              AI*Milano#2024
4            openAI_for*Life!
5            GPT-4_is.Amazing
6    String$Methods%Exercises
7        Pandas.Series@Python
8       Machine-Learning&Deep
9         2024.AI_Revolution!
dtype: object

In [67]:
series.value_counts()

Hello, World! 123           1
Python@OpenAI.com           1
Data-Science_101            1
AI*Milano#2024              1
openAI_for*Life!            1
GPT-4_is.Amazing            1
String$Methods%Exercises    1
Pandas.Series@Python        1
Machine-Learning&Deep       1
2024.AI_Revolution!         1
Name: count, dtype: int64

In [68]:
series.unique()

array(['Hello, World! 123', 'Python@OpenAI.com', 'Data-Science_101',
       'AI*Milano#2024', 'openAI_for*Life!', 'GPT-4_is.Amazing',
       'String$Methods%Exercises', 'Pandas.Series@Python',
       'Machine-Learning&Deep', '2024.AI_Revolution!'], dtype=object)

In [69]:
series.nunique()

10

In [73]:
print(oil_series.loc["2017-03-1":"2017-03-31"].mean())
print(oil_series.loc["2017-03-1":"2017-03-31"].sum())

48.144375000000004
770.3100000000001


In [97]:
oil_series.loc["2017-01-01":"2017-02-31"].count()


39

In [108]:
oil_series.astype("int").value_counts().loc[[51,52]]

51     7
52    22
Name: count, dtype: int64