## Arithmetic Operators and Methods

* You can use these operators and methods to perform numeric operations on Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
operation = ['Addition', 'Subtraction', 'Mutliplication', 'Division', 'Floor Division', 'Modulo', 'Exponentiation']
python_op = ['+', '-', '*', '/', '//', '%', '**']
pandas_method = ['.add', '.sub(), .subtract()', '.mul(), .multiply()', '.div(), .truediv(), .divide()', '.floordiv()', '.mod()', '.pow()']

df = pd.DataFrame({'Operation':operation, 'Python Operator':python_op, 'Pandas Method':pandas_method})
df

Unnamed: 0,Operation,Python Operator,Pandas Method
0,Addition,+,.add
1,Subtraction,-,".sub(), .subtract()"
2,Mutliplication,*,".mul(), .multiply()"
3,Division,/,".div(), .truediv(), .divide()"
4,Floor Division,//,.floordiv()
5,Modulo,%,.mod()
6,Exponentiation,**,.pow()


In [3]:
sales = [0, 5, 155, 0, 518]
monday_sales = pd.Series(sales, name="Sales")
monday_sales

0      0
1      5
2    155
3      0
4    518
Name: Sales, dtype: int64

In [4]:
monday_sales + 2 # Generally python operators are better to use in pandas

0      2
1      7
2    157
3      2
4    520
Name: Sales, dtype: int64

In [5]:
"$" + monday_sales.astype("float").astype("string") # we're casting the variable 'monday_sales' as a float to add the decimal and cents
# then again to a string to concat with string addition witht the dollar sign $

0      $0.0
1      $5.0
2    $155.0
3      $0.0
4    $518.0
Name: Sales, dtype: string

In [14]:
my_series = pd.Series(
    [1, 2, 3, 4, 5], index=["Day 0", "Day 1", "Day 2", "Day 3", "Day 4"])

my_series

Day 0    1
Day 1    2
Day 2    3
Day 3    4
Day 4    5
dtype: int64

In [15]:
my_series + 1

Day 0    2
Day 1    3
Day 2    4
Day 3    5
Day 4    6
dtype: int64

In [21]:
my_series = pd.Series(
    [1, np.NaN, 3, np.NaN, 5], index=["Day 0", "Day 1", "Day 2", "Day 3", "Day 4"]) # if out series has any NaN values we might find use using a pandas method

my_series

Day 0    1.0
Day 1    NaN
Day 2    3.0
Day 3    NaN
Day 4    5.0
dtype: float64

In [27]:
my_series.add(1, fill_value=0) # fill_value argument fills in missing values but only with the value given in this case we're replacing NaN with 0.
# So the result will be 0 + 1 for both rows that are NaN

Day 0    2.0
Day 1    1.0
Day 2    4.0
Day 3    1.0
Day 4    6.0
dtype: float64

In [36]:
my_series2 = my_series.add(1, fill_value=0).astype('int') # if we cast a series as an int it will become more compatable with numeric operators

In [35]:
my_series2 / 2 # notice when we perform division the data type automaticly changes to a float. Just take note of the dtype

Day 0    1.0
Day 1    0.5
Day 2    2.0
Day 3    0.5
Day 4    3.0
dtype: float64

In [33]:
my_series + my_series2

Day 0     3.0
Day 1     NaN
Day 2     7.0
Day 3     NaN
Day 4    11.0
dtype: float64

## String Methods

* The Pandas str accessor lets you access many string methods
* These methods all return a Series (split returns multiple series)

In [7]:
string_method = ['.strip(), .lstrip(), .rstrip()', '.upper(), .lower()', '.slice(star:stop:step)', '.count("string")', '.contains("string")',
                  '.replace("a", "b")', '.split("delimiter", expand=True)', '.len()', '.startswith("string"), .endswith("string")']

description = ["Removes all leading and/ or trailing characters(spaces by default)", "Converts all characaters to upper or lower case", 
               "Applies a sclice to the stings in a Series", "Counts all instances of a given string", "Returns True if a given string is found; False if not",
               """Replaces instances of string with "a" with string "b" """, "Splits strings based on a given delimiter string, and returns a DataFrame with a Series for each split",
                "Returns the length of each string in a Series", "Returns True if a string starts or ends with given string; False if not"]

df2 = pd.DataFrame({'String Method': string_method, 'Description':description})
pd.set_option('display.max_colwidth', None)
styled_df = df2.style.set_properties(**{'text-align': 'left'})
styled_df

Unnamed: 0,String Method,Description
0,".strip(), .lstrip(), .rstrip()",Removes all leading and/ or trailing characters(spaces by default)
1,".upper(), .lower()",Converts all characaters to upper or lower case
2,.slice(star:stop:step),Applies a sclice to the stings in a Series
3,".count(""string"")",Counts all instances of a given string
4,".contains(""string"")",Returns True if a given string is found; False if not
5,".replace(""a"", ""b"")","Replaces instances of string with ""a"" with string ""b"""
6,".split(""delimiter"", expand=True)","Splits strings based on a given delimiter string, and returns a DataFrame with a Series for each split"
7,.len(),Returns the length of each string in a Series
8,".startswith(""string""), .endswith(""string"")",Returns True if a string starts or ends with given string; False if not


In [28]:
string_series = pd.Series(["Day 0", "Day 1", "Day 2", "Day 3", "Day 4"])
string_series

0    Day 0
1    Day 1
2    Day 2
3    Day 3
4    Day 4
dtype: object

In [11]:
string_series.str.contains("Day 1") # acts like a filter. Note the .str accessor to use string methods

0    False
1     True
2    False
3    False
4    False
dtype: bool

In [14]:
string_series.str.upper().str.contains('day 1') # We can chain methods together

0    False
1    False
2    False
3    False
4    False
dtype: bool

In [18]:
string_series.str.strip('Day').astype('int') # change datatypes

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [22]:
string_series.str.slice(-1).astype('int') # we can slice also

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [26]:
string_series.str[-1].astype('int') # we can also slice without the method

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [27]:
string_series.str[1:3]

0    ay
1    ay
2    ay
3    ay
4    ay
dtype: object

In [31]:
string_series.str.split(' ') # if we split with only a space this will create a list stored as a series.

0    [Day, 0]
1    [Day, 1]
2    [Day, 2]
3    [Day, 3]
4    [Day, 4]
dtype: object

In [32]:
string_series.str.split(' ', expand=True) # But if we ad the expand=True argument we will expand each elemnt into their own column

Unnamed: 0,0,1
0,Day,0
1,Day,1
2,Day,2
3,Day,3
4,Day,4
