#### Knowledge Sharing Content

<img src="https://pandas.pydata.org/static/img/pandas.svg" width="250">

# <center> Strings & Dates </center>

#### By - [Bhanu Pratap Singh](https://www.linkedin.com/in/bpst/)

In [1]:
# importing packages
import pandas as pd

In [2]:
# build a Series of comma-separated name strings; note the ';' separator and
# the stray leading/trailing spaces, which the following cells clean up
names = pd.Series(
    [
        'Ram, Singh',
        'Mohan; Payare ',
        ' Johan, Cena',
        'Jingle, Bell',
        'Yanna, Raskala ',
    ]
)
names

0         Ram, Singh
1     Mohan; Payare 
2        Johan, Cena
3       Jingle, Bell
4    Yanna, Raskala 
dtype: object

In [3]:
# replace the ';' separator with ',' so every name uses the same delimiter
# regex=False: ';' is meant literally, and the default for `regex` differs
# between pandas versions, so state it explicitly
names = names.str.replace(';', ',', regex=False)
names

0         Ram, Singh
1     Mohan, Payare 
2        Johan, Cena
3       Jingle, Bell
4    Yanna, Raskala 
dtype: object

In [4]:
# find the length (number of characters) of each name; leading/trailing
# spaces are counted too
names.str.len()

0    10
1    14
2    12
3    12
4    15
dtype: int64

In [5]:
# remove leading and trailing whitespace from each name, then show the
# lengths again to see the effect
names = names.str.strip()
names.str.len()

0    10
1    13
2    11
3    12
4    14
dtype: int64

In [6]:
# convert every name to upper case
names = names.str.upper()
names

0        RAM, SINGH
1     MOHAN, PAYARE
2       JOHAN, CENA
3      JINGLE, BELL
4    YANNA, RASKALA
dtype: object

In [7]:
# convert every name to lower case
names = names.str.lower()
names

0        ram, singh
1     mohan, payare
2       johan, cena
3      jingle, bell
4    yanna, raskala
dtype: object

##### swap first name with last name

In [8]:
# split each name on ', ' into a two-element list: [part before, part after]
names = names.str.split(pat=', ')
names

0        [ram, singh]
1     [mohan, payare]
2       [johan, cena]
3      [jingle, bell]
4    [yanna, raskala]
dtype: object

In [9]:
# reverse each two-element list so the two name parts trade places
names = pd.Series([list(reversed(parts)) for parts in names])
names

0        [singh, ram]
1     [payare, mohan]
2       [cena, johan]
3      [bell, jingle]
4    [raskala, yanna]
dtype: object

In [10]:
# join back first and last names with space
names_with_space = [' '.join(i) for i in names]
names_with_space

['singh ram', 'payare mohan', 'cena johan', 'bell jingle', 'raskala yanna']

In [11]:
# join back first and last names with comma
names_with_comma = [', '.join(i) for i in names]
names_with_comma

['singh, ram',
 'payare, mohan',
 'cena, johan',
 'bell, jingle',
 'raskala, yanna']

## Time Series Data

#### explore `period_range()` function - to generate a series of dates

In [12]:
# create nine periods, each spanning 30 days, starting on 5 January 2021
# - the ISO date avoids the month-first/day-first ambiguity of '1/5/2021'
# - 'D' is the canonical (upper-case) alias for the daily frequency
date_range = pd.period_range(start='2021-01-05', periods=9, freq='30D')

In [13]:
# wrap the periods in a one-column DataFrame
df = pd.DataFrame({'sample date': date_range})
df

Unnamed: 0,sample date
0,2021-01-05
1,2021-02-04
2,2021-03-06
3,2021-04-05
4,2021-05-05
5,2021-06-04
6,2021-07-04
7,2021-08-03
8,2021-09-02


#### explore `diff()` function - find date difference from a prior date

In [14]:
# difference between each sample date and the one five rows earlier
# (the first five rows have no such predecessor, so they stay empty)
sample_dates = df['sample date']
df['date diff'] = sample_dates.diff(periods=5)
df

Unnamed: 0,sample date,date diff
0,2021-01-05,NaT
1,2021-02-04,NaT
2,2021-03-06,NaT
3,2021-04-05,NaT
4,2021-05-05,NaT
5,2021-06-04,<150 * Days>
6,2021-07-04,<150 * Days>
7,2021-08-03,<150 * Days>
8,2021-09-02,<150 * Days>


In [15]:
# difference between each sample date and the one in the previous row;
# this overwrites the 'date diff' column
sample_dates = df['sample date']
df['date diff'] = sample_dates.diff(periods=1)
df

Unnamed: 0,sample date,date diff
0,2021-01-05,NaT
1,2021-02-04,<30 * Days>
2,2021-03-06,<30 * Days>
3,2021-04-05,<30 * Days>
4,2021-05-05,<30 * Days>
5,2021-06-04,<30 * Days>
6,2021-07-04,<30 * Days>
7,2021-08-03,<30 * Days>
8,2021-09-02,<30 * Days>


#### find first day of the month

In [16]:
# look at the raw values stored in the 'sample date' column
df['sample date'].values

array([Period('2021-01-05', '30D'), Period('2021-02-04', '30D'),
       Period('2021-03-06', '30D'), Period('2021-04-05', '30D'),
       Period('2021-05-05', '30D'), Period('2021-06-04', '30D'),
       Period('2021-07-04', '30D'), Period('2021-08-03', '30D'),
       Period('2021-09-02', '30D')], dtype=object)

In [17]:
# first day of the month each sample date falls in:
# period -> timestamp -> monthly period -> timestamp at the start of that month.
# Staying on the .dt accessors avoids casting an object array of Period
# objects through NumPy.
df['first of month'] = (
    df['sample date']
    .dt.to_timestamp()
    .dt.to_period('M')
    .dt.to_timestamp()
)
df

Unnamed: 0,sample date,date diff,first of month
0,2021-01-05,NaT,2021-01-01
1,2021-02-04,<30 * Days>,2021-02-01
2,2021-03-06,<30 * Days>,2021-03-01
3,2021-04-05,<30 * Days>,2021-04-01
4,2021-05-05,<30 * Days>,2021-05-01
5,2021-06-04,<30 * Days>,2021-06-01
6,2021-07-04,<30 * Days>,2021-07-01
7,2021-08-03,<30 * Days>,2021-08-01
8,2021-09-02,<30 * Days>,2021-09-01


### Date Types

In [18]:
# check the data type of every column
df.dtypes

sample date          period[30D]
date diff                 object
first of month    datetime64[ns]
dtype: object

#### explore `to_timestamp()` function

In [19]:
# turn the period column into timestamps (start of each period) and
# confirm the new dtype
df['sample date'] = df['sample date'].dt.to_timestamp(how='start')
df.dtypes

sample date       datetime64[ns]
date diff                 object
first of month    datetime64[ns]
dtype: object

### Date Subtraction

In [20]:
# time elapsed between the first of the month and each sample date
df['sample date'].sub(df['first of month'])

0   4 days
1   3 days
2   5 days
3   4 days
4   4 days
5   3 days
6   3 days
7   2 days
8   1 days
dtype: timedelta64[ns]

In [21]:
# step each sample date back by the offset stored in its 'date diff' entry
sample_dates = df['sample date']
offsets = df['date diff']
sample_dates - offsets



0          NaT
1   2021-01-05
2   2021-02-04
3   2021-03-06
4   2021-04-05
5   2021-05-05
6   2021-06-04
7   2021-07-04
8   2021-08-03
dtype: datetime64[ns]

#### explore `pd.Timedelta()` function 

In [22]:
# shift every sample date 30 days into the past
thirty_days = pd.Timedelta('30 d')
df['sample date'] - thirty_days

0   2020-12-06
1   2021-01-05
2   2021-02-04
3   2021-03-06
4   2021-04-05
5   2021-05-05
6   2021-06-04
7   2021-07-04
8   2021-08-03
Name: sample date, dtype: datetime64[ns]

In [23]:
# shift every sample date 5 days into the future
five_days = pd.Timedelta('5 d')
df['sample date'] + five_days

0   2021-01-10
1   2021-02-09
2   2021-03-11
3   2021-04-10
4   2021-05-10
5   2021-06-09
6   2021-07-09
7   2021-08-08
8   2021-09-07
Name: sample date, dtype: datetime64[ns]

#### explore `dt.day_name()` function 

In [24]:
# weekday name of each sample date
df['sample date'].dt.day_name()

0      Tuesday
1     Thursday
2     Saturday
3       Monday
4    Wednesday
5       Friday
6       Sunday
7      Tuesday
8     Thursday
Name: sample date, dtype: object