# Extract month and year from a date column in pandas and store them in new columns

Based on: https://www.interviewqs.com/ddi_code_snippets/extract_month_year_pandas

In [1]:
import pandas as pd
import numpy as np
import datetime

In [2]:
# Sample roster: one row per student; birth dates are kept as MM-DD-YYYY strings.
student_names = ['Willard Morris', 'Al Jennings', 'Omar Mullins', 'Spencer McDaniel']
raw_data = {
    'name': student_names,
    'age': [20, 19, 22, 21],
    'favorite_color': ['blue', 'red', 'yellow', 'green'],
    'grade': [88, 92, 95, 70],
    'birth_date': ['01-02-1996', '08-05-1997', '04-28-1996', '12-16-1995'],
}
# Rows are labelled by student name (the same values as the 'name' column).
df = pd.DataFrame(raw_data, index=student_names)
df

Unnamed: 0,name,age,favorite_color,grade,birth_date
Willard Morris,Willard Morris,20,blue,88,01-02-1996
Al Jennings,Al Jennings,19,red,92,08-05-1997
Omar Mullins,Omar Mullins,22,yellow,95,04-28-1996
Spencer McDaniel,Spencer McDaniel,21,green,70,12-16-1995


In [3]:
# Extract the calendar year from the string-formatted birth dates.
# The strings are MM-DD-YYYY; passing the format explicitly means pandas never
# has to guess whether a value such as '01-02-1996' is month-first or day-first.
# pandas.to_datetime docs: https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html
df['year'] = pd.to_datetime(df['birth_date'], format='%m-%d-%Y').dt.year
df.head()

Unnamed: 0,name,age,favorite_color,grade,birth_date,year
Willard Morris,Willard Morris,20,blue,88,01-02-1996,1996
Al Jennings,Al Jennings,19,red,92,08-05-1997,1997
Omar Mullins,Omar Mullins,22,yellow,95,04-28-1996,1996
Spencer McDaniel,Spencer McDaniel,21,green,70,12-16-1995,1995


In [4]:
# Extract the month number (1-12) from the string-formatted birth dates.
# The strings are MM-DD-YYYY; passing the format explicitly means pandas never
# has to guess whether a value such as '01-02-1996' is month-first or day-first.
# pandas.to_datetime docs: https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html
df['month'] = pd.to_datetime(df['birth_date'], format='%m-%d-%Y').dt.month
df.head()

Unnamed: 0,name,age,favorite_color,grade,birth_date,year,month
Willard Morris,Willard Morris,20,blue,88,01-02-1996,1996,1
Al Jennings,Al Jennings,19,red,92,08-05-1997,1997,8
Omar Mullins,Omar Mullins,22,yellow,95,04-28-1996,1996,4
Spencer McDaniel,Spencer McDaniel,21,green,70,12-16-1995,1995,12


In [5]:
# Once the strings are parsed into datetimes, .dt.to_period() truncates each
# value to a period; pass 'D', 'M' or 'Y' for day, month or year resolution.
# The explicit MM-DD-YYYY format avoids guessing the day/month order of the strings.
df['month_year'] = pd.to_datetime(df['birth_date'], format='%m-%d-%Y').dt.to_period('M')
df.head()

Unnamed: 0,name,age,favorite_color,grade,birth_date,year,month,month_year
Willard Morris,Willard Morris,20,blue,88,01-02-1996,1996,1,1996-01
Al Jennings,Al Jennings,19,red,92,08-05-1997,1997,8,1997-08
Omar Mullins,Omar Mullins,22,yellow,95,04-28-1996,1996,4,1996-04
Spencer McDaniel,Spencer McDaniel,21,green,70,12-16-1995,1995,12,1995-12


In [6]:
# Get the first day of each birth month: casting the parsed datetimes to
# numpy's month resolution ('datetime64[M]') drops the day component.
# The explicit MM-DD-YYYY format avoids guessing the day/month order of the strings.
df['month_first_day'] = (
    pd.to_datetime(df['birth_date'], format='%m-%d-%Y')
    .to_numpy()
    .astype('datetime64[M]')
)
df.head()

Unnamed: 0,name,age,favorite_color,grade,birth_date,year,month,month_year,month_first_day
Willard Morris,Willard Morris,20,blue,88,01-02-1996,1996,1,1996-01,1996-01-01
Al Jennings,Al Jennings,19,red,92,08-05-1997,1997,8,1997-08,1997-08-01
Omar Mullins,Omar Mullins,22,yellow,95,04-28-1996,1996,4,1996-04,1996-04-01
Spencer McDaniel,Spencer McDaniel,21,green,70,12-16-1995,1995,12,1995-12,1995-12-01
