# 1. BASIC PYTHON DATETIME MODULE

In [None]:
# datetime is one of Python's core standard libraries 
# We are going to use two classes to deal with dates/times: (1) date and (2) datetime
# date: helps us define dates only without including time (month, day, year)
# datetime: helps us define times and dates together (month, day, year, hour, minute, second, microsecond)
# Let's import datetime module as dt
import pandas as pd
import datetime as dt

In [None]:
# Build a calendar date (no time-of-day component) with the date class of the datetime module
my_date = dt.date(year = 2020, month = 3, day = 22)
my_date

In [None]:
# Check the data type (a datetime.date object)
type(my_date)

In [None]:
# Convert it into a string to view the date
str(my_date)

In [None]:
# Attribute day will return the day contained in the date object
my_date.day

In [None]:
# Month attribute will return the month contained in the date object
my_date.month

In [None]:
# Year attribute will return the year contained in the date object
my_date.year

In [None]:
# Build a datetime (calendar date plus time of day) with the datetime class
my_datetime = dt.datetime(year = 2020, month = 3, day = 22, hour = 8, minute = 20, second = 50)

In [None]:
# Convert it into string to view the date time
str(my_datetime)

In [None]:
# Obtain the hour contained in the datetime object
my_datetime.hour

In [None]:
# Obtain the minutes contained in the datetime object
my_datetime.minute

In [None]:
# Print a plain-text calendar for March 2021
# calendar.month(year, month) builds the month as a multi-line string, which print() then displays
import calendar
print(calendar.month(2021, 3))

In [None]:
# You can also use pd.to_datetime to convert a regular Pandas Series of date strings into datetime. First, define the Series:
dates = pd.Series(["2020/03/22", "2020-08-25", "March 22nd, 2020"])
dates

In [None]:
# to_datetime() converts date strings into pandas datetime values
# The three strings above each use a different format, and pandas 2.x raises an error when a
# single call has to parse mixed formats, so every element is parsed on its own here
# (on pandas >= 2.0 you can alternatively call pd.to_datetime(dates, format = "mixed"))
my_dates = dates.apply(pd.to_datetime)
my_dates

**MINI CHALLENGE #1:**
- **Use Python's datetime method to write the date and time of your birth! Convert it into string format**

# 2. HANDLING DATES AND TIMES USING PANDAS 

In [None]:
# Timestamp is the pandas equivalent of python’s Datetime and is interchangeable with it in most cases. 
# It’s the type used for the entries that make up a DatetimeIndex, and other timeseries oriented data structures in pandas.
# Check this out: https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html
# The date is written as an ISO 8601 string (YYYY-MM-DD) so that parsing is unambiguous
pd.Timestamp('2020-03-22')

In [None]:
# Same date with a time of day (10:00), written as an ISO 8601 string so the hour is explicit
pd.Timestamp('2020-03-22 10:00')

In [None]:
# Or you can define a Pandas Timestamp using Python datetime object 
pd.Timestamp(dt.datetime(2020, 3, 22, 8, 20, 50))

In [None]:
# Calculate difference between two dates
# Both timestamps are written as ISO 8601 strings (date, then time) so the hour is explicit
# Subtracting two Timestamps gives a Timedelta
day_1 = pd.Timestamp('1998-03-22 10:00')
day_2 = pd.Timestamp('2021-03-22 10:00')
delta = day_2 - day_1
print(delta)

In [None]:
# Three transaction dates: the 22nd of March, April and May 2020
date_1, date_2, date_3 = (dt.date(2020, month, 22) for month in (3, 4, 5))


In [None]:
# Let's put the 3 dates in a list as follows
dates_list = [date_1, date_2, date_3]

# Use Pandas DatetimeIndex to convert the list of date objects into a pandas datetime index
# The DatetimeIndex constructor creates an index-like collection of dates

dates_index = pd.DatetimeIndex(dates_list)

In [None]:
dates_index

In [None]:
# Define a list that carries 3 values corresponding to store sales
sales = [50, 55, 60]

In [None]:
# Define a Pandas Series using datetime and values as follows:
sales = pd.Series(data = sales, index = dates_index) # Series constructor method
sales

In [None]:
# A DatetimeIndex with one entry per calendar day ("D") from January 1st to April 1st, 2020
my_days = pd.date_range("2020-01-01", "2020-04-01", freq = "D")
my_days

In [None]:
# Datatype is a datetimeindex (which is a collection of dates!)
type(my_days)

In [None]:
# If you pick any sample date, it's represented in timestamp datatype
type(my_days[4])

In [None]:
# You can also define a range of month-end dates
# A MonthEnd offset object is used instead of the "M" alias: newer pandas releases deprecate
# that alias, while the offset object works on every version
my_days = pd.date_range(start = "2020-01-01", end = "2020-08-01", freq = pd.offsets.MonthEnd())
my_days

In [None]:
# Alternative way of defining a list of dates
pd.date_range(start = "2020-01-01", periods = 20, freq = "D")

**MINI CHALLENGE #2:**
- **Obtain the business days between 2020-01-01 and 2020-04-01**

# 3. DATETIME IN ACTION! PRACTICAL EXAMPLE PART #1

In [None]:
# Load the avocado dataset from avocado.csv into a DataFrame
avocado_df = pd.read_csv('avocado.csv')

# Convert the Date column to datetime format
avocado_df['Date'] = pd.to_datetime(avocado_df['Date'])

# Date: The date of the observation
# AveragePrice: the average price of a single avocado
# type: conventional or organic
# region: the city or region of the observation
# Total Volume: Total number of avocados sold


In [None]:
avocado_df

In [None]:
# You can select any column to be the index for the DataFrame
# Here the "Date" column becomes the index; inplace = True modifies avocado_df directly
# instead of returning a new DataFrame
avocado_df.set_index(keys = ["Date"], inplace = True)

In [None]:
avocado_df.head(10)

In [None]:
avocado_df.values

In [None]:
avocado_df.columns

In [None]:
avocado_df.index

**MINI CHALLENGE #3:**
- **What are the datatypes of each column in the avocado_df DataFrame?**

# 4. DATETIME IN ACTION! PRACTICAL EXAMPLE PART #2

In [None]:
avocado_df.head(10)

In [None]:
# access elements with a specific datetime index using .loc
avocado_df.loc["2018-01-21"]

In [None]:
# You can use iloc if you decide to use numeric indexes
avocado_df.iloc[5]

In [None]:
# Access more than one element within a given date range
avocado_df.loc["2015-01-04":"2015-01-25"]

In [None]:
# Truncate a sorted DataFrame given index bounds.
# Make sure to sort the dataframe before applying truncate 
# sort_index(inplace = True) reorders avocado_df itself; the truncate result is only
# displayed here and is not assigned, so avocado_df keeps all of its rows
avocado_df.sort_index(inplace = True)
avocado_df.truncate("2017-01-01", "2018-02-01")

In [None]:
avocado_df

In [None]:
# you can offset (shift) all dates by days or month as follows
avocado_df.index = avocado_df.index + pd.DateOffset(months = 12, days = 30)

In [None]:
avocado_df

In [None]:
# Let's revert back to the original dataset!
# The shift is undone in reverse order: days first, then months
# Subtracting a single DateOffset(months = 12, days = 30) removes the months before the days,
# which is not the exact inverse of the addition when a shifted date lands on February 29th
avocado_df.index = avocado_df.index - pd.DateOffset(days = 30) - pd.DateOffset(months = 12)
avocado_df

In [None]:
# Once you have the index set to DateTime, this unlocks its power by performing aggregation
# Aggregating the data by year end
# - the text columns used elsewhere in this notebook ('type', 'region') cannot be averaged and
#   make mean() fail on pandas >= 2.0, so only the numeric columns are kept
# - a YearEnd offset object replaces the 'A' alias, which newer pandas releases deprecate
avocado_df.select_dtypes(include = 'number').resample(rule = pd.offsets.YearEnd()).mean()


In [None]:
# Aggregating the data by month end
# Only numeric columns are averaged (the text columns 'type' and 'region' make mean() fail on
# pandas >= 2.0); MonthEnd() replaces the 'M' alias, which newer pandas releases deprecate
avocado_df.select_dtypes(include = 'number').resample(rule = pd.offsets.MonthEnd()).mean()


In [None]:
# You can obtain the maximum value for each Quarter end as follows: 
# QuarterEnd(startingMonth = 12) is the offset behind the 'Q' alias (quarters ending in
# March, June, September and December); newer pandas releases deprecate the alias itself
avocado_df.resample(rule = pd.offsets.QuarterEnd(startingMonth = 12)).max()

In [None]:
# You can locate the rows that satisfy a given criterion as follows (entries that do not match become NaN):
low_price = avocado_df["AveragePrice"].where(avocado_df["AveragePrice"]<1.2)
low_price

In [None]:
# Count, for each month end, how many observations have an average price below 1.3
# where() turns the non-matching prices into NaN and count() ignores NaN values
# MonthEnd() replaces the "1M" alias, which newer pandas releases deprecate
low_price = avocado_df["AveragePrice"].where(avocado_df["AveragePrice"] < 1.3).resample(pd.offsets.MonthEnd()).count()
low_price

In [None]:
# You can create new features from the date time information as follows
# reset_index moves "Date" out of the index and back into a regular column,
# which is what the .dt accessor below operates on
avocado_df.reset_index(inplace=True)

# Store the day of the month of each observation in a new "Day" column
avocado_df['Day'] = avocado_df['Date'].dt.day
avocado_df

In [None]:
avocado_df['Month'] = avocado_df['Date'].dt.month
avocado_df

In [None]:
avocado_df['Year'] = avocado_df['Date'].dt.year
avocado_df

In [None]:
# Don't forget to set the Date to be in the index for the DataFrame
avocado_df.set_index(keys = ["Date"], inplace = True)

In [None]:
avocado_df

**MINI CHALLENGE #4:**
- **Calculate the average avocado price per quarter end**

# 5. DATA PLOTTING (STRETCH ASSIGNMENT!)

In [None]:
# Once you have index set to DateTime, this unlocks its power by performing aggregation
# Aggregating the data by month end
# Only numeric columns are averaged (the text columns 'type' and 'region' make mean() fail on
# pandas >= 2.0); MonthEnd() replaces the 'M' alias, which newer pandas releases deprecate
avocado_df.select_dtypes(include = 'number').resample(rule = pd.offsets.MonthEnd()).mean()


In [None]:
# Plot the avocado average price per month
# The price column is selected before resampling, so the text columns never reach mean()
# (averaging the whole DataFrame fails on pandas >= 2.0)
# MonthEnd() replaces the 'M' alias, which newer pandas releases deprecate
avocado_df['AveragePrice'].resample(rule = pd.offsets.MonthEnd()).mean().plot(figsize = (10,5), marker = 'o', color = 'r');

In [None]:
# Plot the avocado average price per quarter
# The price column is selected before resampling, so the text columns never reach mean()
# (averaging the whole DataFrame fails on pandas >= 2.0)
# QuarterEnd(startingMonth = 12) is the offset behind the deprecated 'Q' alias
avocado_df['AveragePrice'].resample(rule = pd.offsets.QuarterEnd(startingMonth = 12)).mean().plot(figsize = (10,5), marker = 'o', color = 'r');

In [None]:
# Plot the avocado average price on an annual basis
# The price column is selected before resampling, so the text columns never reach mean()
# (averaging the whole DataFrame fails on pandas >= 2.0)
# YearEnd() replaces the 'A' alias, which newer pandas releases deprecate
avocado_df['AveragePrice'].resample(rule = pd.offsets.YearEnd()).mean().plot(figsize = (10,5), marker = 'o', color = 'r');


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 7))
sns.violinplot(y = "AveragePrice", x = "type", data = avocado_df);

In [None]:
# Plot the distribution of avocado prices (histogram + Kernel Density Estimate)
# sns.distplot is deprecated and has been removed from recent seaborn releases;
# histplot with kde = True and stat = "density" draws the same kind of figure (needs seaborn >= 0.11)
plt.figure(figsize=(13,6))
sns.histplot(avocado_df["AveragePrice"], kde = True, stat = "density", color = 'b');

In [None]:
# Catplot is used to draw categorical plots onto a FacetGrid.
# Catplot provides access to several axes-level functions that show the relationship between a numerical and one or more categorical variables.
# x and y are passed by keyword: recent seaborn releases no longer accept them positionally
# The plot is limited to conventional avocados and colored by the 'Year' column
conventional = sns.catplot(x = 'AveragePrice', y = 'region', data = avocado_df[ avocado_df['type'] == 'conventional'], hue = 'Year', height = 10);

**MINI CHALLENGE #5:**
- **Plot the average price of avocado on a weekly basis**
- **Plot Catplot for price vs. region for organic food**

# MINI CHALLENGE SOLUTIONS:

**MINI CHALLENGE #1 SOLUTION:**
- **Use Python's datetime method to write the date and time of your birth. Convert it into string format!!**

In [None]:
# Build the birth date and time with the datetime class, then show it as a string
my_birth = dt.datetime(year = 1992, month = 10, day = 25, hour = 8, minute = 9, second = 20)
str(my_birth)

**MINI CHALLENGE #2 SOLUTION:**
- **Obtain the business days between 2020-01-01 and 2020-04-01**

In [None]:
# Business days ("B" skips Saturdays and Sundays) from January 1st to April 1st, 2020
my_days = pd.date_range("2020-01-01", "2020-04-01", freq = "B")
my_days

**MINI CHALLENGE #3 SOLUTION:**
- **What are the datatypes of each column in the DataFrame?**

In [None]:
avocado_df.info()

**MINI CHALLENGE #4 SOLUTION:**
- **Calculate the average avocado price per quarter end**

In [None]:
# You can obtain the average value for each Quarter end as follows: 
# Only numeric columns are averaged (the text columns 'type' and 'region' make mean() fail on
# pandas >= 2.0); QuarterEnd(startingMonth = 12) is the offset behind the deprecated 'Q' alias
avocado_df.select_dtypes(include = 'number').resample(rule = pd.offsets.QuarterEnd(startingMonth = 12)).mean()

**MINI CHALLENGE #5 SOLUTION:**
- **Plot the average price of avocado on a weekly basis**
- **Plot Catplot for price vs. region for organic food**

In [None]:
# Plot the avocado average price on a weekly basis
# The price column is selected before resampling, so the text columns never reach mean()
# (averaging the whole DataFrame fails on pandas >= 2.0)
avocado_df['AveragePrice'].resample(rule = 'W').mean().plot(figsize = (10,5), marker = 'o', color = 'r');

In [None]:
# Price vs. region for organic avocados, colored by the 'Year' column
# x and y are passed by keyword: recent seaborn releases no longer accept them positionally
organic = sns.catplot(x = 'AveragePrice', y = 'region', data = avocado_df[ avocado_df['type'] == 'organic'], hue = 'Year', height = 20);