## Time Series Data Manipulation in Pandas

In [1]:
import pandas as pd

# Toy order book: four orders on consecutive days. 'OrderDate' is stored as
# plain text here so the string -> datetime conversion can be shown later on.
data = dict(
    OrderID=[1001, 1002, 1003, 1004],
    OrderDate=['2023-11-01', '2023-11-02', '2023-11-03', '2023-11-04'],
    Sales=[250, 400, 150, 300],
)
df = pd.DataFrame(data=data)

In [2]:
# Show the freshly built frame; it has only four rows, so display it whole.
df

Unnamed: 0,OrderID,OrderDate,Sales
0,1001,2023-11-01,250
1,1002,2023-11-02,400
2,1003,2023-11-03,150
3,1004,2023-11-04,300


In [3]:
# Inspect the column dtypes: at this point 'OrderDate' is still a string
# column, not a datetime.
df.dtypes

OrderID      int64
OrderDate      str
Sales        int64
dtype: object

In [4]:
# Parse the date strings into real timestamps so the column can be used as a
# datetime index (date parts, resampling) further down.
df = df.assign(OrderDate=pd.to_datetime(df['OrderDate']))

In [5]:
# Re-check the dtypes to confirm 'OrderDate' is now a datetime column.
df.dtypes

OrderID               int64
OrderDate    datetime64[us]
Sales                 int64
dtype: object

In [6]:
# Promote 'OrderDate' to the index so the frame is indexed by timestamp; the
# `.index.year` / `.index.month` / `.index.day` accessors and the `.resample()`
# call further down operate on that datetime index.
# The result is reassigned instead of using `inplace=True`, which keeps the
# step chainable and makes the data flow explicit.
df = df.set_index('OrderDate')
df

Unnamed: 0_level_0,OrderID,Sales
OrderDate,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-11-01,1001,250
2023-11-02,1002,400
2023-11-03,1003,150
2023-11-04,1004,300


In [7]:
# Break the datetime index into its calendar components and attach each one
# as an ordinary column.
df = df.assign(
    Year=df.index.year,
    Month=df.index.month,
    Day=df.index.day,
)


In [8]:
# Inspect the frame with the derived Year / Month / Day columns.
df

Unnamed: 0_level_0,OrderID,Sales,Year,Month,Day
OrderDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-11-01,1001,250,2023,11,1
2023-11-02,1002,400,2023,11,2
2023-11-03,1003,150,2023,11,3
2023-11-04,1004,300,2023,11,4


In [9]:
# Total sales per calendar week. The 'W' bins are anchored on Sunday (the
# output reports the frequency as W-SUN) and labelled by the closing day, so
# all four sample orders fall into the single week ending 2023-11-05.
weekly_sales = df.resample('W')['Sales'].sum()
weekly_sales

OrderDate
2023-11-05    1100
Freq: W-SUN, Name: Sales, dtype: int64

In [10]:
# Switch to a second dataset: website traffic records loaded from a CSV file
# expected in the working directory.
# Note: this rebinds `df`, so the orders frame built above is no longer
# reachable under that name.
df = pd.read_csv(filepath_or_buffer='website_traffic_data_datetime.csv')
df

Unnamed: 0,Source,SessionDuration,Visits,Date
0,Direct,27.53,267,2023-01-01
1,Referral,25.65,476,2023-01-02
2,Social,14.03,441,2023-01-03
3,Referral,3.77,473,2023-01-04
4,Referral,11.75,434,2023-01-05
...,...,...,...,...
95,Social,3.79,226,2023-04-06
96,Referral,28.28,277,2023-04-07
97,Paid,12.53,286,2023-04-08
98,Paid,16.01,97,2023-04-09


In [11]:
# read_csv was called without date parsing, so convert the 'Date' column to
# real timestamps explicitly.
df = df.assign(Date=pd.to_datetime(df['Date']))

In [12]:
# Index the traffic data by 'Date' so that time-based operations such as
# `.resample()` can work directly on the frame.
# The result is reassigned instead of using `inplace=True`, which keeps the
# step chainable and makes the data flow explicit.
df = df.set_index('Date')
df

Unnamed: 0_level_0,Source,SessionDuration,Visits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,Direct,27.53,267
2023-01-02,Referral,25.65,476
2023-01-03,Social,14.03,441
2023-01-04,Referral,3.77,473
2023-01-05,Referral,11.75,434
...,...,...,...
2023-04-06,Social,3.79,226
2023-04-07,Referral,28.28,277
2023-04-08,Paid,12.53,286
2023-04-09,Paid,16.01,97


In [13]:
# Total visits per calendar month. 'ME' is the month-end frequency, so each
# bucket is labelled with the last day of its month. The data shown ends on
# 2023-04-09, so the April figure covers only a partial month.
monthly_visits = df.resample('ME')['Visits'].sum()
monthly_visits

Date
2023-01-31    7813
2023-02-28    7137
2023-03-31    8384
2023-04-30    2233
Freq: ME, Name: Visits, dtype: int64