In [7]:
import pandas as pd

df = pd.read_excel("Dates.xlsx", sheet_name="Sheet1")

df.head(10)

Unnamed: 0,Order Date,Send Date,Sales
0,2023-08-30,s20230903,59
1,2024-07-11,s20240717,56
2,2023-07-01,s20230729,33
3,2022-10-25,xxxx,25
4,2022-10-15,s20221016,75
5,2023-12-16,s20231217,36
6,2022-03-25,s20220406,100
7,2024-02-06,s20240223,78
8,2024-01-19,s20240218,70
9,2023-04-09,s20230416,67


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Order Date  31 non-null     datetime64[ns]
 1   Send Date   31 non-null     object        
 2   Sales       31 non-null     int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 876.0+ bytes


## Konwersja kolumny 'Date' na typ datetime

Pierwszym krokiem jest konwersja kolumny zawierającej daty na typ datetime.

In [9]:
df['Send Date'] = pd.to_datetime(df['Send Date'], format='s%Y%m%d', errors='coerce')
df.info()
df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Order Date  31 non-null     datetime64[ns]
 1   Send Date   30 non-null     datetime64[ns]
 2   Sales       31 non-null     int64         
dtypes: datetime64[ns](2), int64(1)
memory usage: 876.0 bytes


Unnamed: 0,Order Date,Send Date,Sales
0,2023-08-30,2023-09-03,59
1,2024-07-11,2024-07-17,56
2,2023-07-01,2023-07-29,33
3,2022-10-25,NaT,25
4,2022-10-15,2022-10-16,75


## Wyodrębnianie komponentów dat

Po konwersji kolumny na typ datetime możemy łatwo wyodrębnić komponenty dat, takie jak rok, miesiąc, dzień itp.

In [10]:
df['Year'] = df['Order Date'].dt.year
df['Month'] = df['Order Date'].dt.month
df['Day'] = df['Order Date'].dt.day
df['DayOfWeek'] = df['Order Date'].dt.dayofweek

df.head()


Unnamed: 0,Order Date,Send Date,Sales,Year,Month,Day,DayOfWeek
0,2023-08-30,2023-09-03,59,2023,8,30,2
1,2024-07-11,2024-07-17,56,2024,7,11,3
2,2023-07-01,2023-07-29,33,2023,7,1,5
3,2022-10-25,NaT,25,2022,10,25,1
4,2022-10-15,2022-10-16,75,2022,10,15,5


## Operacje arytmetyczne na datach

In [11]:
df['Order Date Plus 7'] = df['Order Date'] + pd.Timedelta(days=7)
df['Sent After Days'] = (df['Send Date'] - df['Order Date']).dt.days
df.head()

Unnamed: 0,Order Date,Send Date,Sales,Year,Month,Day,DayOfWeek,Order Date Plus 7,Sent After Days
0,2023-08-30,2023-09-03,59,2023,8,30,2,2023-09-06,4.0
1,2024-07-11,2024-07-17,56,2024,7,11,3,2024-07-18,6.0
2,2023-07-01,2023-07-29,33,2023,7,1,5,2023-07-08,28.0
3,2022-10-25,NaT,25,2022,10,25,1,2022-11-01,
4,2022-10-15,2022-10-16,75,2022,10,15,5,2022-10-22,1.0


## Grupowanie danych według okresów czasowych

In [12]:
quarter_sales = df.groupby(df['Order Date'].dt.to_period('Q'))['Sales'].sum()
quarter_sales

Order Date
2021Q3     21
2022Q1    265
2022Q2     67
2022Q3    112
2022Q4    124
2023Q1     68
2023Q2    108
2023Q3     92
2023Q4    232
2024Q1    550
2024Q2    117
2024Q3     56
Freq: Q-DEC, Name: Sales, dtype: int64

In [13]:
average_daily_sales = df.groupby(df['Order Date'].dt.day_name())['Sales'].mean()
average_daily_sales

Order Date
Friday       60.666667
Monday       74.250000
Saturday     54.000000
Sunday       44.666667
Thursday     56.000000
Tuesday      53.625000
Wednesday    63.200000
Name: Sales, dtype: float64