<a href="https://colab.research.google.com/github/davidofitaly/notes_03_python_in_data_analysis/blob/main/08_time_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import pytz

## 8.1 Date and time data types and tools

### Introduction



*   



In [None]:
# `datetime` is already imported in the notebook's import cell, so it is not re-imported here.
# Capture the current local date and time as a datetime object.
time_today = datetime.now()

time_today  # Display the captured date and time

datetime.datetime(2025, 3, 11, 17, 42, 5, 402765)

In [None]:
# Pull the calendar fields (year, month, day) out of the datetime object as a tuple
(time_today.year, time_today.month, time_today.day)

(2025, 3, 11)



*   


In [None]:
# Subtracting one datetime from another produces a timedelta
delta_1 = datetime(year=2020, month=10, day=7) - datetime(year=2019, month=6, day=15)

# Show the gap both as whole days and as a total number of seconds
(delta_1.days, delta_1.total_seconds())

(480, 41472000.0)



*   



In [None]:
# Snapshot of the current date and time (reused by the following cell)
time = datetime.now()

# A bare positional argument to timedelta is a number of days: this is "20 days from now"
time + timedelta(days=20)

datetime.datetime(2025, 3, 31, 17, 48, 12, 412469)

In [None]:
# Offset the stored timestamp by different units via timedelta keyword arguments
time_plus_days = time + timedelta(days=20)        # 20 days later
time_plus_minutes = time + timedelta(minutes=30)  # 30 minutes later
time_plus_seconds = time + timedelta(seconds=45)  # 45 seconds later

# Print the three shifted timestamps, one per line
print(time_plus_days, time_plus_minutes, time_plus_seconds, sep="\n")

2025-03-31 17:49:35.932779
2025-03-11 18:19:35.932779
2025-03-11 17:50:20.932779


### Conversion between string and datetime objects



*  


In [None]:
# Basic date format (YYYY-MM-DD)
date_1 = datetime.strptime(
    "2025-02-28",  # text to parse
    "%Y-%m-%d",    # format: 4-digit year, month, day separated by dashes
)

print(date_1)  # Output: 2025-02-28 00:00:00

2025-02-28 00:00:00




*  



In [None]:
# Date with time (DD/MM/YYYY HH:MM:SS)
date_2 = datetime.strptime("17/04/2025 14:30:20", "%d/%m/%Y %H:%M:%S")

# The day comes first in the input, but the parsed datetime prints in ISO order
print(date_2)  # Output: 2025-04-17 14:30:20

2025-04-17 14:30:20




*   


In [None]:
# Custom format: full month name, day of month, 4-digit year
date_3 = datetime.strptime(
    "March 11, 2024",
    "%B %d, %Y",  # %B matches the full month name
)
print(date_3)  # Output: 2024-03-11 00:00:00

2024-03-11 00:00:00




*  


In [None]:
# Same date, written with the abbreviated month name
date_4 = datetime.strptime(
    "Mar 11, 2024",
    "%b %d, %Y",  # %b matches the abbreviated month name
)
print(date_4)  # Output: 2024-03-11 00:00:00

2024-03-11 00:00:00




*   


In [None]:
# Literal separators (dots, spaces, a dash) in the format must match the input exactly
date_5 = datetime.strptime(
    "2024.03.11 - 08:15",
    "%Y.%m.%d - %H:%M",
)

print(date_5)  # Output: 2024-03-11 08:15:00

2024-03-11 08:15:00


## 8.2 Fundamentals of time series



*  


In [None]:
# Create a list of 10 dates spaced 10 days apart, starting on 2025-10-12
dates = [datetime(2025, 10, 12) + timedelta(days=i * 10) for i in range(10)]

# Generate one random value per date from the standard normal distribution
# (unseeded, so the values differ on every run)
values = np.random.standard_normal(len(dates))

# Create the pandas Series with the dates as the index
date_series = pd.Series(values, index=dates)

date_series



Unnamed: 0,0
2025-10-12,-0.622706
2025-10-22,-0.405387
2025-11-01,-0.957087
2025-11-11,-0.243918
2025-11-21,-0.873032
2025-12-01,-0.113742
2025-12-11,-1.143924
2025-12-21,-1.834915
2025-12-31,1.186504
2026-01-10,0.36967


In [None]:
date_series.index  # Inspect the Series index; the datetime objects are stored as a DatetimeIndex

DatetimeIndex(['2025-10-12', '2025-10-22', '2025-11-01', '2025-11-11',
               '2025-11-21', '2025-12-01', '2025-12-11', '2025-12-21',
               '2025-12-31', '2026-01-10'],
              dtype='datetime64[ns]', freq=None)

In [None]:
# Arithmetic aligns on the index: dates missing from the every-third-row slice become NaN
date_series.add(date_series[::3])

Unnamed: 0,0
2025-10-12,-0.117194
2025-10-22,
2025-11-01,
2025-11-11,0.3441
2025-11-21,
2025-12-01,
2025-12-11,-1.696384
2025-12-21,
2025-12-31,
2026-01-10,-2.246206


In [None]:
# Look up a single value by a date string; pandas interprets the string as a date
date_series.loc['2025-11-11']

-0.24391833058500229



*   



In [None]:
# 1000 draws from the standard normal distribution, indexed by
# 1000 consecutive daily timestamps beginning on 2025-01-01
date_series_1000 = pd.Series(
    np.random.standard_normal(1000),
    index=pd.date_range(start='2025-01-01', periods=1000),
)

date_series_1000  # Display the resulting Series

Unnamed: 0,0
2025-01-01,0.824338
2025-01-02,-0.525182
2025-01-03,0.251120
2025-01-04,-1.684837
2025-01-05,-1.410256
...,...
2027-09-23,0.720776
2027-09-24,0.914825
2027-09-25,-0.327021
2027-09-26,-1.876978


In [None]:
# Partial-string indexing: a year string selects every row dated in 2026
date_series_1000.loc['2026']

Unnamed: 0,0
2026-01-01,-0.734096
2026-01-02,-0.028901
2026-01-03,0.496581
2026-01-04,0.076928
2026-01-05,-0.093201
...,...
2026-12-27,0.308094
2026-12-28,-0.475792
2026-12-29,0.598617
2026-12-30,-0.776443


In [None]:
# Partial-string indexing: a year-month string selects every row dated in October 2025
date_series_1000.loc['2025-10']

Unnamed: 0,0
2025-10-01,-0.054827
2025-10-02,-0.016064
2025-10-03,0.322522
2025-10-04,0.750339
2025-10-05,-1.315368
2025-10-06,-0.11149
2025-10-07,-0.113109
2025-10-08,2.010557
2025-10-09,0.871584
2025-10-10,-0.010349


In [None]:
# Open-ended label slice: every row from February 10, 2027 through the end of the Series
date_series_1000.loc[datetime(2027, 2, 10):]

Unnamed: 0,0
2027-02-10,-0.901503
2027-02-11,-1.196289
2027-02-12,0.539017
2027-02-13,-0.658385
2027-02-14,1.276504
...,...
2027-09-23,0.720776
2027-09-24,0.914825
2027-09-25,-0.327021
2027-09-26,-1.876978


In [None]:
# Label slice between two datetimes: February 10, 2027 up to and including August 20, 2027
date_series_1000.loc[datetime(2027, 2, 10):datetime(2027, 8, 20)]

Unnamed: 0,0
2027-02-10,-0.901503
2027-02-11,-1.196289
2027-02-12,0.539017
2027-02-13,-0.658385
2027-02-14,1.276504
...,...
2027-08-16,-0.614316
2027-08-17,0.561951
2027-08-18,0.064635
2027-08-19,-0.470642


In [None]:
# The same kind of label slice with date strings: December 1, 2025 up to and including May 29, 2026
date_series_1000.loc['2025-12-01':'2026-05-29']

Unnamed: 0,0
2025-12-01,-0.060199
2025-12-02,-0.556266
2025-12-03,-0.026696
2025-12-04,0.584836
2025-12-05,-0.031443
...,...
2026-05-25,0.216300
2026-05-26,0.997105
2026-05-27,-0.147117
2026-05-28,1.363876


In [None]:
# Check whether the index (the dates) has unique values.
# Series.is_unique would test the random values instead, so go through .index.
date_series_1000.index.is_unique

True



*


In [None]:
# Six dates spaced five days apart, starting on 2025-10-12
dates_with_duplicates = [datetime(2025, 10, 12) + timedelta(days=5 * i) for i in range(6)]

# Append two dates that are already in the list, so the index will contain repeats
dates_with_duplicates += [datetime(2025, 10, 12), datetime(2025, 10, 17)]

# One random standard-normal value per date (repeats included)
date_series_with_duplicates = pd.Series(
    np.random.standard_normal(len(dates_with_duplicates)),
    index=dates_with_duplicates,
)

date_series_with_duplicates


Unnamed: 0,0
2025-10-12,1.014366
2025-10-17,-0.966826
2025-10-22,0.880103
2025-10-27,-0.131047
2025-11-01,0.126648
2025-11-06,-0.82113
2025-10-12,-0.43083
2025-10-17,-0.596395


In [None]:
date_series_with_duplicates.index.is_unique  # Check index uniqueness; False here because two dates were appended a second time

False

In [None]:
# duplicated() flags the second and later occurrence of each date (keep='first' is the default),
# so the selection holds each repeated date once per extra occurrence
duplicates = date_series_with_duplicates.index[
    date_series_with_duplicates.index.duplicated(keep='first')
]

print(duplicates)

DatetimeIndex(['2025-10-12', '2025-10-17'], dtype='datetime64[ns]', freq=None)


In [None]:
# Select every index entry (first occurrences included) whose date appears more than once
duplicated_dates = date_series_with_duplicates.index[
    date_series_with_duplicates.index.isin(duplicates)
]

print(duplicated_dates)


DatetimeIndex(['2025-10-12', '2025-10-17', '2025-10-12', '2025-10-17'], dtype='datetime64[ns]', freq=None)


## 8.3 Date ranges, frequencies and offsets

### Generation of date ranges



*   


In [2]:
# With only a start and an end given, date_range steps one calendar day at a time
dates = pd.date_range('2025-01-01', '2025-01-10')
print(dates)

DatetimeIndex(['2025-01-01', '2025-01-02', '2025-01-03', '2025-01-04',
               '2025-01-05', '2025-01-06', '2025-01-07', '2025-01-08',
               '2025-01-09', '2025-01-10'],
              dtype='datetime64[ns]', freq='D')




*   


In [11]:
# Give a start and a count instead of an end date: five consecutive days
dates = pd.date_range('2020-10-17', periods=5)
print(dates)

DatetimeIndex(['2020-10-17', '2020-10-18', '2020-10-19', '2020-10-20',
               '2020-10-21'],
              dtype='datetime64[ns]', freq='D')




*



In [10]:
# Create a monthly date range (end of the month)
dates = pd.date_range(start='2025-07-08', periods=4, freq='ME')
print(dates)

DatetimeIndex(['2025-07-31', '2025-08-31', '2025-09-30', '2025-10-31'], dtype='datetime64[ns]', freq='ME')




*   



In [8]:
# 'h' = hourly: 30 timestamps one hour apart, starting at 08:00 on 2025-03-12
dates = pd.date_range(
    '2025-03-12 08:00',
    periods=30,
    freq='h',
)
print(dates)

DatetimeIndex(['2024-03-12 08:00:00', '2024-03-12 09:00:00',
               '2024-03-12 10:00:00', '2024-03-12 11:00:00',
               '2024-03-12 12:00:00', '2024-03-12 13:00:00',
               '2024-03-12 14:00:00', '2024-03-12 15:00:00',
               '2024-03-12 16:00:00', '2024-03-12 17:00:00',
               '2024-03-12 18:00:00', '2024-03-12 19:00:00',
               '2024-03-12 20:00:00', '2024-03-12 21:00:00',
               '2024-03-12 22:00:00', '2024-03-12 23:00:00',
               '2024-03-13 00:00:00', '2024-03-13 01:00:00',
               '2024-03-13 02:00:00', '2024-03-13 03:00:00',
               '2024-03-13 04:00:00', '2024-03-13 05:00:00',
               '2024-03-13 06:00:00', '2024-03-13 07:00:00',
               '2024-03-13 08:00:00', '2024-03-13 09:00:00',
               '2024-03-13 10:00:00', '2024-03-13 11:00:00',
               '2024-03-13 12:00:00', '2024-03-13 13:00:00'],
              dtype='datetime64[ns]', freq='h')




*   


In [9]:
# 'B' = business day: Saturdays and Sundays are skipped, so a weekend start
# rolls forward to the following Monday
dates = pd.date_range(
    '2025-03-01',
    periods=7,
    freq='B',
)
print(dates)

DatetimeIndex(['2025-03-03', '2025-03-04', '2025-03-05', '2025-03-06',
               '2025-03-07', '2025-03-10', '2025-03-11'],
              dtype='datetime64[ns]', freq='B')


### Frequencies and date shifts





*   



In [18]:
# A number in front of the alias multiplies it: '4h' steps four hours at a time
dates = pd.date_range(
    '2024-03-12 08:00',
    periods=12,
    freq='4h',
)
print(dates)

DatetimeIndex(['2024-03-12 08:00:00', '2024-03-12 12:00:00',
               '2024-03-12 16:00:00', '2024-03-12 20:00:00',
               '2024-03-13 00:00:00', '2024-03-13 04:00:00',
               '2024-03-13 08:00:00', '2024-03-13 12:00:00',
               '2024-03-13 16:00:00', '2024-03-13 20:00:00',
               '2024-03-14 00:00:00', '2024-03-14 04:00:00'],
              dtype='datetime64[ns]', freq='4h')




*   


In [19]:
# Aliases can be combined: '1h30min' steps 1 hour 30 minutes at a time
dates = pd.date_range(
    '2025-01-20 08:00',
    periods=8,
    freq='1h30min',
)
print(dates)


DatetimeIndex(['2025-01-20 08:00:00', '2025-01-20 09:30:00',
               '2025-01-20 11:00:00', '2025-01-20 12:30:00',
               '2025-01-20 14:00:00', '2025-01-20 15:30:00',
               '2025-01-20 17:00:00', '2025-01-20 18:30:00'],
              dtype='datetime64[ns]', freq='90min')




*   


In [20]:
# Generate a range with a 10-minute interval
dates = pd.date_range(start='2026-10-02 09:20', periods=10, freq='10min')
print(dates)

DatetimeIndex(['2026-10-02 09:20:00', '2026-10-02 09:30:00',
               '2026-10-02 09:40:00', '2026-10-02 09:50:00',
               '2026-10-02 10:00:00', '2026-10-02 10:10:00',
               '2026-10-02 10:20:00', '2026-10-02 10:30:00',
               '2026-10-02 10:40:00', '2026-10-02 10:50:00'],
              dtype='datetime64[ns]', freq='10min')




*   


In [21]:
# 'W-MON' = weekly, anchored on Mondays; the range begins at the first Monday
# on or after the start date
dates = pd.date_range(
    '2025-09-20',
    periods=10,
    freq='W-MON',
)
print(dates)

DatetimeIndex(['2025-09-22', '2025-09-29', '2025-10-06', '2025-10-13',
               '2025-10-20', '2025-10-27', '2025-11-03', '2025-11-10',
               '2025-11-17', '2025-11-24'],
              dtype='datetime64[ns]', freq='W-MON')




*  


In [22]:
# 'QS' = quarter start: each timestamp is the first day of a quarter, beginning
# with the first quarter start on or after the start date
dates = pd.date_range(
    '2023-11-17',
    periods=4,
    freq='QS',
)
print(dates)

DatetimeIndex(['2024-01-01', '2024-04-01', '2024-07-01', '2024-10-01'], dtype='datetime64[ns]', freq='QS-JAN')




*  



In [25]:
# Generate a range with an annual frequency (last day of the year)
dates = pd.date_range(start='2010-01-01', periods=20, freq='YE')
print(dates)

DatetimeIndex(['2010-12-31', '2011-12-31', '2012-12-31', '2013-12-31',
               '2014-12-31', '2015-12-31', '2016-12-31', '2017-12-31',
               '2018-12-31', '2019-12-31', '2020-12-31', '2021-12-31',
               '2022-12-31', '2023-12-31', '2024-12-31', '2025-12-31',
               '2026-12-31', '2027-12-31', '2028-12-31', '2029-12-31'],
              dtype='datetime64[ns]', freq='YE-DEC')


### Moving the date

In [27]:
# Create a Series with a daily date range as the index and normally distributed values
series = pd.Series(
    np.random.randn(50),  # Generate 50 random values from a standard normal distribution
    index=pd.date_range(start='2027-10-11', periods=50, freq='D')  # 50 daily timestamps
)

series

Unnamed: 0,0
2027-10-11,-1.22672
2027-10-12,-0.23664
2027-10-13,0.258589
2027-10-14,1.266162
2027-10-15,0.635388
2027-10-16,-1.499472
2027-10-17,-0.019984
2027-10-18,1.547622
2027-10-19,-0.549912
2027-10-20,-0.338215




*   


In [28]:
# Move the values one row later; the index stays put, so the first row becomes NaN
series.shift(periods=1)


Unnamed: 0,0
2027-10-11,
2027-10-12,-1.22672
2027-10-13,-0.23664
2027-10-14,0.258589
2027-10-15,1.266162
2027-10-16,0.635388
2027-10-17,-1.499472
2027-10-18,-0.019984
2027-10-19,1.547622
2027-10-20,-0.549912




*   


In [29]:
# Move the values one row earlier; the index stays put, so the last row becomes NaN
series.shift(periods=-1)

Unnamed: 0,0
2027-10-11,-0.23664
2027-10-12,0.258589
2027-10-13,1.266162
2027-10-14,0.635388
2027-10-15,-1.499472
2027-10-16,-0.019984
2027-10-17,1.547622
2027-10-18,-0.549912
2027-10-19,-0.338215
2027-10-20,0.939047





*   



In [30]:
# Move the values seven rows later; the first seven rows become NaN
series.shift(periods=7)

Unnamed: 0,0
2027-10-11,
2027-10-12,
2027-10-13,
2027-10-14,
2027-10-15,
2027-10-16,
2027-10-17,
2027-10-18,-1.22672
2027-10-19,-0.23664
2027-10-20,0.258589




*   


In [31]:
# Shift forward by 1 hour. With `freq` given, the index moves and the values stay attached.
# The lowercase 'h' alias is used because the uppercase 'H' is deprecated and emits a FutureWarning.
series.shift(1, freq='h')

  series.shift(1, freq='H')   # Shift forward by 1 hour


Unnamed: 0,0
2027-10-11 01:00:00,-1.22672
2027-10-12 01:00:00,-0.23664
2027-10-13 01:00:00,0.258589
2027-10-14 01:00:00,1.266162
2027-10-15 01:00:00,0.635388
2027-10-16 01:00:00,-1.499472
2027-10-17 01:00:00,-0.019984
2027-10-18 01:00:00,1.547622
2027-10-19 01:00:00,-0.549912
2027-10-20 01:00:00,-0.338215




*   


In [32]:
# With `freq` given, the timestamps move (here by 30 minutes) and the values stay attached
series.shift(periods=1, freq='30min')

Unnamed: 0,0
2027-10-11 00:30:00,-1.22672
2027-10-12 00:30:00,-0.23664
2027-10-13 00:30:00,0.258589
2027-10-14 00:30:00,1.266162
2027-10-15 00:30:00,0.635388
2027-10-16 00:30:00,-1.499472
2027-10-17 00:30:00,-0.019984
2027-10-18 00:30:00,1.547622
2027-10-19 00:30:00,-0.549912
2027-10-20 00:30:00,-0.338215


In [36]:
# Build sample data sampled once per minute.
# 'min' replaces the deprecated 'T' alias, which emits a FutureWarning.
data = {'date': pd.date_range('2025-03-01', periods=10, freq='min'),
        'value': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]}

# Use the timestamps as the index (chained instead of mutating with inplace=True)
df = pd.DataFrame(data).set_index('date')

# Convert the frequency from minutes to hours ('h' replaces the deprecated 'H' alias).
# asfreq only keeps the rows that fall on the new frequency; it does not aggregate.
# The ten minutes of data contain a single full-hour timestamp, so one row remains.
df_converted = df.asfreq('h')

print(df_converted)



            value
date             
2025-03-01    100


  data = {'date': pd.date_range('2025-03-01', periods=10, freq='T'),
  df_converted = df.asfreq('H')


In [37]:
# pandas is already imported as `pd` in the notebook's import cell, so it is not re-imported here.

# Build sample data: one temperature reading per day for a week
data = {'date': pd.date_range('2025-03-01', periods=7, freq='D'),
        'temperature': [10, 15, 12, 17, 16, 14, 13]}

# Use the dates as the index (chained instead of mutating with inplace=True)
df = pd.DataFrame(data).set_index('date')

# Compute the moving average over a 3-row (3-day) window.
# The first two rows are NaN because their windows hold fewer than 3 observations.
df['rolling_mean'] = df['temperature'].rolling(window=3).mean()

print(df)


            temperature  rolling_mean
date                                 
2025-03-01           10           NaN
2025-03-02           15           NaN
2025-03-03           12     12.333333
2025-03-04           17     14.666667
2025-03-05           16     15.000000
2025-03-06           14     15.666667
2025-03-07           13     14.333333
