<a href="https://colab.research.google.com/github/davidofitaly/notes_03_python_in_data_analysis/blob/main/08_time_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta

## 8.1 Date and time data types and tools

### Introduction



*   



In [7]:
from datetime import datetime  # Import the datetime class from the standard-library datetime module

time_today = datetime.now()  # Capture the current local date and time

time_today  # Last expression in the cell, so the notebook displays its repr

datetime.datetime(2025, 3, 11, 17, 42, 5, 402765)

In [8]:
# Pull the individual calendar fields (year, month, day) out of the datetime object
(time_today.year, time_today.month, time_today.day)

(2025, 3, 11)



*   


In [17]:
# Subtracting one datetime from another yields a timedelta
delta_1 = datetime(2020, 10, 7) - datetime(2019, 6, 15)

# The gap expressed as whole days and as a total number of seconds
(delta_1.days, delta_1.total_seconds())

(480, 41472000.0)



*   



In [19]:
# Snapshot of the current date and time; the next cell reuses this value
time = datetime.now()

# Shift the snapshot 20 days forward (a bare positional timedelta argument means days)
time + timedelta(days=20)

datetime.datetime(2025, 3, 31, 17, 48, 12, 412469)

In [24]:
# Offset the same base timestamp by different units
time_plus_days = time + timedelta(days=20)        # 20 days later
time_plus_minutes = time + timedelta(minutes=30)  # 30 minutes later
time_plus_seconds = time + timedelta(seconds=45)  # 45 seconds later

# One shifted timestamp per output line
print(time_plus_days, time_plus_minutes, time_plus_seconds, sep="\n")

2025-03-31 17:49:35.932779
2025-03-11 18:19:35.932779
2025-03-11 17:50:20.932779


### Conversion between string and datetime objects



*  


In [29]:
# Basic date format (YYYY-MM-DD); no time is given, so it is parsed as midnight
date_1 = datetime.strptime("2025-02-28", "%Y-%m-%d")

print(date_1)  # Output: 2025-02-28 00:00:00

2025-02-28 00:00:00




*  



In [27]:
# Date with time (DD/MM/YYYY HH:MM:SS)
date_2 = datetime.strptime("17/04/2025 14:30:20", "%d/%m/%Y %H:%M:%S")

print(date_2)  # Output: 2025-04-17 14:30:20

2025-04-17 14:30:20




*   


In [30]:
# Custom format: full month name (%B), day of month (%d), four-digit year (%Y)
date_3 = datetime.strptime("March 11, 2024", "%B %d, %Y")
print(date_3)  # Output: 2024-03-11 00:00:00

2024-03-11 00:00:00




*  


In [31]:
# Date with abbreviated month name (%b instead of %B)
date_4 = datetime.strptime("Mar 11, 2024", "%b %d, %Y")
print(date_4)  # Output: 2024-03-11 00:00:00

2024-03-11 00:00:00




*   


In [32]:
# Using different separators: literal characters in the format (".", " - ", ":") must match the input exactly
date_5 = datetime.strptime("2024.03.11 - 08:15", "%Y.%m.%d - %H:%M")

print(date_5)  # Output: 2024-03-11 08:15:00 (seconds are not in the input, so they are 0)

2024-03-11 08:15:00


## 8.2 Fundamentals of time series



*  


In [41]:
# Build a list of 10 dates spaced 10 days apart, starting on 2025-10-12
dates = [datetime(2025, 10, 12) + timedelta(days=10 * step) for step in range(10)]

# One draw from the standard normal distribution per date
# (no seed is set, so the values differ on every run)
values = np.random.standard_normal(len(dates))

# Using the datetime objects as the index gives the Series a DatetimeIndex
date_series = pd.Series(data=values, index=dates)

date_series



Unnamed: 0,0
2025-10-12,-0.622706
2025-10-22,-0.405387
2025-11-01,-0.957087
2025-11-11,-0.243918
2025-11-21,-0.873032
2025-12-01,-0.113742
2025-12-11,-1.143924
2025-12-21,-1.834915
2025-12-31,1.186504
2026-01-10,0.36967


In [36]:
date_series.index  # Access the index of the Series; the datetime objects are stored as a DatetimeIndex

DatetimeIndex(['2025-10-12', '2025-10-22', '2025-11-01', '2025-11-11',
               '2025-11-21', '2025-12-01', '2025-12-11', '2025-12-21',
               '2025-12-31', '2026-01-10'],
              dtype='datetime64[ns]', freq=None)

In [39]:
# Add the Series to a subset holding every third element of itself.
# Arithmetic aligns on the date index, so dates missing from the subset come out as NaN.
date_series.add(date_series[::3])

Unnamed: 0,0
2025-10-12,-0.117194
2025-10-22,
2025-11-01,
2025-11-11,0.3441
2025-11-21,
2025-12-01,
2025-12-11,-1.696384
2025-12-21,
2025-12-31,
2026-01-10,-2.246206


In [43]:
# Look up the value stored for the date 2025-11-11, passing the date as a string label
date_series.loc['2025-11-11']

-0.24391833058500229



*   



In [45]:
# 1000 draws from the standard normal distribution, indexed by 1000
# consecutive periods of a date range that starts on 2025-01-01
date_series_1000 = pd.Series(
    data=np.random.standard_normal(1000),
    index=pd.date_range(start='2025-01-01', periods=1000),
)

date_series_1000  # Display the Series (dates paired with the random values)

Unnamed: 0,0
2025-01-01,0.824338
2025-01-02,-0.525182
2025-01-03,0.251120
2025-01-04,-1.684837
2025-01-05,-1.410256
...,...
2027-09-23,0.720776
2027-09-24,0.914825
2027-09-25,-0.327021
2027-09-26,-1.876978


In [47]:
# Passing only a year string selects every entry that falls in 2026
date_series_1000.loc['2026']

Unnamed: 0,0
2026-01-01,-0.734096
2026-01-02,-0.028901
2026-01-03,0.496581
2026-01-04,0.076928
2026-01-05,-0.093201
...,...
2026-12-27,0.308094
2026-12-28,-0.475792
2026-12-29,0.598617
2026-12-30,-0.776443


In [49]:
# A year-month string selects every entry that falls in October 2025
date_series_1000.loc['2025-10']

Unnamed: 0,0
2025-10-01,-0.054827
2025-10-02,-0.016064
2025-10-03,0.322522
2025-10-04,0.750339
2025-10-05,-1.315368
2025-10-06,-0.11149
2025-10-07,-0.113109
2025-10-08,2.010557
2025-10-09,0.871584
2025-10-10,-0.010349


In [56]:
# Open-ended slice: everything from February 10, 2027 to the end of the Series
date_series_1000.loc[datetime(2027, 2, 10):]

Unnamed: 0,0
2027-02-10,-0.901503
2027-02-11,-1.196289
2027-02-12,0.539017
2027-02-13,-0.658385
2027-02-14,1.276504
...,...
2027-09-23,0.720776
2027-09-24,0.914825
2027-09-25,-0.327021
2027-09-26,-1.876978


In [60]:
# Slice bounded by two datetime objects: February 10, 2027 through August 20, 2027
date_series_1000.loc[datetime(2027, 2, 10):datetime(2027, 8, 20)]

Unnamed: 0,0
2027-02-10,-0.901503
2027-02-11,-1.196289
2027-02-12,0.539017
2027-02-13,-0.658385
2027-02-14,1.276504
...,...
2027-08-16,-0.614316
2027-08-17,0.561951
2027-08-18,0.064635
2027-08-19,-0.470642


In [62]:
# The slice bounds can also be date strings: December 1, 2025 through May 29, 2026
date_series_1000.loc['2025-12-01':'2026-05-29']

Unnamed: 0,0
2025-12-01,-0.060199
2025-12-02,-0.556266
2025-12-03,-0.026696
2025-12-04,0.584836
2025-12-05,-0.031443
...,...
2026-05-25,0.216300
2026-05-26,0.997105
2026-05-27,-0.147117
2026-05-28,1.363876


In [70]:
# Check whether the index (the dates) contains only unique labels.
# Note: Series.is_unique would test the *values* instead, so go through .index.
date_series_1000.index.is_unique

True



*


In [67]:
# Create a list of dates with duplicates
dates_with_duplicates = [datetime(2025, 10, 12) + timedelta(days=i*5) for i in range(6)]

# Add duplicates to the list
dates_with_duplicates.extend([datetime(2025, 10, 12), datetime(2025, 10, 17)])

# Create a pandas Series with random values
date_series_with_duplicates = pd.Series(np.random.standard_normal(len(dates_with_duplicates)), index=dates_with_duplicates)

date_series_with_duplicates


Unnamed: 0,0
2025-10-12,1.014366
2025-10-17,-0.966826
2025-10-22,0.880103
2025-10-27,-0.131047
2025-11-01,0.126648
2025-11-06,-0.82113
2025-10-12,-0.43083
2025-10-17,-0.596395


In [72]:
date_series_with_duplicates.index.is_unique  # Check whether every date in the index is unique (False here: two dates are repeated)

False

In [73]:
# Find the duplicated dates in the index
duplicates = date_series_with_duplicates.index[date_series_with_duplicates.index.duplicated()]

print(duplicates)

DatetimeIndex(['2025-10-12', '2025-10-17'], dtype='datetime64[ns]', freq=None)


In [74]:
# Find the dates that appear more than once
duplicated_dates = date_series_with_duplicates.index[date_series_with_duplicates.index.isin(duplicates)]

print(duplicated_dates)


DatetimeIndex(['2025-10-12', '2025-10-17', '2025-10-12', '2025-10-17'], dtype='datetime64[ns]', freq=None)
