# Aggregating Time-Series Data

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Load the CO2 dataset from statsmodels' bundled datasets.
# load_pandas() returns a dataset object; its `.data` attribute is the
# DataFrame (single `co2` column, date index) that the rest of the notebook uses.
data = sm.datasets.co2.load_pandas()
co2 = data.data
# Preview the first five rows as the cell's displayed result.
co2.head()


Unnamed: 0,co2
1958-03-29,316.1
1958-04-05,317.3
1958-04-12,317.6
1958-04-19,317.5
1958-04-26,316.4


## Initial Data Exploration

In [8]:
# Show the leading rows followed by the index in a single call; the newline
# separator keeps the text output the same as printing each object on its own.
print(co2.head(), co2.index, sep="\n")


              co2
1958-03-29  316.1
1958-04-05  317.3
1958-04-12  317.6
1958-04-19  317.5
1958-04-26  316.4
DatetimeIndex(['1958-03-29', '1958-04-05', '1958-04-12', '1958-04-19',
               '1958-04-26', '1958-05-03', '1958-05-10', '1958-05-17',
               '1958-05-24', '1958-05-31',
               ...
               '2001-10-27', '2001-11-03', '2001-11-10', '2001-11-17',
               '2001-11-24', '2001-12-01', '2001-12-08', '2001-12-15',
               '2001-12-22', '2001-12-29'],
              dtype='datetime64[ns]', length=2284, freq='W-SAT')


## Aggregation Functions
Resampling groups the observations into coarser time bins (here, calendar years); an aggregation function then reduces each bin to a single value. Common aggregations include the mean, sum, maximum, and minimum.

In [9]:
# Using different aggregation methods after resampling.
# Build the yearly resampler once and reuse it for every statistic.
# 'YE' (year-end) is the current spelling of the yearly alias; the older 'A'
# alias is deprecated and emits a FutureWarning on pandas >= 2.2.
co2_by_year = co2.resample('YE')

# Note: the yearly sum depends on how many observations fall in each year,
# so totals for years with fewer readings are not directly comparable.
yearly_co2_sum = co2_by_year.sum()  # Sum of values every year
yearly_co2_max = co2_by_year.max()  # Maximum value every year
yearly_co2_min = co2_by_year.min()  # Minimum value every year

print("Yearly Sum:\n", yearly_co2_sum.head())
print("Yearly Max:\n", yearly_co2_max.head())
print("Yearly Min:\n", yearly_co2_min.head())


Yearly Sum:
                 co2
1958-12-31   7885.5
1959-12-31  15163.5
1960-12-31  16793.6
1961-12-31  16514.8
1962-12-31  15290.2
Yearly Max:
               co2
1958-12-31  317.9
1959-12-31  318.7
1960-12-31  320.0
1961-12-31  320.6
1962-12-31  321.1
Yearly Min:
               co2
1958-12-31  313.0
1959-12-31  313.0
1960-12-31  313.3
1961-12-31  314.5
1962-12-31  315.1


  yearly_co2_sum = co2.resample('A').sum()  # Sum of values every year
  yearly_co2_max = co2.resample('A').max()  # Maximum value every year
  yearly_co2_min = co2.resample('A').min()  # Minimum value every year


## Custom Aggregations
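For more complex scenarios, you can compute several statistics in one pass by passing a list of aggregation functions (built-in names or your own callables) to `.agg()`; the result has one column per function.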

In [10]:
# Applying multiple aggregation functions at once.
# Passing a list of names to .agg() produces one column per statistic, nested
# under the original `co2` column.
# 'YE' (year-end) replaces the deprecated 'A' alias, which emits a
# FutureWarning on pandas >= 2.2.
yearly_co2_stats = co2.resample('YE').agg(['mean', 'std', 'min', 'max'])
print(yearly_co2_stats.head())


                   co2                        
                  mean       std    min    max
1958-12-31  315.420000  1.467424  313.0  317.9
1959-12-31  315.906250  1.617643  313.0  318.7
1960-12-31  316.860377  1.984395  313.3  320.0
1961-12-31  317.592308  1.736782  314.5  320.6
1962-12-31  318.545833  1.810014  315.1  321.1


  yearly_co2_stats = co2.resample('A').agg(['mean', 'std', 'min', 'max'])
