# US Monthly CO2 Data

In [1]:
# imports
import pandas as pd
import numpy as np
import os

from datetime import datetime
from sklearn.metrics import mean_squared_error
from math import sqrt

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from pandas.plotting import register_matplotlib_converters

from matplotlib.ticker import StrMethodFormatter
from matplotlib.dates import DateFormatter

import statsmodels.api as sm
from statsmodels.tsa.api import Holt

import warnings
# NOTE(review): this suppresses every warning for the rest of the session, not
# just known-noisy ones; consider filtering by category or module instead.
warnings.filterwarnings("ignore")

In [2]:
# Load the raw CSV. The path is relative, so the file must sit in the
# notebook's working directory.
df = pd.read_csv('us_monthly_co2_data.csv')

In [3]:
# Preview the first 28 rows of the raw frame (all columns as loaded).
df.head(28)

Unnamed: 0,MSN,YYYYMM,Value,Column_Order,Description,Unit
0,CKTCEUS,197301,109.552,1,"Coal, Including Coal Coke Net Imports, CO2 Emi...",Million Metric Tons of Carbon Dioxide
1,CKTCEUS,197302,98.833,1,"Coal, Including Coal Coke Net Imports, CO2 Emi...",Million Metric Tons of Carbon Dioxide
2,CKTCEUS,197303,98.483,1,"Coal, Including Coal Coke Net Imports, CO2 Emi...",Million Metric Tons of Carbon Dioxide
3,CKTCEUS,197304,94.15,1,"Coal, Including Coal Coke Net Imports, CO2 Emi...",Million Metric Tons of Carbon Dioxide
4,CKTCEUS,197305,95.424,1,"Coal, Including Coal Coke Net Imports, CO2 Emi...",Million Metric Tons of Carbon Dioxide
5,CKTCEUS,197306,98.888,1,"Coal, Including Coal Coke Net Imports, CO2 Emi...",Million Metric Tons of Carbon Dioxide
6,CKTCEUS,197307,104.371,1,"Coal, Including Coal Coke Net Imports, CO2 Emi...",Million Metric Tons of Carbon Dioxide
7,CKTCEUS,197308,106.631,1,"Coal, Including Coal Coke Net Imports, CO2 Emi...",Million Metric Tons of Carbon Dioxide
8,CKTCEUS,197309,99.436,1,"Coal, Including Coal Coke Net Imports, CO2 Emi...",Million Metric Tons of Carbon Dioxide
9,CKTCEUS,197310,102.224,1,"Coal, Including Coal Coke Net Imports, CO2 Emi...",Million Metric Tons of Carbon Dioxide


In [4]:
# Keep only rows whose YYYYMM code does not end in "13". Code 13 is not a
# calendar month (presumably an annual-total row -- confirm against the source).
df = df[~df['YYYYMM'].astype(str).str.endswith('13')]

In [5]:
# Discard the descriptive columns, leaving only the period code and the value.
# NOTE(review): after this step the frame holds 8148 rows spanning 1973-01 to
# 2021-06, which suggests several MSN series are stacked; dropping MSN removes
# the only series identifier -- confirm a per-series filter is not needed first.
df = df.drop(columns=['MSN', 'Column_Order', 'Description', 'Unit'])

In [6]:
# Give the two remaining columns short, descriptive names.
df = df.rename(columns={'YYYYMM': 'date', 'Value': 'co2'})

In [8]:
# Confirm that only the renamed `date` and `co2` columns remain.
df.head(14)

Unnamed: 0,date,co2
0,197301,109.552
1,197302,98.833
2,197303,98.483
3,197304,94.15
4,197305,95.424
5,197306,98.888
6,197307,104.371
7,197308,106.631
8,197309,99.436
9,197310,102.224


In [9]:
# Inspect the last rows to see where the data ends.
df.tail()

Unnamed: 0,date,co2
8815,202102,415.513
8816,202103,398.802
8817,202104,366.332
8818,202105,376.347
8819,202106,403.179


In [10]:
# Check row count, dtypes and non-null counts after filtering.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8148 entries, 0 to 8819
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    8148 non-null   int64  
 1   co2     8148 non-null   float64
dtypes: float64(1), int64(1)
memory usage: 191.0 KB


In [11]:
# Cast the integer period codes to strings so they can be sliced by position.
df['date'] = df['date'].astype(str)

In [12]:
# Turn each "YYYYMM" string into "YYYY-MM-01", pinning every observation to the
# first day of its month. The vectorised .str accessor replaces the element-wise
# apply/lambda; the resulting strings are the same.
df['date'] = df['date'].str[:4] + "-" + df['date'].str[4:] + "-" + '01'

In [13]:
# Check the reformatted date strings before parsing them.
df.head(14)

Unnamed: 0,date,co2
0,1973-01-01,109.552
1,1973-02-01,98.833
2,1973-03-01,98.483
3,1973-04-01,94.15
4,1973-05-01,95.424
5,1973-06-01,98.888
6,1973-07-01,104.371
7,1973-08-01,106.631
8,1973-09-01,99.436
9,1973-10-01,102.224


In [14]:
# Parse the "YYYY-MM-DD" strings into datetime64 values. The explicit format
# avoids per-value format inference and raises on any string that does not match.
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')


In [15]:
# Spot-check the first rows after date parsing.
df.head()

Unnamed: 0,date,co2
0,1973-01-01,109.552
1,1973-02-01,98.833
2,1973-03-01,98.483
3,1973-04-01,94.15
4,1973-05-01,95.424


In [16]:
# Spot-check the last rows after date parsing.
df.tail()

Unnamed: 0,date,co2
8815,2021-02-01,415.513
8816,2021-03-01,398.802
8817,2021-04-01,366.332
8818,2021-05-01,376.347
8819,2021-06-01,403.179


In [17]:
# Derive calendar year and month-of-year columns from the parsed dates.
df = df.assign(
    year=df['date'].dt.year,
    month=df['date'].dt.month,
)

df.head()

Unnamed: 0,date,co2,year,month
0,1973-01-01,109.552,1973,1
1,1973-02-01,98.833,1973,2
2,1973-03-01,98.483,1973,3
3,1973-04-01,94.15,1973,4
4,1973-05-01,95.424,1973,5


In [18]:
# Confirm the year/month columns are populated through the end of the data.
df.tail()

Unnamed: 0,date,co2,year,month
8815,2021-02-01,415.513,2021,2
8816,2021-03-01,398.802,2021,3
8817,2021-04-01,366.332,2021,4
8818,2021-05-01,376.347,2021,5
8819,2021-06-01,403.179,2021,6


In [19]:
# Display the date column to confirm its dtype is datetime64.
df.date

0      1973-01-01
1      1973-02-01
2      1973-03-01
3      1973-04-01
4      1973-05-01
          ...    
8815   2021-02-01
8816   2021-03-01
8817   2021-04-01
8818   2021-05-01
8819   2021-06-01
Name: date, Length: 8148, dtype: datetime64[ns]