# In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

# Default matplotlib figure size (width, height) for every figure created below.
plt.rc('figure', figsize=(12, 9))

import plotly.express as px

# Apply seaborn's default theme, using the 'talk' plotting context.
sns.set(context='talk')
# Keep printed NumPy arrays compact: summarize arrays with more than 20
# elements, show 2 decimals, and suppress scientific notation.
np.set_printoptions(threshold=20, precision=2, suppress=True)

# pandas display options. The setter is `pd.set_option` (singular);
# `pd.set_options` does not exist and raises AttributeError.
pd.set_option('display.max_rows', 30)
pd.set_option('display.max_columns', None)  # None: no limit on displayed columns
pd.set_option('display.precision', 2)
pd.set_option('display.float_format', '{:.2f}'.format)

import warnings
# Silence FutureWarning messages for the rest of the session.
warnings.simplefilter("ignore", category=FutureWarning)

# Location of the whitespace-delimited CO2 text file, relative to the
# working directory.
co2_file = "data/co2_mm_mlo.txt"

# First pass: no header row is read (columns get default integer labels),
# lines starting with '#' are skipped as comments, and fields are split on
# runs of whitespace. The separator must be a properly quoted raw string;
# the missing opening quote was a SyntaxError.
co2 = pd.read_csv(
    co2_file,
    header=None,
    comment="#",
    sep=r'\s+',
)
co2.head()

# Re-read the same file, this time giving the seven columns explicit names.
# The separator is a raw string so that `\s` reaches the regex engine
# untouched; in a plain string literal `\s` is an invalid escape sequence
# that triggers a warning on current Python versions.
co2 = pd.read_csv(
    co2_file,
    header=None,
    comment="#",
    sep=r'\s+',
    names=['Yr', 'Mo', 'DecDate', 'Avg', 'Int', 'Trend', 'Days'],
)
co2.head()

# First look at the series: 'Avg' plotted against 'DecDate', with markers.
px.line(co2, x='DecDate', y='Avg', markers=True, height=600)

# Preview the first rows of the table.
co2.head()

# Rows whose 'Avg' value is negative.
co2.loc[co2['Avg'] < 0]

# Table dimensions as (rows, columns).
co2.shape

# Number of rows per value of 'Mo', ordered by that value.
co2['Mo'].value_counts().sort_index()

# Histogram of the 'Days' column.
px.histogram(co2, x='Days', title="Distribution of days feature")

# 'Days' against 'Yr', to see how the field varies over the years.
px.scatter(co2, x="Yr", y="Days", title="Day field by Year")

# Histogram of the 'Avg' column.
px.histogram(co2, x='Avg')

# Rows with a negative 'Avg', listed again next to the histogram.
co2.loc[co2["Avg"] < 0]

# Line plot of 'Avg' against 'DecDate', with markers.
px.line(
    co2,
    x='DecDate',
    y='Avg',
    title="CO2 Average By Month",
    height=400,
    markers=True,
)

# Three alternative treatments of the missing monthly averages.

# 1. Drop: keep only the rows whose 'Avg' is strictly positive.
co2_drop = co2.loc[co2['Avg'].gt(0)]

# 2. NaN: replace every -99.99 in the table with NaN.
co2_NA = co2.replace(to_replace=-99.99, value=np.nan)

# 3. Impute: a copy of the table whose 'Avg' column is overwritten by 'Int'.
co2_impute = co2.assign(Avg=co2['Int'])

def line_and_points(data, ax, title):
    """Draw 'Avg' against 'Mo' on *ax* as a line with point markers on top.

    Parameters:
        data: Table-like object with 'Mo' and 'Avg' entries; it is passed
            through to the plotting calls via their ``data=`` keyword.
        ax: Axes-like object to draw on.
        title: Title to give the axes.

    The x-axis is fixed to the range 2..13 with ticks at 3 through 12.
    """
    x_col, y_col = 'Mo', 'Avg'
    # Line first, then the scatter points over it.
    for draw in (ax.plot, ax.scatter):
        draw(x_col, y_col, data=data)
    ax.set_title(title)
    ax.set_xlim(2, 13)
    ax.set_xticks(np.arange(3, 13))

def data_year(data, year):
    """Return the rows of *data* whose 'Yr' column equals *year*.

    Parameters:
        data: DataFrame with a 'Yr' column.
        year: Year value to select.

    Returns:
        The matching rows of *data*; empty when no row has that year.
    """
    # Filter on the requested year. The previous version compared against a
    # hard-coded 1958 and silently ignored the argument.
    return data[data["Yr"] == year]

# Side-by-side comparison of the three missing-value treatments for one
# year, drawn as three panels that share a y-axis.
year = 1958
fig, axes = plt.subplots(1, 3, figsize=(12, 4), sharey=True)

panels = [
    (co2_drop, "1. Drop Missing"),
    (co2_NA, "2. Missing Set to NaN"),
    (co2_impute, "3. Missing Interpolated"),
]
for axis, (frame, label) in zip(axes, panels):
    line_and_points(data_year(frame, year), axis, title=label)

fig.suptitle(f"Monthly Averages for {year}")
plt.tight_layout()

# Full-series line plots, one per missing-value treatment.

# Imputed series. The frame is named `co2_impute`; the earlier `co2_imput`
# was an undefined name and raised NameError.
px.line(co2_impute, x='DecDate', y='Avg', title="CO2 Average By Month, Imputed", markers=True, height=500)

# Series with the missing rows dropped.
px.line(co2_drop, x='DecDate', y='Avg', title="CO2 Average By Month, Droped", markers=True, height=500)

# Series with missing values set to NaN.
px.line(co2_NA, x='DecDate', y='Avg', title="CO2 Average By Month, NaN", height=500, markers=True)

# Collapse the imputed table to one row per year by taking the mean of every
# column, keeping 'Yr' as an ordinary column, then plot the yearly 'Avg'.
co2_year = co2_impute.groupby('Yr').mean().reset_index()
px.line(
    co2_year,
    x='Yr',
    y='Avg',
    title="CO2 Average By Year",
    height=500,
)





