In [None]:
# urlretrieve moved to urllib.request in Python 3; keep the Python 2 location
# as a fallback so the notebook runs on either interpreter.
try:
    from urllib.request import urlretrieve
except ImportError:  # Python 2
    from urllib import urlretrieve

import pandas as pd


In [None]:
# Location of the weekly in-situ CO2 CSV that the notebook downloads
# (adjacent string literals are concatenated into a single URL).
URL = (
    'http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/'
    'in_situ_co2/weekly/weekly_in_situ_co2_mlo.csv'
)

In [None]:
# Download the CSV at URL and store it next to the notebook as MaunaLoa.csv
urlretrieve(URL, filename='MaunaLoa.csv')

In [None]:
# Echo the download URL so the data source is visible in the notebook output
print(URL)

In [None]:
#!head -55 MaunaLoa.csv

In [None]:
# Load the downloaded file: skip the first 44 lines, name the two columns,
# and use the parsed 'Date' column as a datetime index.
df = pd.read_csv(
    'MaunaLoa.csv',
    skiprows=44,
    names=['Date', 'CO2'],
    index_col='Date',
    parse_dates=True,
)

In [None]:
#df.columns, df.index

In [None]:
# Preview the first two rows of the loaded frame
df.head(2)

# Indexing

* `df[colname]` — selects the Series corresponding to `colname`

* `df.loc[row_indexer, column_indexer]` — selects rows and columns by label

In [None]:
# Label-based lookup: row label '2014-05-03', column 'CO2'
df.loc['2014-05-03', 'CO2']

In [None]:
# Position-based lookup: sixth row, first column (positions are zero-based)
df.iloc[5, 0]

In [None]:
# View all observations that occurred in May 2014.
# Selecting rows by a partial date string has to go through .loc:
# plain df['2014-05'] is treated as a column lookup on current pandas
# and raises KeyError.
df.loc['2014-05']

In [None]:
# Build 3031 weekly timestamps beginning at or after 1958-03-29.
# The plain 'W' alias is anchored on Sundays, so the first stamp is the
# first Sunday on or after the start date.
rng = pd.date_range(start='1958-03-29', freq='W', periods=3031)

# Last timestamp of the generated range
rng[-1]

## Datetimes


In [None]:
# Shift every timestamp in the index forward by one calendar year.
# pd.Timedelta no longer accepts the 'Y' unit (a year is not a fixed
# duration), so a calendar-aware DateOffset is used instead.
df.index + pd.DateOffset(years=1)

## Plotting

In [None]:
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Plot every column of df against its index
df.plot()

In [None]:
# Resample into annual bins ('A'), average each bin, and plot the result
df.resample('A').mean().plot()



In [None]:
# Number of missing values in the CO2 column
df['CO2'].isna().sum()

In [None]:
## Statsmodels

In [None]:
from pylab import rcParams
import statsmodels.api as sm

# Default figure size (width, height) used for figures created from here on
rcParams['figure.figsize'] = (9, 7)

# Month-start means of the CO2 series; missing months take the next
# available value (back-fill).
y = df['CO2'].resample('MS').mean().bfill()

# Additive seasonal decomposition of the monthly series, plotted as panels
decomposition = sm.tsa.seasonal_decompose(y, model='additive')
fig = decomposition.plot()
#plt.show()

In [None]:
# Mean of every column, grouped by the time-of-day component of the index.
# NOTE(review): the index is parsed from a 'Date' column; if every timestamp
# is midnight this produces a single group. Confirm whether grouping by
# df.index.month or df.index.year was intended.
df.groupby(df.index.time).mean()

 ## Maths - Formulas and operations
 
 In Excel, you might have something like: `sum(G2:I2)`

In [None]:
# Derive two scaled copies of the CO2 column (120 % and 80 % of the value)
df['CO2_high'] = df['CO2'].mul(1.2)
df['CO2_low'] = df['CO2'].mul(0.8)

# Element-wise sum of the two derived columns
df['CO2_sum'] = df['CO2_high'].add(df['CO2_low'])

# Show the first two rows including the new columns
df.head(2)

In [None]:
# Mathematical functions
#
# Applying a NumPy ufunc (an element-wise maths function) to a pandas object
# returns another pandas object with the index preserved.

import numpy as np

# exp(CO2 / mean(CO2)), first two rows
np.exp(df['CO2'].div(df['CO2'].mean())).head(2)

## Grouping / Aggregation

# Better plotting

In [None]:
fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # left, bottom, width, height (range 0 to 1)

# Draw only the measured CO2 series. Passing the whole frame would also draw
# CO2_high, CO2_low and CO2_sum as lines, stretching the y-axis and hiding
# the shaded band.
axes.plot(df.index, df['CO2'], label='CO2')

# Shaded band between the 0.8x and 1.2x columns
axes.fill_between(df.index, df['CO2_low'], df['CO2_high'], alpha=0.2,
                  label='CO2 x 0.8 to CO2 x 1.2')

# Descriptive labels so the figure stands on its own
axes.set_xlabel('Date')
axes.set_ylabel('CO2')
axes.set_title('Mauna Loa CO2 with +/-20% band')
axes.legend()

## Querying

DataFrame objects have a query() method that allows selection using an expression.

In [None]:
# Select rows with CO2 above 400 using a query expression; show the first three
df.query('CO2 > 400').head(3)

In [None]:
# Boolean-mask selection of rows with CO2 above 400; show the first three
df[df.CO2 > 400].head(3)

## Grouping, Pivot table

In [None]:
# Annual mean of every column.
# pd.TimeGrouper has been removed from pandas; pd.Grouper(freq=...) is the
# supported way to group a datetime index into fixed-frequency bins.
df2 = df.groupby(pd.Grouper(freq='A')).mean()

df2.head(7)

In [None]:
# Read TradeoffData.csv into a DataFrame and preview its first six rows
data = pd.read_csv(filepath_or_buffer='TradeoffData.csv')

data.head(6)


In [None]:
# Maximum of each remaining column per Treatment
data.pivot_table(index=['Treatment'], aggfunc='max')

In [None]:
# Mean RelativeFitness for every (Group, Treatment) combination
pd.pivot_table(
    data,
    values=['RelativeFitness'],
    index=['Group', 'Treatment'],
    aggfunc='mean',
)

## Geographical

In [None]:
#!pip install geopy
from geopy.geocoders import Nominatim

# Current geopy releases refuse to build a Nominatim geocoder without an
# application-specific user agent, so one is supplied explicitly.
geolocator = Nominatim(user_agent="maunaloa-co2-notebook")

# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute access below.
location = geolocator.geocode("Mauna Loa")
if location is None:
    raise ValueError('Geocoder returned no result for "Mauna Loa"')

location.longitude, location.latitude

In [None]:
import cartopy.crs as ccrs

# Alternative projections that were tried (kept for reference):
#projection=ccrs.Mollweide(location.longitude)
#projection=ccrs.PlateCarree()
# Plate Carree map centred on the 180 degree meridian
projection = ccrs.PlateCarree(central_longitude=180)


# Create map axes with that projection and draw cartopy's stock background image
ax = plt.axes(projection=projection)
ax.stock_img()

# Restrict the view; extents are given as [x0, x1, y0, y1].
# NOTE(review): no crs argument is passed, and x0 (160) > x1 (-120) spans the
# dateline -- confirm the visible region is the intended one.
ax.set_extent([160, -120, -10, 50])

ax.coastlines()

# Mark the geocoded location; transform=PlateCarree() declares that the
# coordinates passed here are plain longitude/latitude values.
plt.plot(location.longitude, location.latitude,
         color='red', linewidth=2, marker='o',
         transform=ccrs.PlateCarree(),
         )