In [None]:
!pip install pandas
!pip install matplotlib
!pip install numpy

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Pandas
Pandas is a Python library focused on data manipulation and analysis. It can be seen as an extension of NumPy, and is based on the NumPy `ndarray`, `dtype` and corresponding functionalities. Pandas offers data structures and operations for manipulating tabular data and time series. Pandas introduces two new data structures: `Series` and `DataFrame`.

## Series
The `Series` structure is similar to the 1-dimensional NumPy array, with the addition of an *index* attribute. It can be created from `list`s, `ndarray`s and similar objects. As with `ndarray`, all items must be of the same `dtype`.

In [None]:
# One string per character; no index is passed, so pandas supplies the default one.
simple_series = pd.Series(data=list("abcdefg"))
print(simple_series)

*Note: by default, a `Series` is given an index of sequential numbers starting from 0*

In [None]:
# Show the two components of the Series: its index and its underlying values.
print(simple_series.index, simple_series.values, sep="\n")

### `Series` index

##### Accessing `Series` by index

In [None]:
# A notebook cell only echoes its LAST expression, so both lookups are printed
# explicitly; otherwise the scalar lookup would be silently discarded.
print("simple_series[0]:", simple_series[0])
print("simple_series[2:5]:\n", simple_series[2:5])

##### Manual `Series` index

In [None]:
# Nine evenly spaced floats from 0 to 2, labelled with the letters 'a'..'i'
# instead of the default integer index.
indexed_series = pd.Series(
    data=np.linspace(0., 2., 9),
    index=list('abcdefghi'),
)
print(indexed_series)

In [None]:
# Scalar and slice lookups with [] on a Series that has a string index.
print("indexed_series['b']:",indexed_series['b'])
# An integer key on a string-labelled index is treated as a position here.
# NOTE(review): recent pandas versions deprecate this positional fallback in
# favour of .iloc — confirm against the installed version.
print("indexed_series[1]:",indexed_series[1])
# A label slice includes its end label ('h') ...
print("indexed_series['c':'h']:\n",indexed_series['c':'h'])
# ... while a positional slice excludes its end position (7).
# The printed label now matches the slice that is actually evaluated.
print("indexed_series[2:7]:\n",indexed_series[2:7])


In [None]:
# Replace the letter labels with the even integers 0, 2, ..., 16.
indexed_series.index = list(range(0, 18, 2))
print(indexed_series)

In [None]:
# [] lookups after the index was replaced by the even integers 0..16.
# NOTE(review): scalar integer keys appear to be matched against the index
# labels here, while the integer slice is taken by position — confirm against
# the pandas indexing documentation.
print("indexed_series[0]:",indexed_series[0])
print("indexed_series[2]:",indexed_series[2])
print("indexed_series[2:7]:\n",indexed_series[2:7])

In [None]:
# The index now holds only even integers, so there is no label 1 and the
# lookup fails. Catch the error and show it so the cell demonstrates the
# failure without aborting a full run of the notebook.
try:
    print("indexed_series[1]:",indexed_series[1])
except KeyError as err:
    print("KeyError:", err)

*Accessing `Series` elements using the square brackets `[]` operator can be confusing and inconsistent when the index is not a sequential range. Use `loc` and `iloc` instead.*

In [None]:
# .loc looks the key up among the Series index labels (index location).
print(indexed_series.loc[2])
# .iloc takes the item at the given zero-based position (integer location).
print(indexed_series.iloc[2])

In [None]:
# Same slice bounds through both accessors: .loc slices by index label,
# .iloc slices by position.
print(indexed_series.loc[2:8], indexed_series.iloc[2:8], sep="\n")

In [None]:
# Overwrite the element whose index label is 0, then show the whole Series.
indexed_series.loc[0] = -1
print(indexed_series)

In [None]:
# Assign one scalar to every element selected by the label slice 2:4.
indexed_series.loc[2:4] = -2
print(indexed_series)

In [None]:
# Assign a list of four values to the label slice 6:12; the list length has to
# match the number of selected elements (labels 6, 8, 10 and 12).
indexed_series.loc[6:12] = [0,0.25,.75,1]
print(indexed_series)

In [None]:
# Build a Series from a dict: the keys become the index, the values the data.
ascii_series = pd.Series({
    "a": 97, "b": 98, "c": 99, "d": 100,
    "A": 65, "B": 66, "C": 67, "D": 68,
})
print(ascii_series)

In [None]:
# Assign through .loc to the labels 'E' and 'e', which were not among the
# dict keys used to build ascii_series, then print the result.
ascii_series.loc['E'] = 69
ascii_series.loc['e'] = 101
print(ascii_series)

In [None]:
# Second batch of letters to add to the existing series.
expanded_series = pd.Series({"f":102, "g":103,"h":104,"i":105,"F":70, "G":71,"H":72,"I":73})
# Series.append was removed in pandas 2.0; pd.concat is the supported way to
# join Series end to end and gives the same result.
ascii_series = pd.concat([ascii_series, expanded_series])
print(ascii_series)
print("******")
# Label slice on the still-unsorted index, for comparison with the same slice
# taken after the index has been sorted.
print(ascii_series.loc['a':'f'])

In [None]:
# Order the entries by index label, then repeat the 'a':'f' label slice on the
# sorted Series.
ascii_series = ascii_series.sort_index()
print(ascii_series, "******", ascii_series.loc['a':'f'], sep="\n")

# DataFrame
The Pandas `DataFrame` is used to hold tabular data (tables, similar to SQL or Excel). It can be seen as a 2-dimensional `ndarray` where the columns are an ordered sequence of aligned `Series` objects (sharing the same index). It can also be seen as a specialized version of the Python `dict` object, where the keys are column names and values are the `Series` mapped to each name.

In [None]:
# Map 'A'..'I' to 41..49 and 'a'..'i' to 61..69.
# NOTE(review): these look like the hexadecimal code points (0x41.., 0x61..)
# written as decimal integers — confirm that this is intended.
unicode_series = pd.Series(
    dict(zip("ABCDEFGHIabcdefghi", [*range(41, 50), *range(61, 70)]))
)
print(unicode_series)

In [None]:
# Combine the two Series into one table: each dict key becomes a column name
# and each Series becomes that column.
df = pd.DataFrame(data={
    'ascii': ascii_series,
    'unicode': unicode_series,
})
df

In [None]:
# Row labels first, then column labels.
print(df.index, df.columns, sep="\n")

##### Accessing a DataFrame

In [None]:
df['ascii'] # square brackets with a column name select that column

In [None]:
df.unicode # the same column selection, written as attribute access

*Note: accessing via attribute method can be dangerous if the column name corresponds to an existing attribute or function of the DataFrame object. In this case, the object's attribute will be returned (or modified!) instead of the column. It is safest to use the indexing access method.*

In [None]:
# The frame's data without its row/column labels.
df.values

In [None]:
df["A":"a"] # a single key in [] selects a column, but a slice in [] selects ROWS (here by index label)

In [None]:
df[0:9] # an integer slice in [] selects rows by position

In [None]:
# Keep only the rows for which the comparison on the 'ascii' column is True.
df[df.ascii > 70]

In [None]:
# Show the result of the comparison itself, followed by its type.
print(df.ascii > 70, type(df.ascii > 70), sep="\n")

###### Comparison operators on DataFrame columns create a `Series` of boolean values, mapping each DataFrame index label to the result of the test for that row.

### DataFrame Views

In [None]:
df.T # transposed view: rows and columns swapped

*Note: as with NumPy arrays, some functions for Series and DataFrames return a reference (view), and some return a copy. Modifying views will modify the original object.*

In [None]:
# First rows, then last rows, of the frame.
print(df.head(), df.tail(), sep="\n")

In [None]:
# Take the first rows of df, overwrite one cell through chained indexing
# (column 'ascii', then row 'A'), and print the ORIGINAL frame to see whether
# the write reached it.
# NOTE(review): whether `head` shares memory with `df` depends on the pandas
# version and its copy-on-write setting, and chained assignment may emit a
# SettingWithCopyWarning — confirm the behaviour on the installed version.
head = df.head()
head['ascii']['A'] = 0
print(df)

In [None]:
# Write 65 through the transposed frame (column 'A', then row 'ascii'), which
# addresses the cell at row 'A', column 'ascii' of df, then print df.
# NOTE(review): the write only reaches df if df.T shares memory with it, which
# depends on the pandas version — confirm on the installed version.
df.T['A']['ascii'] = 65
print(df)

In [None]:
# Add a 'lower' column holding the lowercase form of each row label
# (computed with the index's vectorised string methods).
df['lower'] = df.index.str.lower()
print(df.head())

In [None]:
# Alphabet position of each letter ('a' -> 1, 'b' -> 2, ...), derived from the
# code point of the lowercase letter.
df['order'] = df['lower'].map(ord) - (ord('a') - 1)
print(df)

In [None]:
# Remove the helper column now that 'order' has been derived from it.
# `columns=` replaces the positional axis argument (`1`), which pandas 2.0 no
# longer accepts; rebinding `df` avoids the discouraged `inplace=True`.
df = df.drop(columns='lower')
print(df)

### `loc` and `iloc` in DataFrames
In `DataFrame`s, `loc` and `iloc` access data by index label and by integer position, respectively. Each accessor accepts two indexers: the first selects the rows and the second selects the columns.

In [None]:
# Single cell: row label 'A', column label 'ascii'.
df.loc['A','ascii']

In [None]:
# One row label with a list of column labels.
df.loc['A',['ascii','order']]

In [None]:
# A slice of row labels with a list of column labels.
df.loc['A':'E',['ascii','order']]

In [None]:
df.iloc[1,2] # row position 1, column position 2 (positions count from 0)

In [None]:
df.iloc[6:,-1] # rows from position 6 to the end, last column

In [None]:
df.iloc[1:4,1:3] # row positions 1-3, column positions 1-2 (slice end is excluded)

# Working Example - Sea Ice

In [None]:
# Load the sea-ice table, explicitly using its 'Date' column as the row index.
sea_ice = pd.read_csv(
    'sea-ice-fixed.csv',
    index_col='Date',
)
sea_ice.head()

In [None]:
# Convert the index values with pd.to_datetime so the rows can be compared
# and plotted as dates, then preview the first rows.
sea_ice.index = pd.to_datetime(sea_ice.index)
sea_ice.head()

In [None]:
# First ten rows whose date index compares greater than the string '1980'.
sea_ice.loc[sea_ice.index > '1980'].head(10)

In [None]:
# Plot sea-ice extent over time, one line per hemisphere, on shared axes.
fig, ax = plt.subplots(figsize=(9,5))
# Selecting 'Extent' keeps any other columns off the axes, and legend=True
# adds a legend so the two hemispheres can be told apart.
sea_ice.groupby('hemisphere')['Extent'].plot(ax=ax, legend=True)
ax.set_xlabel('Date')
ax.set_ylabel('Extent');  # trailing ';' suppresses the repr of the returned object

In [None]:
# Split the data by hemisphere and draw both extent curves on one explicit
# Axes (object-oriented interface rather than the pyplot state machine).
fig, ax = plt.subplots(figsize=(12, 6))
north = sea_ice[sea_ice.hemisphere == "north"]
south = sea_ice[sea_ice.hemisphere == "south"]
ax.plot(north.index, north.Extent, label='Northern Hemisphere')
# The stray double space in the original 'Southern  Hemisphere' label is removed.
ax.plot(south.index, south.Extent, label='Southern Hemisphere')
ax.set_xlabel('Date')
ax.set_ylabel('Extent')
# Anchor the legend below the axes, expanded across the full width in two columns.
ax.legend(bbox_to_anchor=(0., -.362, 1., .102), loc=3, ncol=2,
          mode="expand", borderaxespad=0.);