# Pandas cookbook
Source: http://nbviewer.jupyter.org/github/jvns/pandas-cookbook/tree/v0.1/cookbook/

## 1. Reading from CSV

In [None]:
import pandas as pd
import os
# Display the kernel's current working directory; the relative data paths
# used by the read_csv calls in this notebook are resolved against it.
os.getcwd()

In [None]:
# Read the bikes CSV using read_csv's defaults (no separator, encoding or
# date options). Forward slashes are used so the relative path resolves on
# every platform, not only on Windows, and matches the other paths in this
# notebook.
broken_df = pd.read_csv('./data/bikes.csv')

In [None]:
# Positional slice: every row after the first three.
# NOTE(review): if a quick preview of the first three rows was intended,
# that would be broken_df[:3] -- confirm.
broken_df.iloc[3:]

In [None]:
# Load the bikes data with explicit parsing options: ';' as the field
# separator, UTF-8 text, and the 'Date' column parsed day-first and used as
# the index.
fix_df = pd.read_csv(
    './data/bikes.csv',
    sep=';',
    encoding='utf-8',
    parse_dates=['Date'],
    dayfirst=True,
    index_col='Date',
)

In [None]:
# Line plot of the 'Berri 1' column against the date index.
fix_df['Berri 1'].plot.line()

In [None]:
# Line plot of every column of the frame on one 8x8 figure.
fix_df.plot.line(figsize=(8, 8))

## 2. Select & find

In [None]:
import pandas as pd
# Load the 311 service requests; low_memory=False makes pandas process the
# file in a single pass instead of in chunks.
complaints = pd.read_csv(
    './data/311-service-requests.csv',
    low_memory=False,
)


In [None]:
# Display settings for wide frames: a large output width and up to 50
# visible columns. ('display.mpl_style' is intentionally left unset.)
display_settings = {
    'display.width': 5000,
    'display.max_columns': 50,
}
for option_name, option_value in display_settings.items():
    pd.set_option(option_name, option_value)

In [None]:
# Preview the first five rows.
complaints.head(5)

In [None]:
# Restrict the frame to the two columns of interest.
selected_columns = ["Complaint Type", "Borough"]
complaints[selected_columns]

In [None]:
# Bar chart of the number of complaints recorded per borough.
borough_counts = complaints["Borough"].value_counts()
borough_counts.plot.bar()

## 3. More selecting data

In [None]:
import pandas as pd

In [None]:
# (Re)load the 311 service requests for this section, reading the file in
# one pass (low_memory=False).
complaints = pd.read_csv(
    './data/311-service-requests.csv',
    low_memory=False,
)

In [None]:
# Keep only the rows whose complaint type is exactly street/sidewalk noise.
is_street_noise = complaints["Complaint Type"] == 'Noise - Street/Sidewalk'
noise_complaints = complaints[is_street_noise]

In [None]:
# Preview the first five street-noise complaints.
noise_complaints.head(n=5)

In [None]:
# Per-borough totals for all complaints and for street-noise complaints.
complaints_ct = complaints["Borough"].value_counts()
noise_complaints_ct = noise_complaints["Borough"].value_counts()

# Displayed result: number of complaints for each (borough, agency) pair.
borough_agency_groups = complaints.groupby(["Borough", "Agency"])
borough_agency_groups.size()

In [None]:
# Share of each borough's complaints that are street-noise complaints,
# drawn as a bar chart.
noise_share = noise_complaints_ct.div(complaints_ct)
noise_share.plot.bar()

## 4. Grouping

In [None]:
import pandas as pd
# Load the bikes data: ';'-separated, UTF-8 encoded, with the day-first
# 'Date' column parsed and used as the index.
bikes = pd.read_csv(
    './data/bikes.csv',
    sep=';',
    encoding='utf-8',
    parse_dates=['Date'],
    dayfirst=True,
    index_col='Date',
)

In [None]:
# Keep only the 'Berri 1' column, as a one-column DataFrame. The explicit
# .copy() makes berri independent of bikes, so adding a column to it later
# does not trigger pandas' SettingWithCopyWarning.
berri = bikes[['Berri 1']].copy()

In [None]:
# Tag every row with the day-of-week number taken from its index entry.
day_numbers = berri.index.weekday
berri['weekday'] = day_numbers

In [None]:
# Total 'Berri 1' counts per day of the week.
weekday_berri = berri.groupby('weekday').sum()

# Replace the numeric weekday index with readable labels (pandas numbers
# weekdays Monday=0 ... Sunday=6). Looking each number up in the list keeps
# the labels correct even when a weekday is absent from the data, where a
# fixed seven-item assignment would fail with a length mismatch.
weekday_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
weekday_berri.index = [weekday_names[day] for day in weekday_berri.index]

In [None]:
# Bar chart of the per-weekday totals.
weekday_berri.plot.bar()

## 5. Scrap scrap scrap!

In [None]:
# Count complaints per borough among complaint types that mention 'Noise',
# and draw the counts as a bar chart.
# na=False treats a missing complaint type as "no match", so the mask stays
# strictly boolean and is safe to use for row selection; .loc selects the
# rows and the 'Borough' column in a single step.
is_noise = complaints["Complaint Type"].str.contains('Noise', na=False)
complaints.loc[is_noise, "Borough"].value_counts().plot(kind='bar')

## 6. Data munging

In [48]:
import pandas as pd
import numpy as np

In [15]:
# Load the 2012 weather data, indexed by its 'Date/Time' column with the
# index values parsed as dates.
weather_2012 = pd.read_csv(
    './data/weather_2012.csv',
    index_col='Date/Time',
    parse_dates=True,
)

In [42]:
# Per-row flag: does the 'Weather' description contain the text 'Snow'?
weather_description = weather_2012["Weather"]
isSnowing = weather_description.str.contains('Snow')

In [50]:
# Plot the snow indicator over time. The True/False flags are converted to
# 1.0/0.0 first so that the plotting code receives numeric data.
isSnowing.astype(float).plot()

<matplotlib.axes._subplots.AxesSubplot at 0x271776d3320>

In [51]:
# Preview the first ten rows instead of rendering the whole frame.
weather_2012.head(10)

Unnamed: 0_level_0,Temp (C),Dew Point Temp (C),Rel Hum (%),Wind Spd (km/h),Visibility (km),Stn Press (kPa),Weather
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2012-01-01 00:00:00,-1.8,-3.9,86,4,8.0,101.24,Fog
2012-01-01 01:00:00,-1.8,-3.7,87,4,8.0,101.24,Fog
2012-01-01 02:00:00,-1.8,-3.4,89,7,4.0,101.26,"Freezing Drizzle,Fog"
2012-01-01 03:00:00,-1.5,-3.2,88,6,4.0,101.27,"Freezing Drizzle,Fog"
2012-01-01 04:00:00,-1.5,-3.3,88,7,4.8,101.23,Fog
2012-01-01 05:00:00,-1.4,-3.3,87,9,6.4,101.27,Fog
2012-01-01 06:00:00,-1.5,-3.1,89,7,6.4,101.29,Fog
2012-01-01 07:00:00,-1.4,-3.6,85,7,8.0,101.26,Fog
2012-01-01 08:00:00,-1.4,-3.6,85,9,8.0,101.23,Fog
2012-01-01 09:00:00,-1.3,-3.1,88,15,4.0,101.20,Fog


In [54]:
# Median temperature for each 'M' (monthly) resampling bin, plotted as a
# line. The resampler's built-in median() is used instead of
# apply(np.median): it is the native aggregation and skips missing values
# rather than returning NaN for any bin that contains one.
monthly_median_temp = weather_2012['Temp (C)'].resample('M').median()
monthly_median_temp.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x271776d3320>

In [56]:
# Minimal plotting check: line plot of a three-row, single-column frame.
sample_frame = pd.DataFrame([1, 2, 3])
sample_frame.plot.line()

<matplotlib.axes._subplots.AxesSubplot at 0x271776d3a20>