**Outline**
- Categories
- Load JSON (local)
- Load JSON (from web)
- Load CSV (from web)

In [None]:
import numpy as np
import pandas as pd


## Categories
https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html  

https://stackoverflow.com/questions/28910851/python-pandas-changing-some-column-types-to-categories  


In [None]:
# Generate synthetic survey data: four category-like answer columns and two int columns.
# A fixed seed makes the notebook reproducible: a fresh "Restart & Run All"
# regenerates exactly the same categories.csv and the same outputs below.
SEED = 42
N = 100
rng = np.random.default_rng(SEED)

# Answer scales shared by the paired survey questions.
agreement = ['strongly agree', 'agree', 'disagree']
importance = ['important', 'very important', 'not important']

categories = pd.DataFrame({
    'parks': rng.choice(agreement, size=N),
    'playgrounds': rng.choice(agreement, size=N),
    'sports': rng.choice(importance, size=N),
    'roading': rng.choice(importance, size=N),
    'resident': rng.choice([1, 2, 3], size=N),
    'children': rng.choice([0, 1, 2, 3], size=N),
})
# index=False: do not write the row index as an extra CSV column
categories.to_csv('categories.csv', index=False)

In [None]:
# Re-read the generated CSV with default type inference to see which dtypes pandas picks
df = pd.read_csv('categories.csv')
# info() prints each column's dtype and non-null count
df.info()

In [None]:
# Preview the first rows (head() defaults to 5)
df.head()

In [None]:
# Load the same file again, this time reading the four survey-answer columns
# as pandas categoricals; the two remaining columns keep their inferred dtypes.
df2 = pd.read_csv(
    'categories.csv',
    dtype=dict.fromkeys(['parks', 'playgrounds', 'sports', 'roading'], 'category'),
)
df2.info()

In [None]:
# Summarise only the categorical columns: count, unique, top (most frequent value), freq
df2.describe(include='category')

In [None]:
# Sort by the still-unordered categorical; rows follow the category order set at load time
# (presumably lexical: agree, disagree, strongly agree -- compare with the sort after ordering)
df2.sort_values(by='parks')

### Ordering categories

In [None]:
from pandas.api.types import CategoricalDtype

In [None]:
# Ordered categorical dtype for the agreement scale.
# The list position defines the rank: disagree < agree < strongly agree.
cat_type = CategoricalDtype(
    ['disagree', 'agree', 'strongly agree'],
    ordered=True,
)

In [None]:
# Convert 'parks' to the ordered dtype; any value not listed in cat_type's categories would become NaN
df2['parks'] = df2['parks'].astype(cat_type)

In [None]:
# Confirm the column now carries the ordered categorical dtype
df2['parks'].dtype

In [None]:
# Sorting now follows the declared order (disagree < agree < strongly agree), not the label text
df2.sort_values(by='parks')

## JSON

In [None]:
import json

In [None]:
# Sample JSON document held as a Python string: a nested object containing
# a list of strings, a null value and a list of sibling objects.
obj = """
    {"name": "Wes",
     "places_lived": ["United States", "Spain", "Germany"],
     "pet": null,
     "siblings": [{"name": "Scott", "age": 30, "pets": ["Zeus", "Zuko"]},
                  {"name": "Katie", "age": 38,
                   "pets": ["Sixes", "Stache", "Cisco"]}]
} """
# Display the raw text (still a str, not yet parsed)
obj

In [None]:
# Parse the JSON text into Python objects (object -> dict, array -> list, null -> None)
data = json.loads(obj)
data

In [None]:
# 'siblings' is a list of dicts, one per sibling -- a shape pd.DataFrame accepts directly
data['siblings']

In [None]:
# Build a frame from the list of sibling records; columns= keeps only 'name' and 'pets',
# so the 'age' field is left out.
# NOTE: this rebinds `df`, which earlier held the categories data.
df = pd.DataFrame(data['siblings'], columns=['name', 'pets'])
df

In [None]:
# Select the 'pets' column; each entry is a Python list taken from the JSON array
df['pets']

## JSON from the web

GitHub REST API documentation: https://developer.github.com/v3/

>All data is sent and received as JSON.

In [None]:
import requests

In [None]:

# Fetch the issues of the pandas repository from the GitHub REST API.
url = 'https://api.github.com/repos/pandas-dev/pandas/issues'
# timeout: without one, requests can wait indefinitely on a stalled connection
resp = requests.get(url, timeout=10)
# Stop here on an HTTP error (e.g. 403/429 when rate-limited). Otherwise the error
# payload is a JSON object rather than a list, and the data[0] lookups further
# down fail with a confusing KeyError.
resp.raise_for_status()
resp


In [None]:
# Preview only the start of the raw JSON text; the full body is large and
# would flood the cell output (the parsed form is inspected below instead).
resp.text[:500]

In [None]:
# Decode the response body as JSON.
# NOTE: this rebinds `data`, which earlier held the parsed sample document.
data = resp.json()

In [None]:
# Inspect the type of one element of the result (each issue is expected to be a dict)
type(data[0])

In [None]:
# Field names available on the first issue record
data[0].keys()

In [None]:
# Title of the first returned issue
data[0]['title']

In [None]:
# 'user' entry of the first issue (presumably a nested object describing the author -- check the output)
data[0]['user']

In [None]:
# 'state' entry of the first issue
data[0]['state']

In [None]:
# Build a frame from the list of issue records, keeping only the 'title' and 'state' fields
df_github = pd.DataFrame(data, columns=['title', 'state'])


In [None]:
# Summary statistics; assuming both columns hold strings, this reports count, unique, top and freq
df_github.describe()

In [None]:
# Column dtypes and non-null counts of the issues frame
df_github.info()

## CSV from web

See Assignment06 for more details

In [None]:
# URL template for the bulk CSV download; the placeholders {station}, {year} and {month}
# are filled in later with str.format, all other query parameters are fixed.
url_template = "https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID={station}&Year={year}&Month={month}&Day=14&timeframe=1&submit=Download+Data"

In [None]:
# replace variables in string with actual values
url = url_template.format(station=2205, year=2001, month=5)
url

In [None]:
# Download the CSV straight from the URL into a DataFrame.
# index_col makes the 'Date/Time' column the index, and parse_dates=True asks
# pandas to parse that index as dates.
# NOTE(review): assumes the file has a header column named exactly 'Date/Time' --
# confirm against the current output of the service.
weather_data = pd.read_csv(url, index_col='Date/Time', parse_dates=True)
weather_data.info()