<a href="https://colab.research.google.com/github/j-buss/wi-dpi-analysis/blob/development/eda/4.0_Gold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Salary and Education in Wisconsin - 4.0 Gold Layer

This is the 4th in a series of notebooks depicting the steps to acquire, store, and analyze data pertaining to teachers in Wisconsin.



1.   List item
2.   List item
3.   Refined Data - Create ***all_staff_record*** tables by year

## Prep

In [3]:
# Upgrade the BigQuery client library in the running environment before importing it.
# NOTE(review): no version is pinned; the recorded output of this cell shows
# google_cloud_bigquery-1.12.1 being downloaded - consider pinning for reproducibility.
!pip install --upgrade google-cloud-bigquery

Collecting google-cloud-bigquery
[?25l  Downloading https://files.pythonhosted.org/packages/6f/c1/74dce5b9ffde50910082431e9117e221f18978efec88a085e3ec46d63ed4/google_cloud_bigquery-1.12.1-py2.py3-none-any.whl (130kB)
[K     |██▌                             | 10kB 15.3MB/s eta 0:00:01[K     |█████                           | 20kB 3.2MB/s eta 0:00:01[K     |███████▌                        | 30kB 4.6MB/s eta 0:00:01[K     |██████████                      | 40kB 3.0MB/s eta 0:00:01[K     |████████████▋                   | 51kB 3.7MB/s eta 0:00:01[K     |███████████████                 | 61kB 4.4MB/s eta 0:00:01[K     |█████████████████▋              | 71kB 5.0MB/s eta 0:00:01[K     |████████████████████            | 81kB 5.7MB/s eta 0:00:01[K     |██████████████████████▋         | 92kB 6.3MB/s eta 0:00:01[K     |█████████████████████████▏      | 102kB 4.9MB/s eta 0:00:01[K     |███████████████████████████▋    | 112kB 4.9MB/s eta 0:00:01[K     |█████████████████████

### Import Libraries

In [0]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 5)
import seaborn as sns
import matplotlib.pyplot as plt

from google.cloud import bigquery

In [0]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'bmh' style sheet to the plots drawn after this cell.
plt.style.use('bmh')

### Functions

In [0]:
def create_dataset(client, project_id, dataset_name, location="US"):
  """Create a BigQuery dataset and print a confirmation line.

  Args:
    client: BigQuery client used to send the create request.
    project_id: ID of the project that will own the dataset.
    dataset_name: Name of the dataset to create inside ``project_id``.
    location: Location assigned to the new dataset. Defaults to "US",
      the value that was previously hard-coded.

  Returns:
    None. The result is reported with ``print``.

  Raises:
    Any error raised by ``client.create_dataset`` propagates unchanged.
    NOTE(review): presumably that includes the case where the dataset
    already exists - confirm before re-running against a live project.
  """
  dataset_id = "{}.{}".format(project_id, dataset_name)
  dataset = bigquery.Dataset(dataset_id)
  dataset.location = location

  # Report from the object handed back by the client rather than from the
  # local request object.
  dataset = client.create_dataset(dataset)
  print("Created dataset {}.{}".format(client.project, dataset.dataset_id))

In [0]:
def create_table(sql, dataset, tablename, bq_client, location='US'):
  """Run a query and store its result in a destination table.

  Args:
    sql: Query text to execute.
    dataset: Name of the dataset that holds the destination table.
    tablename: Name of the destination table inside ``dataset``.
    bq_client: BigQuery client used to build the table reference and
      to run the query.
    location: Location the query job runs in. It must match the location
      of the dataset(s) referenced in the query and of the destination
      table. Defaults to 'US', the value that was previously hard-coded.

  Returns:
    None. A confirmation line with the table path is printed once the
    query job has finished.

  Raises:
    Any error raised while starting or waiting on the query job
    propagates unchanged.
    NOTE(review): no write disposition is set on the job config, so the
    service default applies - presumably the job fails when the
    destination table already contains data; confirm.
  """
  job_config = bigquery.QueryJobConfig()
  # Set the destination table
  table_ref = bq_client.dataset(dataset).table(tablename)
  job_config.destination = table_ref

  # Start the query, passing in the extra configuration.
  query_job = bq_client.query(
      sql,
      location=location,
      job_config=job_config)  # API request - starts the query

  query_job.result()  # Waits for the query to finish
  print('Query results loaded to table {}'.format(table_ref.path))

## Processing

In [0]:
# Authenticate the notebook user through the Colab auth helper.
# NOTE(review): the earlier wording said "GCS", but BigQuery is the only Google
# service used in the visible cells - presumably these are the credentials
# bigquery.Client picks up; confirm.
from google.colab import auth
auth.authenticate_user()

In [0]:
# Project and dataset names shared by the cells below.
project_id='wi-dpi-010'
landing_dataset_name='landing'
refined_dataset_name='refined'
gold_dataset_name='gold'

# Destination table for the select_gold_person query. The create_table call for
# that query reads this name, and its recorded output shows the table path
# .../datasets/gold/tables/person; without this definition a fresh
# Restart & Run All stops with a NameError.
gold_person_tablename='person'


In [0]:
# BigQuery client bound to project_id; the create_table calls below receive it.
bq_client = bigquery.Client(project=project_id)

In [0]:
# Create the gold dataset - likely already done, so the call stays commented out
#create_dataset(bq_client, project_id, gold_dataset_name)

In [0]:
# Query text for the gold-layer person table: the person-level columns of the
# refined 2015 staff report.
# Note: DISTINCT applies to the whole selected row, not only to id_nbr, so an
# id_nbr can still appear on several rows when any other selected column differs.
select_gold_person = '''
  SELECT
    distinct id_nbr,
    first_name,
    last_name,
    file_number,
    gender,
    race_ethnicity,
    birth_year,
    high_degree_cd,
    high_degree_desc,
    year_session,
    local_exp,
    total_exp,
    salary,
    benefits
  FROM
    `wi-dpi-010.refined.2015_all_staff_report`

'''

In [15]:
# Materialise the person query as a table in the gold dataset.
create_table(
    sql=select_gold_person,
    dataset=gold_dataset_name,
    tablename=gold_person_tablename,
    bq_client=bq_client,
)

Query results loaded to table /projects/wi-dpi-010/datasets/gold/tables/person


In [0]:
# Query text for the party_w_salary table: one row per
# (id_nbr, first_name, last_name, year_session) carrying the highest salary
# found in that group as `sal`. The HAVING clause drops groups whose highest
# salary is not greater than 0.
select_gold_party_w_salary = '''
  SELECT
    id_nbr,
    first_name,
    last_name,
    year_session,
    max(salary) as sal
  FROM
    `wi-dpi-010.refined.2015_all_staff_report`
  GROUP BY 1, 2, 3, 4
  HAVING max(salary) > 0
'''

In [9]:
# Materialise the salary-per-person query as gold.party_w_salary.
create_table(
    sql=select_gold_party_w_salary,
    dataset=gold_dataset_name,
    tablename='party_w_salary',
    bq_client=bq_client,
)

Query results loaded to table /projects/wi-dpi-010/datasets/gold/tables/party_w_salary


In [0]:
# Load the refined 2015 staff report into a DataFrame.
# The dialect is set explicitly so the query does not depend on the default of
# the installed pandas / pandas-gbq version. The table is referenced with its
# full project.dataset.table path in backticks (the same form used in the SQL
# strings above) because the table name starts with a digit.
df = pd.read_gbq(
    "select * from `{}.{}.{}`".format(
        project_id, refined_dataset_name, "2015_all_staff_report"),
    project_id=project_id,
    dialect="standard")

  """Entry point for launching an IPython kernel.


Unnamed: 0,id_nbr,first_name,last_name,file_number,gender,race_ethnicity,birth_year,high_degree_cd,high_degree_desc,year_session,contract_days,local_exp,total_exp,salary,benefits,staff_category_cd,staff_category_desc,school_cd,position_cd,position_description,position_type_cd,position_type_desc,assignment_area_cd,assignment_area_desc,low_grade_cd,low_grade_desc,high_grade_cd,high_grade_desc,assignment_fte,school_name,grade_level_cd,grade_level_desc,cesa_number,cnty_name,school_mailing_address1,school_mailing_address2,mail_city,mail_st,mail_zip_cd
0,333790,Richard,Waski,214440,M,W,1972,6,6-year Specialist's degree,2015R,260,2.0,19.0,119722.0,42582.0,1,Professional-Regular Education,,5,District Administrator,A,Administrative,0,No Description Beyond Position,K4,4-year-old Kindergarten,12,Twelfth Grade,1.0,Adams-Friendship Area Sch Dist,,,5,Adams County,201 W 6th St,Friendship WI 53934-9135,Friendship,WI,53934-9135
1,282202,Nicholas,Cochart,679898,M,W,1981,6,6-year Specialist's degree,2015R,260,4.0,10.0,150000.0,41648.0,1,Professional-Regular Education,,5,District Administrator,A,Administrative,0,No Description Beyond Position,K4,4-year-old Kindergarten,12,Twelfth Grade,0.5,Algoma Sch Dist,,,7,Kewaunee County,1715 Division St,Algoma WI 54201-1498,Algoma,WI,54201-1498
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173822,397111,Brad,Johannes,677987,M,W,1981,,,2015R,0,,,0.0,0.0,4,Support-Special Education,7105,96,,,,870,,PK,Pre-Kindergarten,12,Twelfth Grade,1.0,Winnebago MHI,7,Combined Elementary/Secondary School,,Winnebago County,PO Box 9,Winnebago WI 54985-0009,Winnebago,WI,54985-0009
173823,392557,Fallon,Crowe,671093,F,W,1982,,,2015R,0,,,0.0,0.0,4,Support-Special Education,7105,96,,,,870,,PK,Pre-Kindergarten,12,Twelfth Grade,0.5,Winnebago MHI,7,Combined Elementary/Secondary School,,Winnebago County,PO Box 9,Winnebago WI 54985-0009,Winnebago,WI,54985-0009


In [0]:
# Preview the first five rows of the loaded staff report.
df.head(n=5)

NameError: ignored