In [1]:
import requests
import json
import pandas as pd
import tempfile as tf
import uuid

In [2]:
baseurl = 'https://cloud.ccd.pitt.edu/ccd-api'

# change the Basic authorization for your username/password (you can use Postman to generate) 
basicheader = {
    'authorization': "Basic <CHANGE ME>",    # CHANGE ME:  
    'accept': "application/json"
    }

# endpoints
jwturl = baseurl + '/jwt'
dataurl = baseurl + '/data'

In [3]:
# get JSON Web token and userId
r = requests.get(jwturl, headers=basicheader)
jwtresults = json.loads(r.text)

userId = jwtresults['userId']
jwt = jwtresults['jwt']

print (jwtresults)

# construct a header with the JSON web token
jtoken = "Bearer " + jwt
bearheader = {
    'authorization': jtoken,
    'accept': "application/json"    
    }
print (bearheader)

{u'expireTime': 1478825098517, u'jwt': u'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwczovL2NjZDEudm0uYnJpZGdlcy5wc2MuZWR1LyIsInVpZCI6NCwiZXhwIjoxNDc4ODI1MDk4NTE3LCJpYXQiOjE0Nzg4MjE0OTg1MTd9.U7U3hly_ouAI5-nFW55gs4DD5GO2xf2QIHgkwzDaKbI', u'userId': 4, u'issuedTime': 1478821498517, u'lifetime': 3600}
{'accept': 'application/json', 'authorization': u'Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwczovL2NjZDEudm0uYnJpZGdlcy5wc2MuZWR1LyIsInVpZCI6NCwiZXhwIjoxNDc4ODI1MDk4NTE3LCJpYXQiOjE0Nzg4MjE0OTg1MTd9.U7U3hly_ouAI5-nFW55gs4DD5GO2xf2QIHgkwzDaKbI'}


## Upload file to CCD API

In [4]:
fo = open('Retention.txt', 'rb')
files = {'file': fo }

In [5]:
# post the newly saved data file from PIC-SURE query to your user directory in CCD
url = baseurl + '/' + str(userId) + '/dataset/upload'
r = requests.post(url, headers=bearheader, files=files)
print (r.text)

{"id":130,"name":"Retention.txt","creationTime":1478821509000,"lastModifiedTime":1478821509000,"fileSize":7986,"md5checkSum":"9428151131e8b981d3bf7fb5ad4a7dfd","fileSummary":{"variableType":null,"fileDelimiter":null,"numOfRows":null,"numOfColumns":null}}


In [6]:
fo.close()
print (json.dumps(json.loads(r.text), indent=4))  # pretty print the results
p_resultId = json.loads(r.text)  #  parse and get the job id
fileId = p_resultId['id']

{
    "name": "Retention.txt", 
    "creationTime": 1478821509000, 
    "fileSummary": {
        "fileDelimiter": null, 
        "numOfColumns": null, 
        "variableType": null, 
        "numOfRows": null
    }, 
    "fileSize": 7986, 
    "lastModifiedTime": 1478821509000, 
    "id": 130, 
    "md5checkSum": "9428151131e8b981d3bf7fb5ad4a7dfd"
}


## Summarize the data file
variableType = {discrete or continuous}

fileDelimiter = {tab or comma}

In [7]:
payload = "{\n    \"id\": " + str(fileId) + ",\n    \"variableType\": \"continuous\",\n    \"fileDelimiter\": \"tab\"\n}"
bearheader.update({'content-type': "application/json"})    

In [8]:
url = baseurl + '/' + str(userId) + '/dataset/summarize'
# post a request to summarize the data file
r = requests.post(url, data=payload, headers=bearheader)
print (r.status_code)
print (r.text)

200
{"id":130,"name":"Retention.txt","creationTime":1478821509000,"lastModifiedTime":1478821509000,"fileSize":7986,"md5checkSum":"9428151131e8b981d3bf7fb5ad4a7dfd","fileSummary":{"variableType":"continuous","fileDelimiter":"tab","numOfRows":171,"numOfColumns":8}}


## Run algorithm

Available alogrithms: 
- "name": "fgsc", "description": "FGS continuous" 
- "name": "fgsd", "description": "FGS discrete" 
- "name": "gfcic","description": "GFCI continuous"

In [9]:
payload = "{\n    \"datasetFileId\": " + str(fileId)+ ",\n    \"dataValidation\": {\n      \"uniqueVarName\": true,\n      \"limitNumOfCategory\": false\n    },\n    \"algorithmParameters\": {\n      \"maxDegree\": -1,\n      \"penaltyDiscount\": 2},\n    \"jvmOptions\": {\n      \"maxHeapSize\": 10\n    }\n}"

In [10]:
# use the alogrithm name as endpoint
# url = baseurl + '/' + str(userId) + '/jobs/gfcic'  # GFCI continuous
# url = baseurl + '/' + str(userId) + '/jobs/fgsd'  # FGS discrete
url = baseurl + '/' + str(userId) + '/jobs/fgsc'   # FGS Continuous
print (url)
r = requests.post(url, data=payload, headers=bearheader)
print (r.text)
p = json.loads(r.text)

https://ccd1.vm.bridges.psc.edu/ccd-api/4/jobs/fgsc
{"id":44,"algorithmName":"fgsc","status":0,"addedTime":1478821593810,"resultFileName":"fgsc_Retention.txt_1478821593808.txt","resultJsonFileName":"fgsc_Retention.txt_1478821593808.json","errorResultFileName":"error_fgsc_Retention.txt_1478821593808.txt"}


In [11]:
#  parse and get the job id, and expected result and error name 
jobId = p['id']
resultFilename = p['resultFileName']
errorFilename = p['errorResultFileName']
print (r.text)

{"id":44,"algorithmName":"fgsc","status":0,"addedTime":1478821593810,"resultFileName":"fgsc_Retention.txt_1478821593808.txt","resultJsonFileName":"fgsc_Retention.txt_1478821593808.json","errorResultFileName":"error_fgsc_Retention.txt_1478821593808.txt"}


## Show the job status

Note: an error of 'Not Found' may just mean that the job was processed and no longer in the job queue

In [12]:
url = baseurl + '/' + str(userId) + '/jobs'
r = requests.get(url + '/' + str(jobId), headers=bearheader)
print (r.text)

{"timestamp":1478821629666,"status":404,"error":"Not Found","message":"Unable to find job with ID 44 for user with ID: 4","path":"/4/jobs/44"}


## Show the algorithm result

In [13]:
# shows a report-based summary, non-graphical view
url = baseurl + '/' + str(userId) + '/results'
r = requests.get(url + '/' + resultFilename, headers=bearheader)
# if the result file is not found check the error file
if (r.status_code == 404):
    r = requests.get(url + '/' + errorFilename, headers=bearheader)
print (r.text)

FGS Continuous (Thu, November 10, 2016 06:46:35 PM)

Runtime Parameters:
verbose = false
number of threads = 2

Dataset:
file = Retention.txt
delimiter = tab
cases read in = 170
variables read in = 8

Algorithm Parameters:
penalty discount = 2.000000
max degree = -1
faithfulness assumed = false

Data Validations:
ensure variable names are unique = false
ensure variables have non-zero variance = false


Graph Nodes:
spending_per_stdt,grad_rate,stdt_clss_stndng,rjct_rate,tst_scores,stdt_accept_rate,stdt_tchr_ratio,fac_salary

Graph Edges:
1. fac_salary --- spending_per_stdt
2. spending_per_stdt --> rjct_rate
3. spending_per_stdt --> stdt_clss_stndng
4. spending_per_stdt --- stdt_tchr_ratio
5. spending_per_stdt --- tst_scores
6. stdt_accept_rate --- fac_salary
7. stdt_accept_rate --> grad_rate
8. stdt_clss_stndng --> rjct_rate
9. stdt_tchr_ratio --> stdt_clss_stndng
10. tst_scores --- fac_salary
11. tst_scores --> grad_rate
12. tst_scores --> stdt_clss_stndng

