## Problem 0

In [1]:
import numpy as np
import pandas as pd
import requests
import json
import sys
sys.tracebacklimit = 0  # turn off the error tracebacks

# Ask httpbin to echo back the User-Agent string that `requests` sends, then
# use it to build an explicit header dict that identifies this client.
url = 'https://httpbin.org/user-agent'
r = requests.get(url, timeout=30)  # never hang the notebook on a stalled connection
r.raise_for_status()  # fail on an HTTP error here, not later on a confusing JSON parse error
myjson = json.loads(r.text)
useragent = myjson['user-agent']
headers = {'User-Agent': useragent,
           'From': 'qja8kc@virginia.edu'}

## Problem 1
CSV files tend to be more efficient at storing data because the position of each value (its row and column indices) encodes information. In a JSON file, the same information has to be stored as keys, which are repeated for every record because each value is paired with its key. However, if a data set has many NaN/null values, a JSON file can take up less space than a CSV file, because null values can simply be omitted from a JSON record, while the CSV file still has to encode every null value.

## Problem 2
The JSON file is a list of dicts, so in our Python code we can call Pandas' `json_normalize()` function directly on the result of the `json.loads()` function. The rows would be the 1000 dicts that make up the list, and the columns would be the keys of the aforementioned dicts.

In [2]:
# Download the NASA dataset (a JSON list of dicts) and flatten it into a table.
# Nested keys are expanded into dotted column names such as `geolocation.type`.
url = 'https://data.nasa.gov/resource/y77d-th95.json'
r = requests.get(url, headers=headers, timeout=30)  # send the identifying headers built in Problem 0
r.raise_for_status()  # surface HTTP errors before trying to parse the body
myjson = json.loads(r.text)
pd.json_normalize(myjson)

Unnamed: 0,name,id,nametype,recclass,mass,fall,year,reclat,reclong,geolocation.type,geolocation.coordinates,:@computed_region_cbhk_fwbd,:@computed_region_nnqa_25f4
0,Aachen,1,Valid,L5,21,Fell,1880-01-01T00:00:00.000,50.775000,6.083330,Point,"[6.08333, 50.775]",,
1,Aarhus,2,Valid,H6,720,Fell,1951-01-01T00:00:00.000,56.183330,10.233330,Point,"[10.23333, 56.18333]",,
2,Abee,6,Valid,EH4,107000,Fell,1952-01-01T00:00:00.000,54.216670,-113.000000,Point,"[-113, 54.21667]",,
3,Acapulco,10,Valid,Acapulcoite,1914,Fell,1976-01-01T00:00:00.000,16.883330,-99.900000,Point,"[-99.9, 16.88333]",,
4,Achiras,370,Valid,L6,780,Fell,1902-01-01T00:00:00.000,-33.166670,-64.950000,Point,"[-64.95, -33.16667]",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Tirupati,24009,Valid,H6,230,Fell,1934-01-01T00:00:00.000,13.633330,79.416670,Point,"[79.41667, 13.63333]",,
996,Tissint,54823,Valid,Martian (shergottite),7000,Fell,2011-01-01T00:00:00.000,29.481950,-7.611230,Point,"[-7.61123, 29.48195]",,
997,Tjabe,24011,Valid,H6,20000,Fell,1869-01-01T00:00:00.000,-7.083330,111.533330,Point,"[111.53333, -7.08333]",,
998,Tjerebon,24012,Valid,L5,16500,Fell,1922-01-01T00:00:00.000,-6.666670,106.583330,Point,"[106.58333, -6.66667]",,


## Problem 3

In [2]:
# Reddit's API rules ask clients to identify themselves and may throttle
# generic User-Agents, so send the headers built in Problem 0.
url = 'https://www.reddit.com/r/popular/top.json'
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()  # e.g. a 429 response has no 'data' key to index into
myjson = json.loads(r.text)

# Each entry in data.children wraps one post's fields under its own 'data' key.
# Iterating over the children themselves (rather than range(25)) avoids an
# IndexError when the listing comes back with fewer posts than expected.
fields = ['subreddit', 'title', 'ups', 'created_utc']
arr = [[child['data'][field] for field in fields]
       for child in myjson['data']['children']]
df = pd.DataFrame(data=arr, columns=fields)
df

Unnamed: 0,subreddit,title,ups,created_utc
0,urbanexploration,I broke the rules and took something from an a...,122470,1719509000.0
1,holdmyfries,HMF while I photograph this engagement,81874,1719456000.0
2,rareinsults,My God you’ve gotten fat,56828,1719474000.0
3,wholesomememes,"A miraculous survivor, Kevin Hines",54130,1719488000.0
4,Damnthatsinteresting,example of how American suburbs are designed t...,49359,1719454000.0
5,oddlysatisfying,Satisfying cones,46048,1719477000.0
6,funny,ask and ye shall receive,45057,1719455000.0
7,interestingasfuck,"A father in Shandong,China, made his own aircr...",42085,1719495000.0
8,mildlyinfuriating,The amount of people that park in my spot,41250,1719449000.0
9,interestingasfuck,Turning the Tables: When the Prey Becomes the ...,39900,1719470000.0


## Problem 4
### Part a

In [11]:
# JSON text is UTF-8 by specification, so state the encoding explicitly rather
# than relying on the platform default, and parse straight from the file handle.
with open('shootingTeamData.json', 'r', encoding='utf-8') as file:
    myjson = json.load(file)

### Part b
The team data is in the second element `resultSets`, whose value is a list with one element. The single element is a dict whose third element, called `rowSet`, contains all of the team-by-team data.

### Part c

In [13]:
# Walk resultSets -> rowSet and stack every row list into one frame.
# The columns are positional integers at this point; names are attached later.
df = pd.json_normalize(
    data=myjson,
    record_path=['resultSets', 'rowSet'],
)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,31,32
0,1610612744,Golden State,Warriors,GSW,,82,48.7,114.9,14.9,0.498,...,0.478,21.2,42.5,0.497,2.3,6.3,0.363,10.8,25.3,0.429
1,1610612759,San Antonio,Spurs,SAS,,82,48.3,103.5,14.8,0.481,...,0.506,18.3,39.8,0.46,0.9,2.6,0.341,6.1,15.9,0.381
2,1610612739,Cleveland,Cavaliers,CLE,,82,48.7,104.3,16.9,0.481,...,0.473,18.2,40.7,0.447,1.7,5.7,0.299,9.0,23.9,0.378
3,1610612746,Los Angeles,Clippers,LAC,,82,48.6,104.5,15.0,0.497,...,0.48,18.9,42.0,0.45,2.0,6.0,0.334,7.7,20.8,0.373
4,1610612760,Oklahoma City,Thunder,OKC,,82,48.6,110.2,16.1,0.48,...,0.497,17.5,38.7,0.451,1.6,5.1,0.321,6.6,18.6,0.356
5,1610612737,Atlanta,Hawks,ATL,,82,48.6,102.8,19.0,0.463,...,0.483,19.4,44.6,0.435,1.0,3.1,0.311,9.0,25.3,0.355
6,1610612745,Houston,Rockets,HOU,,82,48.6,106.5,17.2,0.433,...,0.472,15.5,36.4,0.426,2.3,7.4,0.318,8.4,23.5,0.355
7,1610612757,Portland,Trail Blazers,POR,,82,48.5,105.1,17.5,0.441,...,0.447,18.0,39.8,0.453,1.7,5.9,0.295,8.8,22.6,0.389
8,1610612758,Sacramento,Kings,SAC,,81,48.4,106.7,18.7,0.452,...,0.473,18.1,39.7,0.454,0.9,3.1,0.276,7.2,19.4,0.372
9,1610612764,Washington,Wizards,WAS,,82,48.5,104.1,15.4,0.48,...,0.483,19.5,44.3,0.439,0.7,2.7,0.254,8.0,21.5,0.371


### Part d

In [17]:
# The column names are stored next to the rows in the same result set.
# Taking the whole header list (instead of indexing positions 0..32) keeps
# this correct whatever the number of columns; pandas raises if the lengths
# ever disagree with the frame.
names = list(myjson['resultSets'][0]['headers'])
df.columns = names
df

Unnamed: 0,TEAM_ID,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CODE,GP,MIN,PTS,PTS_DRIVE,FGP_DRIVE,...,CFGP,UFGM,UFGA,UFGP,CFG3M,CFG3A,CFG3P,UFG3M,UFG3A,UFG3P
0,1610612744,Golden State,Warriors,GSW,,82,48.7,114.9,14.9,0.498,...,0.478,21.2,42.5,0.497,2.3,6.3,0.363,10.8,25.3,0.429
1,1610612759,San Antonio,Spurs,SAS,,82,48.3,103.5,14.8,0.481,...,0.506,18.3,39.8,0.46,0.9,2.6,0.341,6.1,15.9,0.381
2,1610612739,Cleveland,Cavaliers,CLE,,82,48.7,104.3,16.9,0.481,...,0.473,18.2,40.7,0.447,1.7,5.7,0.299,9.0,23.9,0.378
3,1610612746,Los Angeles,Clippers,LAC,,82,48.6,104.5,15.0,0.497,...,0.48,18.9,42.0,0.45,2.0,6.0,0.334,7.7,20.8,0.373
4,1610612760,Oklahoma City,Thunder,OKC,,82,48.6,110.2,16.1,0.48,...,0.497,17.5,38.7,0.451,1.6,5.1,0.321,6.6,18.6,0.356
5,1610612737,Atlanta,Hawks,ATL,,82,48.6,102.8,19.0,0.463,...,0.483,19.4,44.6,0.435,1.0,3.1,0.311,9.0,25.3,0.355
6,1610612745,Houston,Rockets,HOU,,82,48.6,106.5,17.2,0.433,...,0.472,15.5,36.4,0.426,2.3,7.4,0.318,8.4,23.5,0.355
7,1610612757,Portland,Trail Blazers,POR,,82,48.5,105.1,17.5,0.441,...,0.447,18.0,39.8,0.453,1.7,5.9,0.295,8.8,22.6,0.389
8,1610612758,Sacramento,Kings,SAC,,81,48.4,106.7,18.7,0.452,...,0.473,18.1,39.7,0.454,0.9,3.1,0.276,7.2,19.4,0.372
9,1610612764,Washington,Wizards,WAS,,82,48.5,104.1,15.4,0.48,...,0.483,19.5,44.3,0.439,0.7,2.7,0.254,8.0,21.5,0.371


## Problem 5

In [21]:
# DataFrame.to_json already produces JSON text. Feeding that string to
# json.dump would encode it a second time, leaving the file with a single
# quoted, backslash-escaped string (and no indentation) instead of a JSON
# object. Let pandas write the indented document to disk directly.
df.to_json('nba_stats.json', orient='split', indent=4)