## Data and How to Source It

### What is an API?
API stands for Application Programming Interface: a defined set of rules that lets one program request data or services from another.

### How do we make HTTPS requests in Python?    

In [28]:
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import sklearn
from bs4 import BeautifulSoup

# Issue a GET request to the GitHub public events API endpoint.
# The timeout stops the cell from hanging forever if the server never answers.
r = requests.get('https://api.github.com/events', timeout=10)
# Fail loudly on a 4xx/5xx response instead of indexing into an error payload.
r.raise_for_status()
# Decode the JSON body and display its first element.
r.json()[0]

{'id': '20861256558',
 'type': 'IssueCommentEvent',
 'actor': {'id': 54564956,
  'login': 'GaryAustin1',
  'display_login': 'GaryAustin1',
  'gravatar_id': '',
  'url': 'https://api.github.com/users/GaryAustin1',
  'avatar_url': 'https://avatars.githubusercontent.com/u/54564956?'},
 'repo': {'id': 214587193,
  'name': 'supabase/supabase',
  'url': 'https://api.github.com/repos/supabase/supabase'},
 'payload': {'action': 'created',
  'issue': {'url': 'https://api.github.com/repos/supabase/supabase/issues/5974',
   'repository_url': 'https://api.github.com/repos/supabase/supabase',
   'labels_url': 'https://api.github.com/repos/supabase/supabase/issues/5974/labels{/name}',
   'comments_url': 'https://api.github.com/repos/supabase/supabase/issues/5974/comments',
   'events_url': 'https://api.github.com/repos/supabase/supabase/issues/5974/events',
   'html_url': 'https://github.com/supabase/supabase/issues/5974',
   'id': 1175074448,
   'node_id': 'I_kwDODMpXOc5GCjaQ',
   'number': 5974,
 

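Sometimes `requests` guesses the wrong text encoding for a response. We can override it by setting the `encoding` attribute on the response object (not on the `requests` module) before reading `r.text`, e.g. `r.encoding = 'utf-8'`.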

### Example 1

In [15]:
# Fetch the dataset from the NYC open-data JSON endpoint.
# The timeout stops the cell from hanging forever if the server never answers.
resource = requests.get(
    'https://data.cityofnewyork.us/resource/f9bf-2cp4.json',
    timeout=10,
)
# Fail loudly on a 4xx/5xx response instead of indexing into an error payload.
resource.raise_for_status()
# Display the third record of the decoded JSON list.
resource.json()[2]

{'dbn': '01M450',
 'school_name': 'EAST SIDE COMMUNITY SCHOOL',
 'num_of_sat_test_takers': '70',
 'sat_critical_reading_avg_score': '377',
 'sat_math_avg_score': '402',
 'sat_writing_avg_score': '370'}

In [19]:
# Build a DataFrame from the JSON text of the response.
# pandas deprecated passing a literal JSON string to read_json (since 2.1),
# so the text is wrapped in a file-like StringIO buffer instead.
df = pd.read_json(StringIO(resource.text))
df

Unnamed: 0,dbn,school_name,num_of_sat_test_takers,sat_critical_reading_avg_score,sat_math_avg_score,sat_writing_avg_score
0,01M292,HENRY STREET SCHOOL FOR INTERNATIONAL STUDIES,29,355,404,363
1,01M448,UNIVERSITY NEIGHBORHOOD HIGH SCHOOL,91,383,423,366
2,01M450,EAST SIDE COMMUNITY SCHOOL,70,377,402,370
3,01M458,FORSYTH SATELLITE ACADEMY,7,414,401,359
4,01M509,MARTA VALLE HIGH SCHOOL,44,390,433,384
...,...,...,...,...,...,...
473,75X012,P.S. X012 LEWIS AND CLARK SCHOOL,s,s,s,s
474,75X754,J. M. RAPPORT SCHOOL CAREER DEVELOPMENT,s,s,s,s
475,79M645,SCHOOL FOR COOPERATIVE TECHNICAL EDUCATION,s,s,s,s
476,79Q950,GED PLUS s CITYWIDE,8,496,400,426


In [23]:
# Keep only the rows whose critical-reading score is not the literal
# placeholder 's' (presumably a suppressed value -- confirm against the
# dataset documentation).
has_reading_score = df['sat_critical_reading_avg_score'].ne('s')
df.loc[has_reading_score]

Unnamed: 0,dbn,school_name,num_of_sat_test_takers,sat_critical_reading_avg_score,sat_math_avg_score,sat_writing_avg_score
0,01M292,HENRY STREET SCHOOL FOR INTERNATIONAL STUDIES,29,355,404,363
1,01M448,UNIVERSITY NEIGHBORHOOD HIGH SCHOOL,91,383,423,366
2,01M450,EAST SIDE COMMUNITY SCHOOL,70,377,402,370
3,01M458,FORSYTH SATELLITE ACADEMY,7,414,401,359
4,01M509,MARTA VALLE HIGH SCHOOL,44,390,433,384
...,...,...,...,...,...,...
466,32K556,BUSHWICK LEADERS HIGH SCHOOL FOR ACADEMIC EXCE...,23,347,358,350
467,32K564,BUSHWICK COMMUNITY HIGH SCHOOL,24,359,317,358
471,75Q811,P.S. Q811,32,429,444,433
476,79Q950,GED PLUS s CITYWIDE,8,496,400,426


In [29]:
# Download the PHI 2022 team page from Basketball Reference.
# Uses HTTPS rather than plain HTTP, and a timeout so the cell cannot hang
# forever if the server never answers.
b_ref = requests.get(
    'https://www.basketball-reference.com/teams/PHI/2022.html',
    timeout=10,
)
# Fail loudly on a 4xx/5xx response (e.g. rate limiting) instead of
# silently parsing an error page.
b_ref.raise_for_status()
# Parse the returned HTML with the standard-library parser backend.
soup = BeautifulSoup(b_ref.text, 'html.parser')
soup


<!DOCTYPE html>

<html class="no-js" data-root="/home/bbr/build" data-version="klecko-" itemscope="" itemtype="https://schema.org/WebSite" lang="en">
<head>
<meta charset="utf-8"/>
<meta content="ie=edge" http-equiv="x-ua-compatible"/>
<meta content="width=device-width, initial-scale=1.0, maximum-scale=2.0" name="viewport">
<link href="https://d2p3bygnnzw9w3.cloudfront.net/req/202203151" rel="dns-prefetch"/>
<!-- Quantcast Choice. Consent Manager Tag v2.0 (for TCF 2.0) -->
<script async="true" type="text/javascript">
    (function() {
	var host = window.location.hostname;
	var element = document.createElement('script');
	var firstScript = document.getElementsByTagName('script')[0];
	var url = 'https://quantcast.mgr.consensu.org'
	    .concat('/choice/', 'XwNYEpNeFfhfr', '/', host, '/choice.js')
	var uspTries = 0;
	var uspTriesLimit = 3;
	element.async = true;
	element.type = 'text/javascript';
	element.src = url;
	
	firstScript.parentNode.insertBefore(element, firstScript);
	
	functio

In [35]:
# Collect every <div> whose id attribute is "div_roster"; the result is a
# list-like ResultSet even when only one element matches.
found_table = soup.find_all('div', id='div_roster')
found_table

[<div class="table_container" id="div_roster">
 <table class="sortable stats_table" data-cols-to-freeze=",2" id="roster">
 <caption>Roster Table</caption>
 <colgroup><col/><col/><col/><col/><col/><col/><col/><col/><col/></colgroup>
 <thead>
 <tr>
 <th aria-label="No." class="poptip sort_default_asc center" data-stat="number" data-tip="Uniform Number" scope="col">No.</th>
 <th aria-label="Player" class="poptip sort_default_asc center" data-stat="player" scope="col">Player</th>
 <th aria-label="Pos" class="poptip sort_default_asc center" data-stat="pos" data-tip="Position" scope="col">Pos</th>
 <th aria-label="Ht" class="poptip sort_default_asc center" data-stat="height" data-tip="Height" scope="col">Ht</th>
 <th aria-label="Wt" class="poptip sort_default_asc center" data-stat="weight" data-tip="Weight" scope="col">Wt</th>
 <th aria-label="Birth Date" class="poptip sort_default_asc center" data-stat="birth_date" scope="col">Birth Date</th>
 <th aria-label=" " class="poptip center" data-s