In [122]:
# Run once if lxml is missing (pd.read_html needs an HTML parser): %pip install lxml

Collecting lxml
  Downloading lxml-5.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/8.0 MB[0m [31m53.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: lxml
Successfully installed lxml-5.1.0
Note: you may need to restart the kernel to use updated packages.


# Common Methods for `pd.read_html()`

## Overview
`pd.read_html()` extracts the `<table>` elements of an HTML page and returns them as a **list** of pandas DataFrames (one DataFrame per table).

## Common Parameters
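- `io`: URL, file path, or file-like object. Wrap a raw HTML string in `io.StringIO` (passing it directly is deprecated since pandas 2.1).
- `match`: String or regex; only tables containing text that matches it are returned.
- `flavor`: Parser to use (`'lxml'`, `'bs4'`, or `'html5lib'`). By default `lxml` is tried first, falling back to `bs4` + `html5lib`.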
- `header`: Row number(s) to use as column names.
- `index_col`: Column(s) to use as the index.
- `attrs`: Dictionary of HTML attributes to filter tables.
- `skiprows`: Number of rows to skip before reading.
- `parse_dates`: Attempt to parse columns as dates.

## Example Usage
```python
import pandas as pd

url = "https://example.com/table_page"
tables = pd.read_html(url)
print(tables[0])  # Prints the first extracted table
```



In [None]:
# lxml is the HTML parser pd.read_html tries first; importing it here simply
# confirms that it is installed in this kernel.
import lxml
import pandas as pd
# Download the page and parse its HTML tables. Note that the result is a list of
# DataFrames (see the type check in the next cell), not a single DataFrame.
url_df = pd.read_html("https://www.basketball-reference.com/leagues/NBA_2015_totals.html")

In [5]:
# Check what read_html returned: a list (of DataFrames), not a DataFrame.
type(url_df)

list

In [None]:
# `url_df` is a list; per the original note it holds a single DataFrame for this page.
df4 = url_df[0]   # take that first (and only) table as a DataFrame

In [9]:
# Display the DataFrame; the notebook elides the middle rows and columns with "...".
df4

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Quincy Acy,PF,24,NYK,68,22,1287,152,331,...,.784,79,222,301,68,27,22,60,147,398
1,2,Jordan Adams,SG,20,MEM,30,0,248,35,86,...,.609,9,19,28,16,16,7,14,24,94
2,3,Steven Adams,C,21,OKC,70,67,1771,217,399,...,.502,199,324,523,66,38,86,99,222,537
3,4,Jeff Adrien,PF,28,MIN,17,0,215,19,44,...,.579,23,54,77,15,4,9,9,30,60
4,5,Arron Afflalo,SG,29,TOT,78,72,2502,375,884,...,.843,27,220,247,129,41,7,116,167,1035
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
670,490,Thaddeus Young,PF,26,TOT,76,68,2434,451,968,...,.655,127,284,411,173,124,25,117,171,1071
671,490,Thaddeus Young,PF,26,MIN,48,48,1605,289,641,...,.682,75,170,245,135,86,17,75,115,685
672,490,Thaddeus Young,PF,26,BRK,28,20,829,162,327,...,.606,52,114,166,38,38,8,42,56,386
673,491,Cody Zeller,C,22,CHO,62,45,1487,172,373,...,.774,97,265,362,100,34,49,62,156,472


In [10]:
# Dimensions of the table as (rows, columns).
df4.shape

(675, 30)

In [None]:
# Preview the first five rows (five is head()'s default row count).
df4.head()

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Quincy Acy,PF,24,NYK,68,22,1287,152,331,...,0.784,79,222,301,68,27,22,60,147,398
1,2,Jordan Adams,SG,20,MEM,30,0,248,35,86,...,0.609,9,19,28,16,16,7,14,24,94
2,3,Steven Adams,C,21,OKC,70,67,1771,217,399,...,0.502,199,324,523,66,38,86,99,222,537
3,4,Jeff Adrien,PF,28,MIN,17,0,215,19,44,...,0.579,23,54,77,15,4,9,9,30,60
4,5,Arron Afflalo,SG,29,TOT,78,72,2502,375,884,...,0.843,27,220,247,129,41,7,116,167,1035


In [None]:
# Summarize every column: its non-null count and its dtype.
# NOTE(review): every column is reported as `object` — presumably the scraped
# table contains non-numeric cells (e.g. repeated header rows); confirm and
# convert before doing any numeric work.
df4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 675 entries, 0 to 674
Data columns (total 30 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Rk      675 non-null    object
 1   Player  675 non-null    object
 2   Pos     675 non-null    object
 3   Age     675 non-null    object
 4   Tm      675 non-null    object
 5   G       675 non-null    object
 6   GS      675 non-null    object
 7   MP      675 non-null    object
 8   FG      675 non-null    object
 9   FGA     675 non-null    object
 10  FG%     673 non-null    object
 11  3P      675 non-null    object
 12  3PA     675 non-null    object
 13  3P%     594 non-null    object
 14  2P      675 non-null    object
 15  2PA     675 non-null    object
 16  2P%     671 non-null    object
 17  eFG%    673 non-null    object
 18  FT      675 non-null    object
 19  FTA     675 non-null    object
 20  FT%     650 non-null    object
 21  ORB     675 non-null    object
 22  DRB     675 non-null    ob

### `df4.to_csv("players.csv", index=False)` -> creates a CSV file `players.csv` from the `df4` DataFrame (without writing the index column).

In [None]:
# Show the labels of all columns.
df4.columns

Index(['Rk', 'Player', 'Pos', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%',
       '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%',
       'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS'],
      dtype='object')

# Common Methods for `pd.read_json()`

## Overview
`pd.read_json()` is used to read JSON data into a Pandas DataFrame.

## Common Parameters
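- `path_or_buf`: File path, URL, or file-like object. Wrap a literal JSON string in `io.StringIO` (passing it directly is deprecated since pandas 2.1).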
- `orient`: Format of JSON (`'records'`, `'split'`, `'index'`, etc.).
- `lines`: If `True`, reads the input as one JSON object per line (JSON Lines format).
- `dtype`: Data type for each column.
- `convert_dates`: Automatically convert date columns.
- `compression`: Handle compressed JSON files (e.g., `'gzip'`, `'zip'`).

## Example Usage
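```python
from io import StringIO

import pandas as pd

json_data = '{"name": ["Alice", "Bob"], "age": [25, 30]}'
# Wrap literal JSON in StringIO; passing the raw string is deprecated since pandas 2.1.
df = pd.read_json(StringIO(json_data))
print(df)
```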

In [16]:
# GitHub REST endpoint that lists issues and pull requests of pandas-dev/pandas.
# `url` is reused by the `requests` cell further down.
url = "https://api.github.com/repos/pandas-dev/pandas/issues"
# read_json downloads the JSON array and builds one row per record; the result
# is only displayed here, not stored.
pd.read_json(url)

Unnamed: 0,url,repository_url,labels_url,comments_url,events_url,html_url,id,node_id,number,title,...,closed_at,author_association,active_lock_reason,body,reactions,timeline_url,performed_via_github_app,state_reason,draft,pull_request
0,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/issues/58031,2210967074,I_kwDOAA0YD86DyK4i,58031,Inconsistent behaviour of GroupBy for BooleanA...,...,NaT,NONE,,Lets suppose aggregate function returns int or...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,,
1,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/58030,2210863617,PR_kwDOAA0YD85q7nmu,58030,BUG: Fixed DataFrameGroupBy.transform with num...,...,NaT,NONE,,- [X] closes #57069\r\n- [X] [Tests added and ...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,0.0,{'url': 'https://api.github.com/repos/pandas-d...
2,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/58029,2210700416,PR_kwDOAA0YD85q7Ea-,58029,CLN: enforce `any/all` deprecation with `datet...,...,NaT,CONTRIBUTOR,,"xref #50947, xref #58006\r\n\r\nenforced depre...",{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,0.0,{'url': 'https://api.github.com/repos/pandas-d...
3,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/58028,2210429200,PR_kwDOAA0YD85q6JDw,58028,Fix DataFrame.cumsum failing when dtype is tim...,...,NaT,CONTRIBUTOR,,- [x] closes #57956 \r\n- [x] [Tests added and...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,0.0,{'url': 'https://api.github.com/repos/pandas-d...
4,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/58027,2209769893,PR_kwDOAA0YD85q35DG,58027,REGR: Performance of DataFrame.stack where col...,...,NaT,MEMBER,,- [x] closes #57302 (Replace xxxx with the Git...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,0.0,{'url': 'https://api.github.com/repos/pandas-d...
5,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/58026,2209752885,PR_kwDOAA0YD85q31Fu,58026,DEPR: allowing Manager in DataFrame/Series con...,...,NaT,MEMBER,,Or is the idea to change this to a FutureWarni...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,0.0,{'url': 'https://api.github.com/repos/pandas-d...
6,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/58025,2209729675,PR_kwDOAA0YD85q3v8H,58025,CLN: remove no-longer-needed warning filters,...,NaT,MEMBER,,- [ ] closes #xxxx (Replace xxxx with the GitH...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,0.0,{'url': 'https://api.github.com/repos/pandas-d...
7,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/issues/58024,2209551122,I_kwDOAA0YD86DsxMS,58024,BUG: Performing groupby with `as_index=False` ...,...,NaT,NONE,,### Pandas version checks\n\n- [X] I have chec...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,,
8,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/58022,2209517133,PR_kwDOAA0YD85q3CcZ,58022,DEPR: freq keyword in PeriodArray,...,NaT,MEMBER,,- [ ] closes #xxxx (Replace xxxx with the GitH...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,0.0,{'url': 'https://api.github.com/repos/pandas-d...
9,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/58021,2209516947,PR_kwDOAA0YD85q3CaB,58021,DEPR: resample with PeriodIndex,...,NaT,MEMBER,,- [ ] closes #xxxx (Replace xxxx with the GitH...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,0.0,{'url': 'https://api.github.com/repos/pandas-d...


In [None]:
# Optional detour (GET and POST basics; safe to skip for now): fetch the same
# endpoint with `requests` to inspect the raw HTTP response.
import requests

# Always pass a timeout: without one, requests can wait indefinitely on a
# stalled connection and hang the kernel.
data = requests.get(url, timeout=10)

In [18]:
# Show the Response object; its repr includes the HTTP status code.
data

<Response [200]>

In [19]:
# Decode the response body as JSON. Despite the `df` prefix, `df6` is a plain
# Python list of dicts (one per issue), not a DataFrame.
df6 = data.json()

In [20]:
# Display the decoded JSON (a list of dicts).
df6

[{'url': 'https://api.github.com/repos/pandas-dev/pandas/issues/58031',
  'repository_url': 'https://api.github.com/repos/pandas-dev/pandas',
  'labels_url': 'https://api.github.com/repos/pandas-dev/pandas/issues/58031/labels{/name}',
  'comments_url': 'https://api.github.com/repos/pandas-dev/pandas/issues/58031/comments',
  'events_url': 'https://api.github.com/repos/pandas-dev/pandas/issues/58031/events',
  'html_url': 'https://github.com/pandas-dev/pandas/issues/58031',
  'id': 2210967074,
  'node_id': 'I_kwDOAA0YD86DyK4i',
  'number': 58031,
  'title': 'Inconsistent behaviour of GroupBy for BooleanArray series ',
  'user': {'login': 'ziviland',
   'id': 1967687,
   'node_id': 'MDQ6VXNlcjE5Njc2ODc=',
   'avatar_url': 'https://avatars.githubusercontent.com/u/1967687?v=4',
   'gravatar_id': '',
   'url': 'https://api.github.com/users/ziviland',
   'html_url': 'https://github.com/ziviland',
   'followers_url': 'https://api.github.com/users/ziviland/followers',
   'following_url': 'http