In [4]:
import pandas as pd
from io import StringIO

# Parse a raw JSON object (key/value pairs) into a DataFrame; a later cell
# converts the frame back to JSON.
# The literal is split over adjacent string pieces purely for readability;
# Python joins them into a single string.
data = (
    '{"Name": "Alice Johnson",'
    '"Job": "Software Engineer",'
    '"Job Description": [{"Title1":"Backend Developer","Title2":"Database Admin"}],'
    '"Email": "alice.j@techcorp.com",'
    '"Phone": "+1-555-1234"}'
)
# Wrap the text in a file-like buffer before handing it to read_json.
json_buffer = StringIO(data)
df = pd.read_json(json_buffer)
df

Unnamed: 0,Name,Job,Job Description,Email,Phone
0,Alice Johnson,Software Engineer,"{'Title1': 'Backend Developer', 'Title2': 'Dat...",alice.j@techcorp.com,+1-555-1234


In [9]:
# Convert the DataFrame back to JSON.
#
# The `orient` argument controls the layout of the generated JSON:
#
#   orient                Description                                  Example output
#   'columns' (default)   Columns as keys, rows as dictionaries        {"Name": {"0": "Alice", "1": "Bob"}}
#   'records'             List of dictionaries (good for APIs)         [{"Name": "Alice", "Age": 25}]
#   'index'               Row indices as keys                          {"0": {"Name": "Alice", "Age": 25}}
#   'split'               Columns, index, and data stored separately   {"columns": ["Name", "Age"], "index": [0], "data": [["Alice", 25]]}
#   'table'               JSON Table Schema format                     {"schema": {...}, "data": [...]}
#
# This reference table is kept in comments rather than in a trailing string
# literal: a notebook displays the value of a cell's last expression, so the
# to_json() call has to come last for its result to be shown.
df.to_json(orient='records')

'[{"Name":"Alice Johnson","Job":"Software Engineer","Job Description":{"Title1":"Backend Developer","Title2":"Database Admin"},"Email":"alice.j@techcorp.com","Phone":"+1-555-1234"}]'

In [None]:
# Read CSV data straight from a URL, save a local copy, then preview it.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)

# Write the DataFrame to a CSV file. The row index is written as well (as an
# unnamed first column); pass index=False to to_csv to leave it out.
df.to_csv('titanic.csv')

# The preview is the last expression on purpose: a notebook only renders the
# final expression of a cell, so head() placed mid-cell is never displayed.
df.head()

In [27]:
# Read JSON data straight from a URL, save a local copy, then preview it.
url = "https://raw.githubusercontent.com/dariusk/corpora/master/data/foods/pizzaToppings.json"
df = pd.read_json(url)

# Write the DataFrame to disk as a JSON list with one object per row
# (orient='records').
df.to_json("pizza_toppings.json", orient='records')

# The preview is the last expression on purpose: a notebook only renders the
# final expression of a cell, so head() placed mid-cell is never displayed.
df.head()

In [None]:
# Read the HTML tables found at a URL.
# read_html returns a list of DataFrames, so `df` below is a list and the
# table of interest is picked out by position. header=0 takes the column
# names from the first row of each table.
url = 'https://www.fdic.gov/bank-failures/failed-bank-list'
df = pd.read_html(io=url, header=0)
first_table = df[0]
first_table

Unnamed: 0,Bank Name,City,State,Cert,Aquiring Institution,Closing Date,Fund Sort ascending
0,Pulaski Savings Bank,Chicago,Illinois,28611,Millennium Bank,"January 17, 2025",10548
1,The First National Bank of Lindsay,Lindsay,Oklahoma,4134,"First Bank & Trust Co., Duncan, OK","October 18, 2024",10547
2,Republic First Bank dba Republic Bank,Philadelphia,Pennsylvania,27332,"Fulton Bank, National Association","April 26, 2024",10546
3,Citizens Bank,Sac City,Iowa,8758,Iowa Trust & Savings Bank,"November 3, 2023",10545
4,Heartland Tri-State Bank,Elkhart,Kansas,25851,"Dream First Bank, N.A.","July 28, 2023",10544
5,First Republic Bank,San Francisco,California,59017,"JPMorgan Chase Bank, N.A.","May 1, 2023",10543
6,Signature Bank,New York,New York,57053,"Flagstar Bank, N.A.","March 12, 2023",10540
7,Silicon Valley Bank,Santa Clara,California,24735,First Citizens Bank & Trust Company,"March 10, 2023",10539
8,Almena State Bank,Almena,Kansas,15426,Equity Bank,"October 23, 2020",10538
9,First City Bank of Florida,Fort Walton Beach,Florida,16748,"United Fidelity Bank, fsb","October 16, 2020",10537


In [21]:
# Load a local Excel workbook into a DataFrame and display it.
# No sheet_name is given, so read_excel falls back to its default sheet.
excel_path = 'excel_data.xlsx'
data_excel = pd.read_excel(excel_path)
data_excel

Unnamed: 0,Id,Name,Age,Score
0,1,Bob,20,80
1,2,Anne,19,87
2,3,Dean,21,90
3,4,Robert,22,94


📌 What is a Pickle File (.pkl)?

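A pickle file stores a serialized Python object in a binary format, so the object can be saved to disk and loaded back later exactly as it was. Because nothing has to be parsed from text, saving and loading is usually faster than with text formats like CSV or JSON. Only load pickle files from sources you trust, because unpickling can execute arbitrary code.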

In [25]:
# Build a small two-column DataFrame and serialize it to a pickle file.
records = {
    'Name': ['Alice', 'Bob'],
    'Age': [20, 21],
}
df = pd.DataFrame(records)
df.to_pickle("data.pkl")

🔹 Why Use Pickle?

Saves Python objects (DataFrames, lists, dictionaries, etc.).
Loads data faster than CSV/JSON.
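Preserves the object exactly as it was, including a DataFrame's column data types and index (unlike CSV, which stores everything as plain text, so types have to be re-inferred on load).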
Works easily with Pandas and NumPy.

In [26]:
# Load the DataFrame back from the pickle file and print its plain-text form.
# Only unpickle files from a trusted source: loading a pickle can execute
# arbitrary code.
pickle_path = "data.pkl"
data_load_pickle = pd.read_pickle(pickle_path)
print(data_load_pickle)

    Name  Age
0  Alice   20
1    Bob   21
