## Reading Data from Different Sources

Converting a JSON string to a DataFrame

In [37]:
import pandas as pd
import json
from io import StringIO
# One JSON object: two scalar fields plus a one-element list that holds a nested object.
Data = '{"employee_name": "James", "email": "james@gmail.com", "job_profile":[{"title1":"Team Lead", "title2": "Sr. Developer"}]}'

# Wrap the raw text in an in-memory buffer so read_json receives a file-like object.
json_buffer = StringIO(Data)
df = pd.read_json(json_buffer)
df

<class 'str'>


Unnamed: 0,employee_name,email,job_profile
0,James,james@gmail.com,"{'title1': 'Team Lead', 'title2': 'Sr. Develop..."


Converting dataframe back to json string

In [5]:
df.to_json()    # default orient='columns': the result is {column -> {index label -> value}},
                # which is why the row label "0" shows up inside every column's object

'{"employee_name":{"0":"James"},"email":{"0":"james@gmail.com"},"job_profile":{"0":{"title1":"Team Lead","title2":"Sr. Developer"}}}'

Converting the DataFrame back to JSON with the `orient` parameter of `to_json()`

In [6]:
df.to_json(orient='index')  # {index label -> {column -> value}}; note the DataFrame default is 'columns', not 'index'

'{"0":{"employee_name":"James","email":"james@gmail.com","job_profile":{"title1":"Team Lead","title2":"Sr. Developer"}}}'

In [7]:
df.to_json(orient='records')    # list of row objects with the index labels dropped; the closest shape to the original input

'[{"employee_name":"James","email":"james@gmail.com","job_profile":{"title1":"Team Lead","title2":"Sr. Developer"}}]'

# Reading a DataFrame from a URL with `header=None`

In [39]:
# Load the UCI wine data set straight from its URL. header=None tells pandas not
# to treat the first line as column names, so the columns receive the default
# integer labels 0, 1, 2, ... next to the default integer row index.
wine_data_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
df = pd.read_csv(wine_data_url, header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
2,1,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
174,3,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
175,3,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
176,3,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840


#### converting above dataframe to csv

In [12]:
# Writes My_csv.csv into the current working directory. With the default
# arguments the row index and the column labels are written as well.
df.to_csv('My_csv.csv')

# Building a DataFrame from the HTML tables of a web page

In [22]:
# read_html needs an HTML parser backend: remember to install lxml, html5lib, beautifulsoup4.
url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/"
# The call returns a list with one DataFrame per parsed table, so `df` is that
# list and the first table on the page is element 0.
df = pd.read_html(io=url)
df[0]

Unnamed: 0,Bank NameBank,CityCity,StateSt,CertCert,Acquiring InstitutionAI,Closing DateClosing,FundFund
0,Republic First Bank dba Republic Bank,Philadelphia,PA,27332,"Fulton Bank, National Association","April 26, 2024",10546
1,Citizens Bank,Sac City,IA,8758,Iowa Trust & Savings Bank,"November 3, 2023",10545
2,Heartland Tri-State Bank,Elkhart,KS,25851,"Dream First Bank, N.A.","July 28, 2023",10544
3,First Republic Bank,San Francisco,CA,59017,"JPMorgan Chase Bank, N.A.","May 1, 2023",10543
4,Signature Bank,New York,NY,57053,"Flagstar Bank, N.A.","March 12, 2023",10540
...,...,...,...,...,...,...,...
564,"Superior Bank, FSB",Hinsdale,IL,32646,"Superior Federal, FSB","July 27, 2001",6004
565,Malta National Bank,Malta,OH,6629,North Valley Bank,"May 3, 2001",4648
566,First Alliance Bank & Trust Co.,Manchester,NH,34264,Southern New Hampshire Bank & Trust,"February 2, 2001",4647
567,National State Bank of Metropolis,Metropolis,IL,3815,Banterra Bank of Marion,"December 14, 2000",4646


### Getting data from Wikipedia tables, match="text the table contains"

In [26]:
url2 = "https://en.wikipedia.org/wiki/Mobile_country_code"
# match="Country" keeps only the tables whose text matches "Country";
# header=0 takes the first row of each table as its column labels.
df = pd.read_html(io=url2, header=0, match="Country")
# read_html returns a list of DataFrames, so the first matching table is element 0.
# NOTE(review): the name reads "county" but the table holds country codes; kept as-is in case other cells use it.
county_code = df[0]
county_code.to_csv("Country_Code.csv")

## Reading xlsx file, excel file

In [33]:
# Reads data.xlsx from the current working directory (the first sheet by default).
# NOTE(review): reading .xlsx presumably needs an Excel engine such as openpyxl installed — confirm.
df_excel = pd.read_excel("data.xlsx")

## Converting a DataFrame to a pickle file

Pickling is the process of converting a Python object into a byte stream so that it can be stored in a file or database, used to maintain program state across sessions, or transported over the network.

In [35]:
# Use a .pkl extension: the file holds a serialized Python object, not an Excel
# workbook, so an .xlsx name would mislead both readers and spreadsheet tools.
df_excel.to_pickle('data_pickle.pkl')
# Only unpickle files you created or trust: loading a pickle can execute arbitrary code.
pd.read_pickle('data_pickle.pkl')
# In ML work a pickle file is a convenient way to keep a backup of the data
# between sessions.

Unnamed: 0,Name,Age
0,Krish,32
1,Jack,34
2,John,31
