## Read Data from different sources

In [28]:
import pandas as pd
from io import StringIO


In [14]:
data = ''' 
{
  "id": 101,
  "name": "Alex Ray",
  "email": "alex.ray@example.com",
  "isActive": true,
  "courses": [
    "History",
    "Math",
    "Science"
  ]
}
'''
info = pd.read_json(StringIO(data))
info

Unnamed: 0,id,name,email,isActive,courses
0,101,Alex Ray,alex.ray@example.com,True,History
1,101,Alex Ray,alex.ray@example.com,True,Math
2,101,Alex Ray,alex.ray@example.com,True,Science


In [21]:
info.to_json(orient="index")


'{"0":{"id":101,"name":"Alex Ray","email":"alex.ray@example.com","isActive":true,"courses":"History"},"1":{"id":101,"name":"Alex Ray","email":"alex.ray@example.com","isActive":true,"courses":"Math"},"2":{"id":101,"name":"Alex Ray","email":"alex.ray@example.com","isActive":true,"courses":"Science"}}'

In [None]:
# read data from a link
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data")
df

Unnamed: 0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
0,1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
1,1,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
2,1,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
3,1,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
4,1,14.20,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
173,3,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
174,3,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
175,3,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840


In [25]:
# Convert data back to csv
df.to_csv("wine.csv")

In [32]:
# Read data from html files

url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/"
df = pd.read_html(url)
df

[                                            Bank Name                City  \
 0                        The Santa Anna National Bank          Santa Anna   
 1                                Pulaski Savings Bank             Chicago   
 2                  The First National Bank of Lindsay             Lindsay   
 3               Republic First Bank dba Republic Bank        Philadelphia   
 4                                       Citizens Bank            Sac City   
 5                            Heartland Tri-State Bank             Elkhart   
 6                                 First Republic Bank       San Francisco   
 7                                      Signature Bank            New York   
 8                                 Silicon Valley Bank         Santa Clara   
 9                                   Almena State Bank              Almena   
 10                         First City Bank of Florida   Fort Walton Beach   
 11                               The First State Bank       Bar

In [34]:
# scrap data from webpages

url = 'https://en.wikipedia.org/wiki/Mobile_country_code'
pd.read_html(url, match="Country", header=0)

[     Mobile country code                                    Country ISO 3166  \
 0                    289                                 A Abkhazia    GE-AB   
 1                    412                                Afghanistan       AF   
 2                    276                                    Albania       AL   
 3                    603                                    Algeria       DZ   
 4                    544  American Samoa (United States of America)       AS   
 ..                   ...                                        ...      ...   
 247                  452                                    Vietnam       VN   
 248                  543                        W Wallis and Futuna       WF   
 249                  421                                    Y Yemen       YE   
 250                  645                                   Z Zambia       ZM   
 251                  648                                   Zimbabwe       ZW   
 
                          

In [None]:
''' 
you can open excel files, arrow, feather and other types of dataset files in pandas
'''

In [42]:
# convert a df into a pickle file

df = pd.read_csv("wine.csv")
df.to_pickle("wine.pickle")

In [43]:
# convert the pickle file back to csv

df = pd.read_pickle("wine.pickle")
df

Unnamed: 0.1,Unnamed: 0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
0,0,1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
1,1,1,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
2,2,1,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
3,3,1,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
4,4,1,14.20,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,172,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
173,173,3,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
174,174,3,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
175,175,3,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840
