### Reading and writing text files

In [2]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [3]:
#lec25.csv is available along with the code files
dframe = pd.read_csv('lec25.csv')  #Default delimiter is a comma; the first row is used as the column names

In [4]:
dframe

Unnamed: 0,q,r,s,t,apple
0,2,3,4,5,pear
1,a,s,d,f,rabbit
2,5,2,5,7,dog


In [5]:
#header=None: the first row is kept as data and the columns get integer labels instead
dframe = pd.read_csv('lec25.csv',header = None)

In [6]:
dframe

Unnamed: 0,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear
2,a,s,d,f,rabbit
3,5,2,5,7,dog


In [7]:
#read_table is the same kind of reader as read_csv, but its default delimiter is a tab
dframe = pd.read_table('lec25.csv',sep=',',header=None) #With sep=',' this gives the same result as read_csv

In [8]:
dframe

Unnamed: 0,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear
2,a,s,d,f,rabbit
3,5,2,5,7,dog


In [9]:
pd.read_csv('lec25.csv',header=None,nrows=2)   #nrows=2 reads only the first 2 rows of the file

Unnamed: 0,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear


In [10]:
#Write the frame to a file, using "|" as the field separator instead of a comma
dframe.to_csv('bbtextdata_out.csv',sep="|")

In [11]:
import sys

In [12]:
dframe.to_csv(sys.stdout)  #Writing to stdout: the CSV text is printed below the cell

,0,1,2,3,4
0,q,r,s,t,apple
1,2,3,4,5,pear
2,a,s,d,f,rabbit
3,5,2,5,7,dog


In [13]:
#sep sets the field delimiter; here "_" replaces the comma
dframe.to_csv(sys.stdout,sep="_")

_0_1_2_3_4
0_q_r_s_t_apple
1_2_3_4_5_pear
2_a_s_d_f_rabbit
3_5_2_5_7_dog


In [14]:
#columns=[0,1,2] writes only those columns (the index is still written)
dframe.to_csv(sys.stdout,columns=[0,1,2])

,0,1,2
0,q,r,s
1,2,3,4
2,a,s,d
3,5,2,5


### Working with JSON
JSON stands for JavaScript Object Notation.

In [16]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [27]:
#Sample JSON document kept as a Python string
#NOTE(review): the top-level key is "zoo animal" (with a space) while the nested record uses "zoo_animal" - confirm which spelling is intended
json_obj = """
{   "zoo animal": "Lion",
    "food":["Meat","Veggies","Honey"],
    "fur":"Golden",
    "clothes":null,
    "diet":[{"zoo_animal":"Gazelle","food":"grass","fur":"Brown"}]
}
"""

In [28]:
import json

In [30]:
#Parse the JSON text into Python objects (here, a dict)
data = json.loads(json_obj)

In [32]:
data

{'zoo animal': 'Lion',
 'food': ['Meat', 'Veggies', 'Honey'],
 'fur': 'Golden',
 'clothes': None,
 'diet': [{'zoo_animal': 'Gazelle', 'food': 'grass', 'fur': 'Brown'}]}

In [34]:
#Serialize the Python object back to a JSON string (None is written as null)
json.dumps(data)

'{"zoo animal": "Lion", "food": ["Meat", "Veggies", "Honey"], "fur": "Golden", "clothes": null, "diet": [{"zoo_animal": "Gazelle", "food": "grass", "fur": "Brown"}]}'

In [36]:
#Build a frame from the list of dicts under 'diet': one row per dict, keys become columns
dframe = DataFrame(data['diet'])

In [38]:
dframe

Unnamed: 0,zoo_animal,food,fur
0,Gazelle,grass,Brown


### Working with HTML 

In [57]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from pandas import read_html

In [58]:
#read_html needs an HTML parser to be installed: beautifulsoup4 and html5lib

In [42]:
#Page holding the FDIC failed-bank table that is passed to read_html
#NOTE(review): this is a plain http URL - confirm whether an https address should be used
url = 'http://www.fdic.gov/bank/individual/failed/banklist.html'

In [44]:
#read_html fetches the page and returns a list of DataFrames, one per HTML table found
#Use the documented top-level pd.read_html rather than the pd.io.html module path
dframe_list = pd.read_html(url)

In [47]:
dframe_list[0]

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date
0,Ericson State Bank,Ericson,NE,18265,Farmers and Merchants Bank,"February 14, 2020"
1,City National Bank of New Jersey,Newark,NJ,21111,Industrial Bank,"November 1, 2019"
2,Resolute Bank,Maumee,OH,58317,Buckeye State Bank,"October 25, 2019"
3,Louisa Community Bank,Louisa,KY,58112,Kentucky Farmers Bank Corporation,"October 25, 2019"
4,The Enloe State Bank,Cooper,TX,10716,"Legend Bank, N. A.","May 31, 2019"
...,...,...,...,...,...,...
555,"Superior Bank, FSB",Hinsdale,IL,32646,"Superior Federal, FSB","July 27, 2001"
556,Malta National Bank,Malta,OH,6629,North Valley Bank,"May 3, 2001"
557,First Alliance Bank & Trust Co.,Manchester,NH,34264,Southern New Hampshire Bank & Trust,"February 2, 2001"
558,National State Bank of Metropolis,Metropolis,IL,3815,Banterra Bank of Marion,"December 14, 2000"


In [92]:
#Worldometers coronavirus statistics page
covid_url = 'https://www.worldometers.info/coronavirus/'

In [93]:
#Returns a list of DataFrames, one per HTML table on the page
#Use the documented top-level pd.read_html rather than the pd.io.html module path
covid_list = pd.read_html(covid_url)

In [94]:
#Keep the first table returned for the page; it has one row per country
covid_stats = covid_list[0]

In [96]:
#Select the rows whose 'Country,Other' value is exactly "India"
covid_stats.loc[covid_stats['Country,Other'].eq("India")]

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Tot Deaths/1M pop
42,India,695,38,14.0,2.0,45.0,636,,0.5,0.01


In [101]:
#Sort rows by TotalDeaths, largest first (rows with a missing TotalDeaths end up at the bottom)
#NOTE(review): the aggregate "Total:" row is part of the table, so it sorts to the top - drop it first if only countries are wanted
covid_stats.sort_values('TotalDeaths',ascending=False)

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Tot Deaths/1M pop
199,Total:,488091,17123,22049.0,771.0,117603.0,348439,17723.0,62.6,2.8
1,Italy,74386,,7503.0,,9362.0,57521,3489.0,1230.0,124.0
3,Spain,56188,+6673,4089.0,442.0,7015.0,45084,3166.0,1202.0,87.0
0,China,81285,+67,3287.0,6.0,74051.0,3947,1235.0,56.0,2.0
5,Iran,29406,+2389,2234.0,157.0,10457.0,16715,2746.0,350.0,27.0
...,...,...,...,...,...,...,...,...,...,...
194,Montserrat,1,,,,,1,,200.0,
195,Papua New Guinea,1,,,,,1,,0.1,
196,St. Vincent Grenadines,1,,,,,1,,9.0,
197,Timor-Leste,1,,,,,1,,0.8,


In [106]:
#nlargest(5, col) returns the 5 rows with the highest values in col
#NOTE(review): "Total:" is an aggregate row, not a country, yet it takes the first of the 5 slots
covid_stats.nlargest(5,'TotalDeaths')

Unnamed: 0,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Tot Deaths/1M pop
199,Total:,488091,17123.0,22049.0,771.0,117603.0,348439,17723.0,62.6,2.8
1,Italy,74386,,7503.0,,9362.0,57521,3489.0,1230.0,124.0
3,Spain,56188,6673.0,4089.0,442.0,7015.0,45084,3166.0,1202.0,87.0
0,China,81285,67.0,3287.0,6.0,74051.0,3947,1235.0,56.0,2.0
5,Iran,29406,2389.0,2234.0,157.0,10457.0,16715,2746.0,350.0,27.0


### Working with Excel sheets

In [60]:
#Reading Excel files needs extra packages; install them from a shell:
#pip install xlrd
#pip install openpyxl

In [62]:
import pandas as pd

In [64]:
#Open the workbook once so that its sheets can be listed and parsed individually
#NOTE(review): the ExcelFile is not closed in the cells shown - confirm whether it should be
xlsfile = pd.ExcelFile('Lec_28_test.xlsx')

In [67]:
#Read the sheet named 'Sheet1' into a DataFrame; the sheet's first row is used as the header
dframe = xlsfile.parse('Sheet1')

In [69]:
dframe

Unnamed: 0,This is a test,Unnamed: 1,Unnamed: 2
0,23,6678,456
1,234,678,456
2,234,7,345
3,34,56,234
4,5,456,4365


In [72]:
#List the names of all sheets in the workbook
xlsfile.sheet_names

['Sheet1', 'Sheet2', 'Sheet3']