# Data Structures in Pandas

### Pandas makes use of two different data structures 
1. Series
2. Data Frames

*Series represents data in a 1D form while data frames represents data in a 2D tabular form*

In [1]:
import pandas as pd

In [2]:
dict = {'a':3, 'b':'cat', 'c':2.5}
pd.Series(dict)

a      3
b    cat
c    2.5
dtype: object

In [3]:
oneD = pd.Series([100,'cat',310,'gag',500], ['Amy','Bobby','Cat','Don','Emma'])
#the first list contains values
#the second list contains index
#index is the label of values
print(oneD)

Amy      100
Bobby    cat
Cat      310
Don      gag
Emma     500
dtype: object


In [4]:
#loc is a label-location based indexer for selection by labels - cal and Emma
oneD.loc[['Cat','Emma']]

Cat     310
Emma    500
dtype: object

In [5]:
#extract the data at index 0,3,4
oneD[[0,3,4]]

Amy     100
Don     gag
Emma    500
dtype: object

In [6]:
#ilocis primarily integer position based (from 0 to length-1 of the axis) access index 1
oneD.iloc[1]

'cat'

In [7]:
'cat' in oneD

False

In [16]:
'Cat' in oneD

True

In [17]:
d = {'A' : pd.Series([100, 200, 300], index=['apple', 'pear', 'orange']),
     'B' : pd.Series([111, 222, 333, 4444], index=['apple', 'pear', 'orange', 'melon'])}

In [19]:
df = pd.DataFrame(d)
print(d)

{'A': apple     100.0
pear      200.0
orange    300.0
dtype: float64, 'B': apple      111.0
pear       222.0
orange     333.0
melon     4444.0
dtype: float64}


In [21]:
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [22]:
df.index

Index(['apple', 'melon', 'orange', 'pear'], dtype='object')

In [23]:
df.columns

Index(['A', 'B'], dtype='object')

In [24]:
pd.DataFrame(df, index=['orange', 'melon', 'apple'], columns=['A'])

Unnamed: 0,A
orange,300.0
melon,
apple,100.0


# Read in CSV file

In [25]:
import numpy as np
import pandas as pd

In [28]:
file = "E:\\Udemy\\Complete Data Science Training with Python for Data Analysis\\scriptsLecture\\section4\\Resp2.csv"
df1 = pd.read_csv(file)
df1.head(9)

Unnamed: 0,experience,respiration
0,0,3.94
1,0,4.26
2,0,4.16
3,0,3.76
4,0,4.07
5,0,3.57
6,0,4.11
7,0,4.18
8,1,4.0


In [30]:
file = "E:\\Udemy\\Complete Data Science Training with Python for Data Analysis\\scriptsLecture\\section4\\winequality-red.csv"
df1 = pd.read_csv(file)
df1 = pd.read_csv(file, sep=";")
df1.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [33]:
file = "E:\\Udemy\\Complete Data Science Training with Python for Data Analysis\\scriptsLecture\\section4\\bostonTxt.txt"
df1 = pd.read_csv(file, sep="\t")
df1.head(20)

Unnamed: 0,MV,INDUS,NOX,RM,TAX,PT,LSTAT
0,24.0,2.31,53.8,6.575,296,15.3,4.98
1,21.6,7.07,46.9,6.421,242,17.8,9.14
2,34.7,7.07,46.9,7.185,242,17.8,4.03
3,33.4,2.18,45.8,6.998,222,18.7,2.94
4,36.2,2.18,45.8,7.147,222,18.7,5.33
5,28.7,2.18,45.8,6.43,222,18.7,5.21
6,22.9,7.87,52.4,6.012,311,15.2,12.43
7,27.1,7.87,52.4,6.172,311,15.2,19.15
8,16.5,7.87,52.4,5.631,311,15.2,29.93
9,18.9,7.87,52.4,6.004,311,15.2,17.1


# Read in Excel Data using Pandas

In [34]:
import pandas as pd

In [35]:
file = "E:\\Udemy\\Complete Data Science Training with Python for Data Analysis\\scriptsLecture\\section4\\boston1.xls"
x1 = pd.ExcelFile(file)
print(x1.sheet_names)

['Sheet1', 'Sheet2']


In [36]:
df1 = x1.parse('Sheet1')
df1.head()

Unnamed: 0,MV,INDUS,NOX,RM,TAX,PT,LSTAT,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,24.0,2.31,53.8,6.575,296,15.3,4.98,,,Subset of Boston housing tract
1,21.6,7.07,46.9,6.421,242,17.8,9.14,,,data of Harrison and Rubinfeld
2,34.7,7.07,46.9,7.185,242,17.8,4.03,,,(1978). Each case is one U.S.
3,33.4,2.18,45.8,6.998,222,18.7,2.94,,,Census tract in the Boston area.
4,36.2,2.18,45.8,7.147,222,18.7,5.33,,,


In [38]:
df1 = x1.parse('Sheet2')
df1.head(13)

Unnamed: 0,MV,INDUS,NOX,RM,TAX,PT,LSTAT
0,24.0,2.31,53.8,6.575,296,15.3,4.98
1,21.6,7.07,46.9,6.421,242,17.8,9.14
2,34.7,7.07,46.9,7.185,242,17.8,4.03
3,33.4,2.18,45.8,6.998,222,18.7,2.94
4,36.2,2.18,45.8,7.147,222,18.7,5.33
5,28.7,2.18,45.8,6.43,222,18.7,5.21
6,22.9,7.87,52.4,6.012,311,15.2,12.43
7,27.1,7.87,52.4,6.172,311,15.2,19.15
8,16.5,7.87,52.4,5.631,311,15.2,29.93
9,18.9,7.87,52.4,6.004,311,15.2,17.1


# Read JSON Data

In [39]:
import pandas as pd 

In [40]:
file = "E:\\Udemy\\Complete Data Science Training with Python for Data Analysis\\scriptsLecture\\section4\\skorea.json"
df = pd.read_json(file)
df.head()

Unnamed: 0,Description,Image,Criteria,Site,Area ha (acre),Location,Year
0,,/wiki/File:MuryeongsTomb.jpg,Cultural: (ii)(iii),Baekje Historic Areas,135 (330),"South Chungcheong, North Jeolla",2015
1,,/wiki/File:Korea-Gwangju-Gochang_Dolmens_5350-...,Cultural: (iii),"Gochang, Hwasun and Ganghwa Dolmen Sites",,"Incheon, North Jeolla, South Jeolla",2000
2,,"/wiki/File:Juhamnu,_Changdeokgung_-_Seoul,_Kor...",Cultural: (ii)(iii)(iv),Changdeokgung Palace Complex,,Seoul,1997
3,,/wiki/File:Korea-Gyeongju-Bunhwangsa-Lanterns-...,Cultural: (ii)(iii),Gyeongju Historic Areas,"2,880 (7,100)",North Gyeongsang,2000
4,,/wiki/File:Haeinsa_Temple_(6222053899).jpg,Cultural: (iv)(vi),"Haeinsa Temple Janggyeong Panjeon, the Deposit...",,South Gyeongsang,1995


In [41]:
df.shape

(12, 7)

In [43]:
web = pd.read_json('https://api.github.com/repos/pydata/pandas/issues?per_page=5')
web.head(4)

Unnamed: 0,url,repository_url,labels_url,comments_url,events_url,html_url,id,node_id,number,title,...,assignees,milestone,comments,created_at,updated_at,closed_at,author_association,active_lock_reason,pull_request,body
0,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/34985,645278335,MDExOlB1bGxSZXF1ZXN0NDM5Nzc2NDcx,34985,ENH: concat of nullable int + bool preserves i...,...,[],,0,2020-06-25 07:47:18+00:00,2020-06-25 07:47:18+00:00,NaT,MEMBER,,{'url': 'https://api.github.com/repos/pandas-d...,Closes #34095
1,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/34984,645275617,MDExOlB1bGxSZXF1ZXN0NDM5NzczOTgz,34984,BUG: json.decode fails for nums larger than sy...,...,[],,0,2020-06-25 07:44:52+00:00,2020-06-25 07:44:52+00:00,NaT,CONTRIBUTOR,,{'url': 'https://api.github.com/repos/pandas-d...,- [x] closes #20599 \r\n- [x] tests added\r\n-...
2,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/34983,645121426,MDExOlB1bGxSZXF1ZXN0NDM5NjM3MDAz,34983,BUG: HDFStore unable to create colindex w/o er...,...,[],,0,2020-06-25 03:36:26+00:00,2020-06-25 07:10:31+00:00,NaT,CONTRIBUTOR,,{'url': 'https://api.github.com/repos/pandas-d...,- [x] closes #28156 \r\n- [x] tests added / pa...
3,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/34982,645049802,MDExOlB1bGxSZXF1ZXN0NDM5NTc4MTgz,34982,REF: simplify advance/move/set_length in libre...,...,[],,0,2020-06-24 23:55:37+00:00,2020-06-25 01:11:13+00:00,NaT,MEMBER,,{'url': 'https://api.github.com/repos/pandas-d...,Make it so that we set `buf.data` in fewer pla...


# Read in HTML Data

In [1]:
import pandas as pd
import html5lib

In [2]:
uss = pd.read_html('https://simple.wikipedia.org/wiki/List_of_U.S._states')
print(type(uss))

<class 'list'>


In [3]:
u = uss[0]

In [4]:
print(u)

    Name &postal abbreviation[1]                                 \
    Name &postal abbreviation[1] Name &postal abbreviation[1].1   
0                        Alabama                             AL   
1                         Alaska                             AK   
2                        Arizona                             AZ   
3                       Arkansas                             AR   
4                     California                             CA   
5                       Colorado                             CO   
6                    Connecticut                             CT   
7                       Delaware                             DE   
8                        Florida                             FL   
9                        Georgia                             GA   
10                        Hawaii                             HI   
11                         Idaho                             ID   
12                      Illinois                             I

In [5]:
print(type(u))

<class 'pandas.core.frame.DataFrame'>


In [6]:
url = 'http://www.fdic.gov/bank/individual/failed/banklist.html'

dfs = pd.read_html(url) #this is a list of dataframes

print(type(dfs))

<class 'list'>


In [7]:
df = dfs[0] #list of dataframes
print(df)

                             Bank Name           City  ST   CERT  \
0                 The First State Bank  Barboursville  WV  14361   
1                   Ericson State Bank        Ericson  NE  18265   
2     City National Bank of New Jersey         Newark  NJ  21111   
3                        Resolute Bank         Maumee  OH  58317   
4                Louisa Community Bank         Louisa  KY  58112   
..                                 ...            ...  ..    ...   
556                 Superior Bank, FSB       Hinsdale  IL  32646   
557                Malta National Bank          Malta  OH   6629   
558    First Alliance Bank & Trust Co.     Manchester  NH  34264   
559  National State Bank of Metropolis     Metropolis  IL   3815   
560                   Bank of Honolulu       Honolulu  HI  21029   

                   Acquiring Institution       Closing Date  
0                         MVB Bank, Inc.      April 3, 2020  
1             Farmers and Merchants Bank  February 14, 2020

In [8]:
whsSK=pd.read_html('https://en.wikipedia.org/wiki/List_of_World_Heritage_Sites_in_South_Korea') 

In [9]:
whs_sk = whsSK[0] #grab dataframe from index 0
print(whs_sk)

                                                 Site  Image  \
0                Seokguram Grotto and Bulguksa Temple    NaN   
1   Haeinsa Temple Janggyeong Panjeon, the Deposit...    NaN   
2                                      Jongmyo Shrine    NaN   
3                        Changdeokgung Palace Complex    NaN   
4                                   Hwaseong Fortress    NaN   
5            Gochang, Hwasun and Ganghwa Dolmen Sites    NaN   
6                             Gyeongju Historic Areas    NaN   
7                 Jeju Volcanic Island and Lava Tubes    NaN   
8                   Royal Tombs of the Joseon Dynasty    NaN   
9      Historic Villages of Korea: Hahoe and Yangdong    NaN   
10                                     Namhansanseong    NaN   
11                              Baekje Historic Areas    NaN   
12      Sansa, Buddhist Mountain Monasteries in Korea    NaN   
13             Seowon, Korean Neo-Confucian Academies    NaN   

                                       

## Page with +1 table

In [10]:
whsPH=pd.read_html('https://en.wikipedia.org/wiki/List_of_World_Heritage_Sites_in_the_Philippines')

In [11]:
whs_phl = whsPH[2] #grab dataframe from index 1, table 2 of the tentative WHS sites
print(whs_phl)

  vteWorld Heritage Sites in the Philippines  \
0                                   Cultural   
1                                    Natural   

        vteWorld Heritage Sites in the Philippines.1  \
0  Baroque Churches of the Philippines (San Agust...   
1  Mount Hamiguitan Range Wildlife Sanctuary Puer...   

   vteWorld Heritage Sites in the Philippines.2  
0                                           NaN  
1                                           NaN  
