In [None]:
# 4 Major Python Libraries for Web Crawling
# (1) Pandas - Parsing HTML Tables
# (2) Requests - Fetching HTML Pages
# (3) BeautifulSoup - Parsing HTML Code
# (4) Selenium - Automating Browser Activities

# All labs in these lessons are meant for demonstrating web crawling techniques only.
# Please research and try to understand in detail the ethics and best practices for web crawling.
# e.g. https://sunscrapers.com/blog/web-crawling-scraping-best-practices/

In [18]:
# Make sure pandas (plus an HTML parser such as lxml, which read_html relies on) is installed in your environment
import pandas as pd

In [19]:
# read_html downloads the page and returns a list of DataFrames, one per <table> found,
# so `df` is a list here (indexed as df[0], df[1], ...), not a single DataFrame.
directory_url = "https://footwearsinfoline.tripod.com/shoe_directory_3.htm"
df = pd.read_html(directory_url)

In [20]:
# Display the first parsed table to inspect its row/column layout before extracting fields
df[0]

Unnamed: 0,0,1
0,National Rubber & Chemical Industries,National Rubber & Chemical Industries
1,Footwear,Footwear
2,"Address: Basti Bawa Khel, Jalandar India - ...",Contact: Tel: 181-2254419 Fax: 181-2204187
3,Send Business InquiryEdit Download as vCard,Send Business InquiryEdit Download as vCard


In [21]:
# Company name: row 0, column 1 of the first table
df[0].iloc[0, 1]

'National  Rubber & Chemical Industries'

In [22]:
# Business segment: row 1, column 1 of the first table
df[0].iloc[1, 1]

'Footwear'

In [23]:
# Address (still prefixed with its "Address:" label): row 2, column 0 of the first table
df[0].iloc[2, 0]

'Address:  Basti  Bawa Khel, Jalandar  India - 144021'

In [24]:
# Contact details (still prefixed with their "Contact:" label): row 2, column 1 of the first table
df[0].iloc[2, 1]

'Contact:  Tel: 181-2254419  Fax: 181-2204187'

In [25]:
# Pull the same four cells out of every table in `df`. This relies on each table
# sharing one layout:
#   row 0, col 1 -> company name
#   row 1, col 1 -> business segment
#   row 2, col 0 -> address
#   row 2, col 1 -> contact details
# A table with fewer rows/columns would raise IndexError here.
name = [table.iloc[0, 1] for table in df]
segment = [table.iloc[1, 1] for table in df]
address = [table.iloc[2, 0] for table in df]
contact = [table.iloc[2, 1] for table in df]

# Assemble one row per company; dict order fixes the column order.
consolidated_df = pd.DataFrame(
    {
        "Name": name,
        "Segment": segment,
        "Address": address,
        "Contact": contact,
    }
)

consolidated_df

Unnamed: 0,Name,Segment,Address,Contact
0,National Rubber & Chemical Industries,Footwear,"Address: Basti Bawa Khel, Jalandar India - ...",Contact: Tel: 181-2254419 Fax: 181-2204187
1,NAVEEN RUBBER INDUSTRY,MANUFACTURER OF FOOTWEAR.,"Address: 69/6A NAJAFGARH ROAD NEW DELHI, DE...","Contact: PREVEEN SATIJA Tel: 011 254 35861, ..."
2,NAVYUG LAMINATES,"EVA, FIBRE, ETC. THERMO FILMS Footwear","Address: C-124, FOCAL POINT PHASE-V, LUDHIA...",Contact: MR. ANUJ SETH (DIRECTOR) Tel: 91-16...
3,NEBCO TRADERS,"FOOTWEAR RAW MATERIALS, ARTIFICIAL LEATHER CL...","Address: 59/61, GOA STREET, NEAR G.P.O., FOR...",Contact: MR. RAMESH NEBHNANI Tel: 91-22-2615...
4,NEW INDIA SEWING MACHINE COMPANY,"Footwear Machinery, Leather Goods Machinery, ...","Address: 120/828 Ranjeet Nagar Kanpur, Utta...","Contact: Tel: 0512-2216339, 2217069 Fax: 051..."
...,...,...,...,...
149,ZEE FOOTWEARS PVT.LTD.,SHOES/FOOTWEAR & UPPERS,"Address: E-13,14,SITE -IV, INDL. AREA, SAHI...",Contact: MR. PRAMOD JAIN Tel: 01202771747/27...
150,ZETA LEATHER EXPORTS,MANUFACTURERS & EXPORTERS OF : HIGH QUALITY M...,"Address: OPP. ROSE LAND PUBLIC SCHOOL, VILLA...",Contact: MR. ROHIT JAFA MR. PARKASH VEER YADA...
151,ZOOM TECH INDUSTRIES,MANUFACTURERS OF SHOES & SHOES UPPERS SHOES/F...,"Address: C 85A, SEC 8 NOIDA, UTTAR PRADESH ...",Contact: MR KULDEEP KATIYAR Tel: 0120-452573...
152,ZOOM TECH INDUSTRIES,SHOES Footwear,"Address: C 85A, SEC 8, NOIDA India - 201301",Contact: MR KULDEEP KATIYAR Tel: 91-118-4525...


<h2>Technique: Data Cleansing</h2>

In [26]:
# Remove the leading "Address:" label and the whitespace that follows it.
# An anchored regex is used instead of a fixed 10-character slice (.str[10:]) because
# this cell overwrites the column in place: with a slice, re-running the cell would
# silently chop 10 more characters off the real address. With the regex a second run
# is a no-op, and rows whose label is followed by a different amount of whitespace
# are still cleaned correctly.
consolidated_df["Address"] = consolidated_df["Address"].str.replace(
    r"^Address:\s*", "", regex=True
)
consolidated_df

Unnamed: 0,Name,Segment,Address,Contact
0,National Rubber & Chemical Industries,Footwear,"Basti Bawa Khel, Jalandar India - 144021",Contact: Tel: 181-2254419 Fax: 181-2204187
1,NAVEEN RUBBER INDUSTRY,MANUFACTURER OF FOOTWEAR.,"69/6A NAJAFGARH ROAD NEW DELHI, DELHI India...","Contact: PREVEEN SATIJA Tel: 011 254 35861, ..."
2,NAVYUG LAMINATES,"EVA, FIBRE, ETC. THERMO FILMS Footwear","C-124, FOCAL POINT PHASE-V, LUDHIANA India",Contact: MR. ANUJ SETH (DIRECTOR) Tel: 91-16...
3,NEBCO TRADERS,"FOOTWEAR RAW MATERIALS, ARTIFICIAL LEATHER CL...","59/61, GOA STREET, NEAR G.P.O., FORT, MUMBAI...",Contact: MR. RAMESH NEBHNANI Tel: 91-22-2615...
4,NEW INDIA SEWING MACHINE COMPANY,"Footwear Machinery, Leather Goods Machinery, ...","120/828 Ranjeet Nagar Kanpur, Uttar Pradesh ...","Contact: Tel: 0512-2216339, 2217069 Fax: 051..."
...,...,...,...,...
149,ZEE FOOTWEARS PVT.LTD.,SHOES/FOOTWEAR & UPPERS,"E-13,14,SITE -IV, INDL. AREA, SAHIBABAD, UTT...",Contact: MR. PRAMOD JAIN Tel: 01202771747/27...
150,ZETA LEATHER EXPORTS,MANUFACTURERS & EXPORTERS OF : HIGH QUALITY M...,"OPP. ROSE LAND PUBLIC SCHOOL, VILLAGE KHANDSA...",Contact: MR. ROHIT JAFA MR. PARKASH VEER YADA...
151,ZOOM TECH INDUSTRIES,MANUFACTURERS OF SHOES & SHOES UPPERS SHOES/F...,"C 85A, SEC 8 NOIDA, UTTAR PRADESH India - 2...",Contact: MR KULDEEP KATIYAR Tel: 0120-452573...
152,ZOOM TECH INDUSTRIES,SHOES Footwear,"C 85A, SEC 8, NOIDA India - 201301",Contact: MR KULDEEP KATIYAR Tel: 91-118-4525...


In [27]:
# Remove the leading "Contact:" label and the whitespace that follows it.
# An anchored regex is used instead of a fixed 10-character slice (.str[10:]) because
# this cell overwrites the column in place: with a slice, re-running the cell would
# silently chop 10 more characters off the real contact details. With the regex a
# second run is a no-op, and rows whose label is followed by a different amount of
# whitespace are still cleaned correctly.
consolidated_df["Contact"] = consolidated_df["Contact"].str.replace(
    r"^Contact:\s*", "", regex=True
)
consolidated_df

Unnamed: 0,Name,Segment,Address,Contact
0,National Rubber & Chemical Industries,Footwear,"Basti Bawa Khel, Jalandar India - 144021",Tel: 181-2254419 Fax: 181-2204187
1,NAVEEN RUBBER INDUSTRY,MANUFACTURER OF FOOTWEAR.,"69/6A NAJAFGARH ROAD NEW DELHI, DELHI India...","PREVEEN SATIJA Tel: 011 254 35861, +91 11 254..."
2,NAVYUG LAMINATES,"EVA, FIBRE, ETC. THERMO FILMS Footwear","C-124, FOCAL POINT PHASE-V, LUDHIANA India",MR. ANUJ SETH (DIRECTOR) Tel: 91-161-670209 /...
3,NEBCO TRADERS,"FOOTWEAR RAW MATERIALS, ARTIFICIAL LEATHER CL...","59/61, GOA STREET, NEAR G.P.O., FORT, MUMBAI...","MR. RAMESH NEBHNANI Tel: 91-22-2615756, 26158..."
4,NEW INDIA SEWING MACHINE COMPANY,"Footwear Machinery, Leather Goods Machinery, ...","120/828 Ranjeet Nagar Kanpur, Uttar Pradesh ...","Tel: 0512-2216339, 2217069 Fax: 0512-2216285"
...,...,...,...,...
149,ZEE FOOTWEARS PVT.LTD.,SHOES/FOOTWEAR & UPPERS,"E-13,14,SITE -IV, INDL. AREA, SAHIBABAD, UTT...",MR. PRAMOD JAIN Tel: 01202771747/2771748 Fax...
150,ZETA LEATHER EXPORTS,MANUFACTURERS & EXPORTERS OF : HIGH QUALITY M...,"OPP. ROSE LAND PUBLIC SCHOOL, VILLAGE KHANDSA...",MR. ROHIT JAFA MR. PARKASH VEER YADAV Tel: 91...
151,ZOOM TECH INDUSTRIES,MANUFACTURERS OF SHOES & SHOES UPPERS SHOES/F...,"C 85A, SEC 8 NOIDA, UTTAR PRADESH India - 2...",MR KULDEEP KATIYAR Tel: 0120-4525739/4544763 ...
152,ZOOM TECH INDUSTRIES,SHOES Footwear,"C 85A, SEC 8, NOIDA India - 201301",MR KULDEEP KATIYAR Tel: 91-118-4525739 Fax: ...


In [28]:
# Save the cleaned directory as an Excel workbook in the working directory;
# index=False keeps the positional row index out of the sheet.
output_path = "india_shoes_3.xlsx"
consolidated_df.to_excel(output_path, index=False)