# **Web Scraping with Python**

The objective of this project is to scrape Mobile Legends hero data from the URL "https://zathong.com/mobile-legends-builds/" and perform some simple exploratory data analysis (EDA) on that data.

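## Import Libraries:
- pandas: stores the scraped data in a DataFrame and converts the stat columns to numeric types
- BeautifulSoup: parses the downloaded HTML so that elements can be searched
- requests: sends the HTTP requests that download the pages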

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

## Make HTTPS requests

In this example we get "Response [200]", which indicates that the request has succeeded.

In [3]:
url = "https://zathong.com/mobile-legends-builds/"
# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.get(url, timeout=30)
print(response)
# Stop here on a 4xx/5xx answer instead of parsing an error page further down.
response.raise_for_status()

<Response [200]>


## Parse HTML Content
Create a Beautiful Soup object by passing the HTML content to the Beautiful Soup constructor. This will allow you to navigate and search through the HTML structure.

In [4]:
# Take the response body as text and parse it with the 'html.parser' backend.
content = response.text
# `soup` is the parsed index page that the link-extraction step below searches.
soup = BeautifulSoup(content, 'html.parser')

## Find and Extract Data

In [5]:
## Function to get the list of hero page links
def get_li_character(soup):
    """Collect the unique hero-page URLs from the parsed index page.

    The links are read from the first element whose class attribute is
    'is-layout-flex wp-container-6 wp-block-columns has-background'.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed HTML of the builds index page.

    Returns
    -------
    list of str
        The ``href`` of every ``<a>`` inside that element, de-duplicated and
        kept in page order so repeated runs produce the same ordering.
        Empty when the element is not present on the page.
    """
    container = soup.find(class_='is-layout-flex wp-container-6 wp-block-columns has-background')
    if container is None:
        # Page layout changed or the wrong page was parsed: nothing to collect.
        return []

    # Anchors without an href cannot be followed, so they are skipped.
    hrefs = (anchor.get('href') for anchor in container.find_all('a'))

    # dict.fromkeys removes duplicates while preserving first-seen order
    # (a set would reorder the links differently on every run).
    return list(dict.fromkeys(href for href in hrefs if href))

## Get the link for each hero from the parsed index page
li_hero = get_li_character(soup)

In [6]:
## Function to get hero data
def testing(li_hero):
## Create Empty List
    data = []
    for hero in li_hero :
        hero_data = []
                
        response = requests.get(hero)
        content = response.text
        soup = BeautifulSoup(content, 'html.parser')
        information = soup.find(class_='entry-content')
                
        try :
            ## Hero Name
            name = information.find('figure').text
            
            ## Hero Role
            role = information.find_all('p', class_='has-text-align-center')[0].text.split(':')[1].split(' ')[0]
            speciality = information.find_all('p', class_='has-text-align-center')[1].text.split(':')[1]
            lane = information.find_all('p', class_='has-text-align-center')[2].text.split(':')[1]
            tier = information.find_all('p', class_='has-text-align-center')[3].text.split(':')[1]

            ## Basic Stat
            mov_speed = information.find_all('td')[:10][0].text.split(':')[1]
            physical_att = information.find_all('td')[:10][1].text.split(':\xa0')[1]
            magic_pwr = information.find_all('td')[:10][2].text.split(':\xa0')[1]
            physical_def = information.find_all('td')[:10][3].text.split(':')[1]
            magic_def = information.find_all('td')[:10][4].text.split(':\xa0')[1]
            hp = information.find_all('td')[:10][5].text.split(':')[1]
            mana = information.find_all('td')[:10][6].text.split(':')[1]
            att_speed = information.find_all('td')[:10][7].text.split(':')[1]
            hp_reg = information.find_all('td')[:10][8].text.split(':')[1]
            mana_reg = information.find_all('td')[:10][9].text.split(':')[1]
        except :
            pass
        
        basic_info = [name, role, speciality, lane, tier]
        basic_stat = [mov_speed, physical_att, magic_pwr, physical_def, magic_def, hp, mana, att_speed, hp_reg, mana_reg]
        
        data.append(basic_info+basic_stat)
    return data

In [7]:
# Scrape every hero page; this issues one HTTP request per link in li_hero.
data = testing(li_hero)

## Store Data

In [9]:
## Function to create a DataFrame from the scraped rows and convert the stat columns to numbers.
def store_data(data):
    """Load scraped hero rows into a DataFrame with numeric stat columns.

    Parameters
    ----------
    data : list of list
        Rows of 15 values ordered as: name, role, speciality, lane, tier,
        mov_speed, physical_att, magic_pwr, physical_def, magic_def, hp,
        mana, att_speed, hp_reg, mana_reg.

    Returns
    -------
    pandas.DataFrame
        One row per hero. The five descriptive columns are left as given.
        The stat columns are converted with ``pd.to_numeric`` and downcast
        to the smallest integer/float dtype that fits; values that cannot
        be parsed become NaN.
    """
    col_names = ['name','role','speciality','lane','tier','mov_speed','physical_att','magic_pwr','physical_def','magic_def','hp','mana','att_speed','hp_reg','mana_reg']
    int_stats = ['mov_speed', 'physical_att', 'magic_pwr', 'physical_def', 'magic_def', 'hp', 'mana']
    float_stats = ['att_speed', 'hp_reg', 'mana_reg']
    stat_cols = int_stats + float_stats

    df = pd.DataFrame(data, columns=col_names)

    # regex=True is required to remove whitespace *inside* a cell; a plain
    # replace(' ', '') only matches cells that are exactly ' '. \xa0 is listed
    # explicitly so non-breaking spaces are removed whatever regex engine is used.
    df[stat_cols] = df[stat_cols].replace(r'[\s\xa0]+', '', regex=True)

    for col in int_stats:
        df[col] = pd.to_numeric(df[col], downcast='integer', errors='coerce')
    for col in float_stats:
        df[col] = pd.to_numeric(df[col], downcast='float', errors='coerce')

    return df

# Build the DataFrame from the scraped rows, with the stat columns converted to numbers.
df = store_data(data)

In [12]:
# Bare expression: in a notebook cell this renders the DataFrame as the cell output.
df

Unnamed: 0,name,role,speciality,lane,tier,mov_speed,physical_att,magic_pwr,physical_def,magic_def,hp,mana,att_speed,hp_reg,mana_reg
0,Hylos,,Guard/Initiator,Roam | Jungle,A+,260.0,105,0,17,10.0,3109,430.0,0.826,18.4,2.4
1,Moskov,,Reap/Chase,Gold,B,240.0,125,0,16,10.0,2455,420.0,0.814,6.3,3.0
2,Granger,,Burst/Reap,Gold | Jungle,S+,240.0,100,0,15,10.0,2370,520.0,0.818,5.4,4.0
3,Masha,,Push/Damage,EXP,C,250.0,101,0,20,10.0,2043,0.0,0.804,3.8,0.0
4,Minsitthar,,Initiator | Crowd Control,EXP,C,260.0,121,0,23,10.0,2698,380.0,0.852,7.4,3.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,Hayabusa,,Chase/Burst,Jungle,A+,260.0,117,0,17,10.0,2629,0.0,0.854,7.4,0.0
117,Chang’e,,Poke/Burst,Mid,S,240.0,115,0,16,10.0,2301,505.0,0.808,6.8,4.2
118,Joy,,Chase/Damage,Jungle/Mid,S+,260.0,115,0,16,10.0,2301,505.0,0.808,6.8,4.2
119,Novaria,,Burst/Poke,Mid,S,240.0,115,0,16,10.0,2301,505.0,0.808,6.8,4.2
