# Scraping the data from www.timeanddate.com

## Import Libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import urllib.parse as urlparse
from urllib.request import urlopen, Request
import pandas as pd
from datetime import datetime

## Get URL

In [2]:
# Query-string prefix of the historic-weather page for Antwerp; the
# month/day/year parameters are appended directly after the trailing "?".
base_url = (
    "https://www.timeanddate.com"
    "/weather/belgium/antwerp/historic?"
)

## Scraped data with tags for every year

In [3]:
def _parse_weather_rows(table, date=None):
    """Turn the body rows of a ``wt-his`` table into a list of record dicts.

    Args:
        table: The BeautifulSoup ``<table>`` element to read.
        date: Optional date string. When given, it is stored under the
            ``'date'`` key as the first entry of every record; when omitted,
            the records have no ``'date'`` key at all.

    Returns:
        One dict per ``<tr>`` with the keys ``time``, ``temp``, ``weather``,
        ``wind``, ``humidity``, ``barometer`` and ``visibility`` (preceded by
        ``date`` when supplied).
    """
    records = []
    for tr in table.find('tbody').find_all('tr'):
        cells = tr.find_all('td')
        record = {}
        if date is not None:
            record['date'] = date
        record['time'] = tr.find('th').text.strip()
        # Cells 0 and 4 are intentionally not stored.
        record['temp'] = cells[1].text
        record['weather'] = cells[2].text
        record['wind'] = cells[3].text
        record['humidity'] = cells[5].text
        record['barometer'] = cells[6].text
        record['visibility'] = cells[7].text
        records.append(record)
    return records


def scarp_year(year):
    """Scrape the historic weather tables of every month and day of *year*.

    For each month 1-12 the month page is fetched from ``base_url``; the rows
    of its ``wt-his`` table are recorded first, without a ``date`` key. Then
    one page per ``<option>`` of the ``wt-his-select`` drop-down is fetched
    and its rows are recorded together with the date (``YYYY-MM-DD``).

    Args:
        year: The year to scrape, inserted into the request URLs.

    Returns:
        A ``pandas.DataFrame`` with a fresh 0..n-1 index holding all records
        of the year. Rows taken from the month pages have a missing (NaN)
        ``date`` value.

    Raises:
        AttributeError: If an expected table or drop-down is not found in a
            fetched page (``soup.find`` returned ``None``).
    """
    monthly_frames = []

    for month in range(1, 13):
        url = f"{base_url}month={month}&year={year}"
        # The context manager closes the HTTP response once it is parsed.
        with urlopen(url) as page:
            soup = BeautifulSoup(page, "html.parser")

        # Rows of the month page itself; kept (without a date) so that the
        # returned data matches what earlier runs produced.
        records = _parse_weather_rows(
            soup.find('table', attrs={'id': 'wt-his'})
        )

        date_select = soup.find('select', attrs={'id': 'wt-his-select'})
        for option in date_select.find_all('option'):
            # The first whitespace-separated token of the option label is
            # used as the day of the month.
            day = option.text.split()[0]
            date = datetime.strptime(
                f"{day} {month} {year}", "%d %m %Y"
            ).strftime("%Y-%m-%d")

            day_url = (
                f"{base_url}day={option['value']}&month={month}&year={year}"
            )
            with urlopen(day_url) as day_page:
                day_soup = BeautifulSoup(day_page, "html.parser")

            records.extend(
                _parse_weather_rows(
                    day_soup.find('table', attrs={'id': 'wt-his'}),
                    date=date,
                )
            )

        monthly_frames.append(pd.DataFrame(records))

    # Concatenate once instead of re-copying a growing frame every month.
    return pd.concat(monthly_frames, ignore_index=True)
            

In [4]:
def wranlge(year):
    """Scrape *year* with ``scarp_year`` and tidy the resulting table.

    The ``time`` column is cut down to its first five characters, and every
    row that contains a missing value is removed.

    Args:
        year: The year passed on to ``scarp_year``.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    scraped = scarp_year(year)
    trimmed = scraped.assign(time=scraped['time'].str.slice(stop=5))
    return trimmed.dropna()
    

## Scraping the years from 2012 to 2019

In [11]:
# Scrape and clean 2012, then store it as CSV.
# NOTE(review): to_csv's default index=True also writes the row index as an unnamed first column.
df_2012 = wranlge(year=2012)
df_2012.to_csv(path_or_buf='datasets/year_2012.csv')

In [5]:
# Scrape and clean 2013, then store it as CSV.
# NOTE(review): to_csv's default index=True also writes the row index as an unnamed first column.
df_2013 = wranlge(year=2013)
df_2013.to_csv(path_or_buf='datasets/year_2013.csv')

In [6]:
# Scrape and clean 2014, then store it as CSV.
# NOTE(review): to_csv's default index=True also writes the row index as an unnamed first column.
df_2014 = wranlge(year=2014)
df_2014.to_csv(path_or_buf='datasets/year_2014.csv')

In [5]:
# Scrape and clean 2015, then store it as CSV.
# NOTE(review): to_csv's default index=True also writes the row index as an unnamed first column.
df_2015 = wranlge(year=2015)
df_2015.to_csv(path_or_buf='datasets/year_2015.csv')

In [17]:
# Scrape and clean 2016, then store it as CSV.
# NOTE(review): to_csv's default index=True also writes the row index as an unnamed first column.
df_2016 = wranlge(year=2016)
df_2016.to_csv(path_or_buf='datasets/year_2016.csv')

In [5]:
# Scrape and clean 2017, then store it as CSV.
# NOTE(review): to_csv's default index=True also writes the row index as an unnamed first column.
df_2017 = wranlge(year=2017)
df_2017.to_csv(path_or_buf='datasets/year_2017.csv')

In [6]:
# Scrape and clean 2018, then store it as CSV.
# NOTE(review): to_csv's default index=True also writes the row index as an unnamed first column.
df_2018 = wranlge(year=2018)
df_2018.to_csv(path_or_buf='datasets/year_2018.csv')

In [5]:
# Scrape and clean 2019, then store it as CSV.
# NOTE(review): to_csv's default index=True also writes the row index as an unnamed first column.
df_2019 = wranlge(year=2019)
df_2019.to_csv(path_or_buf='datasets/year_2019.csv')

## Concatenate all files

In [4]:
# Load the eight per-year CSV files (2012 through 2019) back into DataFrames,
# one name per year.
(df_12, df_13, df_14, df_15,
 df_16, df_17, df_18, df_19) = (
    pd.read_csv(f'datasets/year_{year}.csv') for year in range(2012, 2020)
)

In [17]:
# Stack the yearly frames on top of each other, from df_12 down to df_19.
# NOTE(review): ignore_index is left at its default, so each frame keeps its own row labels.
all_df = pd.concat(
    objs=[df_12, df_13, df_14, df_15, df_16, df_17, df_18, df_19],
)

In [19]:
# Write the combined table to a single CSV file (row index included, per to_csv's default).
all_df.to_csv(path_or_buf='datasets/weather_in_Antwerp.csv')